|
|
|
@ -17,12 +17,12 @@ import (
|
|
|
|
|
|
|
|
|
|
// NodeStatus represents the data sent in a heartbeat
|
|
|
|
|
type NodeStatus struct {
|
|
|
|
|
NodeName string `json:"nodeName"`
|
|
|
|
|
NodeUID string `json:"nodeUID"`
|
|
|
|
|
Timestamp time.Time `json:"timestamp"`
|
|
|
|
|
Resources Resources `json:"resources"`
|
|
|
|
|
NodeName string `json:"nodeName"`
|
|
|
|
|
NodeUID string `json:"nodeUID"`
|
|
|
|
|
Timestamp time.Time `json:"timestamp"`
|
|
|
|
|
Resources Resources `json:"resources"`
|
|
|
|
|
Workloads []WorkloadStatus `json:"workloadInstances,omitempty"`
|
|
|
|
|
NetworkInfo NetworkInfo `json:"overlayNetwork"`
|
|
|
|
|
NetworkInfo NetworkInfo `json:"overlayNetwork"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Resources represents the node's resource capacity and usage
|
|
|
|
@ -39,15 +39,15 @@ type ResourceMetrics struct {
|
|
|
|
|
|
|
|
|
|
// WorkloadStatus represents the status of a workload instance
|
|
|
|
|
type WorkloadStatus struct {
|
|
|
|
|
WorkloadName string `json:"workloadName"`
|
|
|
|
|
Namespace string `json:"namespace"`
|
|
|
|
|
InstanceID string `json:"instanceID"`
|
|
|
|
|
ContainerID string `json:"containerID"`
|
|
|
|
|
ImageID string `json:"imageID"`
|
|
|
|
|
State string `json:"state"` // "running", "exited", "paused", "unknown"
|
|
|
|
|
ExitCode int `json:"exitCode"`
|
|
|
|
|
HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check"
|
|
|
|
|
Restarts int `json:"restarts"`
|
|
|
|
|
WorkloadName string `json:"workloadName"`
|
|
|
|
|
Namespace string `json:"namespace"`
|
|
|
|
|
InstanceID string `json:"instanceID"`
|
|
|
|
|
ContainerID string `json:"containerID"`
|
|
|
|
|
ImageID string `json:"imageID"`
|
|
|
|
|
State string `json:"state"` // "running", "exited", "paused", "unknown"
|
|
|
|
|
ExitCode int `json:"exitCode"`
|
|
|
|
|
HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check"
|
|
|
|
|
Restarts int `json:"restarts"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NetworkInfo contains information about the node's overlay network
|
|
|
|
@ -65,7 +65,7 @@ type Agent struct {
|
|
|
|
|
PKIDir string
|
|
|
|
|
|
|
|
|
|
// mTLS client for leader communication
|
|
|
|
|
client *http.Client
|
|
|
|
|
client *http.Client
|
|
|
|
|
|
|
|
|
|
// Heartbeat configuration
|
|
|
|
|
heartbeatInterval time.Duration
|
|
|
|
@ -125,13 +125,13 @@ func (a *Agent) SetupMTLSClient() error {
|
|
|
|
|
// Override the dial function to map any hostname to the leader's IP
|
|
|
|
|
DialTLS: func(network, addr string) (net.Conn, error) {
|
|
|
|
|
// Extract host and port from addr
|
|
|
|
|
host, port, err := net.SplitHostPort(addr)
|
|
|
|
|
_, port, err := net.SplitHostPort(addr)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Extract host and port from LeaderAPI
|
|
|
|
|
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI)
|
|
|
|
|
leaderHost, _, err := net.SplitHostPort(a.LeaderAPI)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
@ -211,8 +211,13 @@ func (a *Agent) sendHeartbeat() error {
|
|
|
|
|
return fmt.Errorf("failed to marshal node status: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Construct URL - use leader.kat.cluster.local as hostname to match certificate
|
|
|
|
|
url := fmt.Sprintf("https://leader.kat.cluster.local/v1alpha1/nodes/%s/status", a.NodeName)
|
|
|
|
|
url := fmt.Sprintf("https://%s:%s/v1alpha1/nodes/%s/status", leaderHost, leaderPort, a.NodeName)
|
|
|
|
|
|
|
|
|
|
// Create request
|
|
|
|
|
req, err := http.NewRequest("POST", url, bytes.NewBuffer(statusJSON))
|
|
|
|
@ -247,12 +252,12 @@ func (a *Agent) gatherNodeStatus() NodeStatus {
|
|
|
|
|
runtime.ReadMemStats(&m)
|
|
|
|
|
|
|
|
|
|
// Convert to human-readable format (very simplified for now)
|
|
|
|
|
cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU() * 1000)
|
|
|
|
|
memCapacity := fmt.Sprintf("%dMi", m.Sys / (1024 * 1024))
|
|
|
|
|
cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU()*1000)
|
|
|
|
|
memCapacity := fmt.Sprintf("%dMi", m.Sys/(1024*1024))
|
|
|
|
|
|
|
|
|
|
// For allocatable, we'll just use 90% of capacity for this phase
|
|
|
|
|
cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU() * 900)
|
|
|
|
|
memAllocatable := fmt.Sprintf("%dMi", (m.Sys / (1024 * 1024)) * 9 / 10)
|
|
|
|
|
cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU()*900)
|
|
|
|
|
memAllocatable := fmt.Sprintf("%dMi", (m.Sys/(1024*1024))*9/10)
|
|
|
|
|
|
|
|
|
|
return NodeStatus{
|
|
|
|
|
NodeName: a.NodeName,
|
|
|
|
|