more fixes before final part
This commit is contained in:
parent
8f90c1b16d
commit
92fb052594
2
Makefile
2
Makefile
@ -18,7 +18,7 @@ clean:
|
|||||||
# Run all tests
|
# Run all tests in short mode (tests that check testing.Short() may skip themselves)
|
||||||
test: generate
|
test: generate
|
||||||
@echo "Running all tests..."
|
@echo "Running all tests..."
|
||||||
@go test -v -count=1 ./... --coverprofile=coverage.out
|
@go test -v -count=1 ./... --coverprofile=coverage.out --short
|
||||||
|
|
||||||
# Run unit tests only (faster, no integration tests)
|
# Run unit tests only (faster, no integration tests)
|
||||||
test-unit:
|
test-unit:
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.dws.rip/dubey/kat/internal/agent"
|
||||||
"git.dws.rip/dubey/kat/internal/api"
|
"git.dws.rip/dubey/kat/internal/api"
|
||||||
"git.dws.rip/dubey/kat/internal/cli"
|
"git.dws.rip/dubey/kat/internal/cli"
|
||||||
"git.dws.rip/dubey/kat/internal/config"
|
"git.dws.rip/dubey/kat/internal/config"
|
||||||
@ -265,7 +266,7 @@ func runInit(cmd *cobra.Command, args []string) {
|
|||||||
joinHandler := api.NewJoinHandler(etcdStore, caKeyPath, caCertPath)
|
joinHandler := api.NewJoinHandler(etcdStore, caKeyPath, caCertPath)
|
||||||
apiServer.RegisterJoinHandler(joinHandler)
|
apiServer.RegisterJoinHandler(joinHandler)
|
||||||
log.Printf("Registered join handler with CA key: %s, CA cert: %s", caKeyPath, caCertPath)
|
log.Printf("Registered join handler with CA key: %s, CA cert: %s", caKeyPath, caCertPath)
|
||||||
|
|
||||||
// Register the node status handler
|
// Register the node status handler
|
||||||
nodeStatusHandler := api.NewNodeStatusHandler(etcdStore)
|
nodeStatusHandler := api.NewNodeStatusHandler(etcdStore)
|
||||||
apiServer.RegisterNodeStatusHandler(nodeStatusHandler)
|
apiServer.RegisterNodeStatusHandler(nodeStatusHandler)
|
||||||
@ -343,11 +344,11 @@ func runJoin(cmd *cobra.Command, args []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Successfully joined cluster. Node is ready.")
|
log.Printf("Successfully joined cluster. Node is ready.")
|
||||||
|
|
||||||
// Setup signal handling for graceful shutdown
|
// Setup signal handling for graceful shutdown
|
||||||
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||||
defer stop()
|
defer stop()
|
||||||
|
|
||||||
// Create and start the agent with heartbeating
|
// Create and start the agent with heartbeating
|
||||||
agent, err := agent.NewAgent(
|
agent, err := agent.NewAgent(
|
||||||
joinResp.NodeName,
|
joinResp.NodeName,
|
||||||
@ -360,19 +361,19 @@ func runJoin(cmd *cobra.Command, args []string) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Failed to create agent: %v", err)
|
log.Fatalf("Failed to create agent: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup mTLS client
|
// Setup mTLS client
|
||||||
if err := agent.SetupMTLSClient(); err != nil {
|
if err := agent.SetupMTLSClient(); err != nil {
|
||||||
log.Fatalf("Failed to setup mTLS client: %v", err)
|
log.Fatalf("Failed to setup mTLS client: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start heartbeating
|
// Start heartbeating
|
||||||
if err := agent.StartHeartbeat(ctx); err != nil {
|
if err := agent.StartHeartbeat(ctx); err != nil {
|
||||||
log.Fatalf("Failed to start heartbeat: %v", err)
|
log.Fatalf("Failed to start heartbeat: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Node %s is now running with heartbeat. Press Ctrl+C to exit.", nodeName)
|
log.Printf("Node %s is now running with heartbeat. Press Ctrl+C to exit.", nodeName)
|
||||||
|
|
||||||
// Wait for shutdown signal
|
// Wait for shutdown signal
|
||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
log.Println("Received shutdown signal. Stopping heartbeat...")
|
log.Println("Received shutdown signal. Stopping heartbeat...")
|
||||||
|
@ -17,12 +17,12 @@ import (
|
|||||||
|
|
||||||
// NodeStatus represents the data sent in a heartbeat
|
// NodeStatus represents the data sent in a heartbeat
|
||||||
type NodeStatus struct {
|
type NodeStatus struct {
|
||||||
NodeName string `json:"nodeName"`
|
NodeName string `json:"nodeName"`
|
||||||
NodeUID string `json:"nodeUID"`
|
NodeUID string `json:"nodeUID"`
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
Resources Resources `json:"resources"`
|
Resources Resources `json:"resources"`
|
||||||
Workloads []WorkloadStatus `json:"workloadInstances,omitempty"`
|
Workloads []WorkloadStatus `json:"workloadInstances,omitempty"`
|
||||||
NetworkInfo NetworkInfo `json:"overlayNetwork"`
|
NetworkInfo NetworkInfo `json:"overlayNetwork"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resources represents the node's resource capacity and usage
|
// Resources represents the node's resource capacity and usage
|
||||||
@ -39,15 +39,15 @@ type ResourceMetrics struct {
|
|||||||
|
|
||||||
// WorkloadStatus represents the status of a workload instance
|
// WorkloadStatus represents the status of a workload instance
|
||||||
type WorkloadStatus struct {
|
type WorkloadStatus struct {
|
||||||
WorkloadName string `json:"workloadName"`
|
WorkloadName string `json:"workloadName"`
|
||||||
Namespace string `json:"namespace"`
|
Namespace string `json:"namespace"`
|
||||||
InstanceID string `json:"instanceID"`
|
InstanceID string `json:"instanceID"`
|
||||||
ContainerID string `json:"containerID"`
|
ContainerID string `json:"containerID"`
|
||||||
ImageID string `json:"imageID"`
|
ImageID string `json:"imageID"`
|
||||||
State string `json:"state"` // "running", "exited", "paused", "unknown"
|
State string `json:"state"` // "running", "exited", "paused", "unknown"
|
||||||
ExitCode int `json:"exitCode"`
|
ExitCode int `json:"exitCode"`
|
||||||
HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check"
|
HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check"
|
||||||
Restarts int `json:"restarts"`
|
Restarts int `json:"restarts"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NetworkInfo contains information about the node's overlay network
|
// NetworkInfo contains information about the node's overlay network
|
||||||
@ -63,10 +63,10 @@ type Agent struct {
|
|||||||
LeaderAPI string
|
LeaderAPI string
|
||||||
AdvertiseAddr string
|
AdvertiseAddr string
|
||||||
PKIDir string
|
PKIDir string
|
||||||
|
|
||||||
// mTLS client for leader communication
|
// mTLS client for leader communication
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
|
||||||
// Heartbeat configuration
|
// Heartbeat configuration
|
||||||
heartbeatInterval time.Duration
|
heartbeatInterval time.Duration
|
||||||
stopHeartbeat chan struct{}
|
stopHeartbeat chan struct{}
|
||||||
@ -77,7 +77,7 @@ func NewAgent(nodeName, nodeUID, leaderAPI, advertiseAddr, pkiDir string, heartb
|
|||||||
if heartbeatIntervalSeconds <= 0 {
|
if heartbeatIntervalSeconds <= 0 {
|
||||||
heartbeatIntervalSeconds = 15 // Default to 15 seconds
|
heartbeatIntervalSeconds = 15 // Default to 15 seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Agent{
|
return &Agent{
|
||||||
NodeName: nodeName,
|
NodeName: nodeName,
|
||||||
NodeUID: nodeUID,
|
NodeUID: nodeUID,
|
||||||
@ -125,29 +125,29 @@ func (a *Agent) SetupMTLSClient() error {
|
|||||||
// Override the dial function to map any hostname to the leader's IP
|
// Override the dial function to map any hostname to the leader's IP
|
||||||
DialTLS: func(network, addr string) (net.Conn, error) {
|
DialTLS: func(network, addr string) (net.Conn, error) {
|
||||||
// Extract host and port from addr
|
// Extract host and port from addr
|
||||||
host, port, err := net.SplitHostPort(addr)
|
_, port, err := net.SplitHostPort(addr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract host and port from LeaderAPI
|
// Extract host and port from LeaderAPI
|
||||||
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI)
|
leaderHost, _, err := net.SplitHostPort(a.LeaderAPI)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the leader's IP but keep the original port
|
// Use the leader's IP but keep the original port
|
||||||
dialAddr := net.JoinHostPort(leaderHost, port)
|
dialAddr := net.JoinHostPort(leaderHost, port)
|
||||||
|
|
||||||
// For logging purposes
|
// For logging purposes
|
||||||
log.Printf("Dialing %s instead of %s", dialAddr, addr)
|
log.Printf("Dialing %s instead of %s", dialAddr, addr)
|
||||||
|
|
||||||
// Create the TLS connection
|
// Create the TLS connection
|
||||||
conn, err := tls.Dial(network, dialAddr, tlsConfig)
|
conn, err := tls.Dial(network, dialAddr, tlsConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return conn, nil
|
return conn, nil
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -211,8 +211,13 @@ func (a *Agent) sendHeartbeat() error {
|
|||||||
return fmt.Errorf("failed to marshal node status: %w", err)
|
return fmt.Errorf("failed to marshal node status: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Construct URL - use leader.kat.cluster.local as hostname to match certificate
|
// Construct URL from the leader host and port parsed from LeaderAPI
|
||||||
url := fmt.Sprintf("https://leader.kat.cluster.local/v1alpha1/nodes/%s/status", a.NodeName)
|
url := fmt.Sprintf("https://%s:%s/v1alpha1/nodes/%s/status", leaderHost, leaderPort, a.NodeName)
|
||||||
|
|
||||||
// Create request
|
// Create request
|
||||||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(statusJSON))
|
req, err := http.NewRequest("POST", url, bytes.NewBuffer(statusJSON))
|
||||||
@ -241,19 +246,19 @@ func (a *Agent) sendHeartbeat() error {
|
|||||||
func (a *Agent) gatherNodeStatus() NodeStatus {
|
func (a *Agent) gatherNodeStatus() NodeStatus {
|
||||||
// For now, just provide basic information
|
// For now, just provide basic information
|
||||||
// In future phases, this will include actual resource usage, workload status, etc.
|
// In future phases, this will include actual resource usage, workload status, etc.
|
||||||
|
|
||||||
// Get basic system info for initial capacity reporting
|
// Get basic system info for initial capacity reporting
|
||||||
var m runtime.MemStats
|
var m runtime.MemStats
|
||||||
runtime.ReadMemStats(&m)
|
runtime.ReadMemStats(&m)
|
||||||
|
|
||||||
// Convert to human-readable format (very simplified for now)
|
// Convert to human-readable format (very simplified for now)
|
||||||
cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU() * 1000)
|
cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU()*1000)
|
||||||
memCapacity := fmt.Sprintf("%dMi", m.Sys / (1024 * 1024))
|
memCapacity := fmt.Sprintf("%dMi", m.Sys/(1024*1024))
|
||||||
|
|
||||||
// For allocatable, we'll just use 90% of capacity for this phase
|
// For allocatable, we'll just use 90% of capacity for this phase
|
||||||
cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU() * 900)
|
cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU()*900)
|
||||||
memAllocatable := fmt.Sprintf("%dMi", (m.Sys / (1024 * 1024)) * 9 / 10)
|
memAllocatable := fmt.Sprintf("%dMi", (m.Sys/(1024*1024))*9/10)
|
||||||
|
|
||||||
return NodeStatus{
|
return NodeStatus{
|
||||||
NodeName: a.NodeName,
|
NodeName: a.NodeName,
|
||||||
NodeUID: a.NodeUID,
|
NodeUID: a.NodeUID,
|
||||||
|
@ -13,6 +13,8 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"crypto/x509/pkix"
|
||||||
|
|
||||||
"git.dws.rip/dubey/kat/internal/pki"
|
"git.dws.rip/dubey/kat/internal/pki"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -98,7 +100,7 @@ func TestAgentHeartbeat(t *testing.T) {
|
|||||||
t.Fatalf("Failed to read CA certificate: %v", err)
|
t.Fatalf("Failed to read CA certificate: %v", err)
|
||||||
}
|
}
|
||||||
server.TLS.ClientCAs.AppendCertsFromPEM(caCertData)
|
server.TLS.ClientCAs.AppendCertsFromPEM(caCertData)
|
||||||
|
|
||||||
// Set the server certificate to use the test node name as CN
|
// Set the server certificate to use the test node name as CN
|
||||||
// to match what our test agent will expect
|
// to match what our test agent will expect
|
||||||
server.TLS.Certificates = []tls.Certificate{
|
server.TLS.Certificates = []tls.Certificate{
|
||||||
|
@ -2,14 +2,12 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"git.dws.rip/dubey/kat/internal/store"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/mock"
|
"github.com/stretchr/testify/mock"
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user