more fixes before final part
All checks were successful
Unit Tests / unit-tests (pull_request) Successful in 9m58s
Integration Tests / integration-tests (pull_request) Successful in 9m58s

This commit is contained in:
Tanishq Dubey 2025-05-18 11:35:22 -04:00
parent 8f90c1b16d
commit 92fb052594
No known key found for this signature in database
GPG Key ID: CFC1931B84DFC3F9
5 changed files with 51 additions and 45 deletions

View File

@ -18,7 +18,7 @@ clean:
# Run all tests # Run all tests
test: generate test: generate
@echo "Running all tests..." @echo "Running all tests..."
@go test -v -count=1 ./... --coverprofile=coverage.out @go test -v -count=1 ./... --coverprofile=coverage.out --short
# Run unit tests only (faster, no integration tests) # Run unit tests only (faster, no integration tests)
test-unit: test-unit:

View File

@ -11,6 +11,7 @@ import (
"syscall" "syscall"
"time" "time"
"git.dws.rip/dubey/kat/internal/agent"
"git.dws.rip/dubey/kat/internal/api" "git.dws.rip/dubey/kat/internal/api"
"git.dws.rip/dubey/kat/internal/cli" "git.dws.rip/dubey/kat/internal/cli"
"git.dws.rip/dubey/kat/internal/config" "git.dws.rip/dubey/kat/internal/config"
@ -265,7 +266,7 @@ func runInit(cmd *cobra.Command, args []string) {
joinHandler := api.NewJoinHandler(etcdStore, caKeyPath, caCertPath) joinHandler := api.NewJoinHandler(etcdStore, caKeyPath, caCertPath)
apiServer.RegisterJoinHandler(joinHandler) apiServer.RegisterJoinHandler(joinHandler)
log.Printf("Registered join handler with CA key: %s, CA cert: %s", caKeyPath, caCertPath) log.Printf("Registered join handler with CA key: %s, CA cert: %s", caKeyPath, caCertPath)
// Register the node status handler // Register the node status handler
nodeStatusHandler := api.NewNodeStatusHandler(etcdStore) nodeStatusHandler := api.NewNodeStatusHandler(etcdStore)
apiServer.RegisterNodeStatusHandler(nodeStatusHandler) apiServer.RegisterNodeStatusHandler(nodeStatusHandler)
@ -343,11 +344,11 @@ func runJoin(cmd *cobra.Command, args []string) {
} }
log.Printf("Successfully joined cluster. Node is ready.") log.Printf("Successfully joined cluster. Node is ready.")
// Setup signal handling for graceful shutdown // Setup signal handling for graceful shutdown
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop() defer stop()
// Create and start the agent with heartbeating // Create and start the agent with heartbeating
agent, err := agent.NewAgent( agent, err := agent.NewAgent(
joinResp.NodeName, joinResp.NodeName,
@ -360,19 +361,19 @@ func runJoin(cmd *cobra.Command, args []string) {
if err != nil { if err != nil {
log.Fatalf("Failed to create agent: %v", err) log.Fatalf("Failed to create agent: %v", err)
} }
// Setup mTLS client // Setup mTLS client
if err := agent.SetupMTLSClient(); err != nil { if err := agent.SetupMTLSClient(); err != nil {
log.Fatalf("Failed to setup mTLS client: %v", err) log.Fatalf("Failed to setup mTLS client: %v", err)
} }
// Start heartbeating // Start heartbeating
if err := agent.StartHeartbeat(ctx); err != nil { if err := agent.StartHeartbeat(ctx); err != nil {
log.Fatalf("Failed to start heartbeat: %v", err) log.Fatalf("Failed to start heartbeat: %v", err)
} }
log.Printf("Node %s is now running with heartbeat. Press Ctrl+C to exit.", nodeName) log.Printf("Node %s is now running with heartbeat. Press Ctrl+C to exit.", nodeName)
// Wait for shutdown signal // Wait for shutdown signal
<-ctx.Done() <-ctx.Done()
log.Println("Received shutdown signal. Stopping heartbeat...") log.Println("Received shutdown signal. Stopping heartbeat...")

View File

@ -17,12 +17,12 @@ import (
// NodeStatus represents the data sent in a heartbeat // NodeStatus represents the data sent in a heartbeat
type NodeStatus struct { type NodeStatus struct {
NodeName string `json:"nodeName"` NodeName string `json:"nodeName"`
NodeUID string `json:"nodeUID"` NodeUID string `json:"nodeUID"`
Timestamp time.Time `json:"timestamp"` Timestamp time.Time `json:"timestamp"`
Resources Resources `json:"resources"` Resources Resources `json:"resources"`
Workloads []WorkloadStatus `json:"workloadInstances,omitempty"` Workloads []WorkloadStatus `json:"workloadInstances,omitempty"`
NetworkInfo NetworkInfo `json:"overlayNetwork"` NetworkInfo NetworkInfo `json:"overlayNetwork"`
} }
// Resources represents the node's resource capacity and usage // Resources represents the node's resource capacity and usage
@ -39,15 +39,15 @@ type ResourceMetrics struct {
// WorkloadStatus represents the status of a workload instance // WorkloadStatus represents the status of a workload instance
type WorkloadStatus struct { type WorkloadStatus struct {
WorkloadName string `json:"workloadName"` WorkloadName string `json:"workloadName"`
Namespace string `json:"namespace"` Namespace string `json:"namespace"`
InstanceID string `json:"instanceID"` InstanceID string `json:"instanceID"`
ContainerID string `json:"containerID"` ContainerID string `json:"containerID"`
ImageID string `json:"imageID"` ImageID string `json:"imageID"`
State string `json:"state"` // "running", "exited", "paused", "unknown" State string `json:"state"` // "running", "exited", "paused", "unknown"
ExitCode int `json:"exitCode"` ExitCode int `json:"exitCode"`
HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check" HealthStatus string `json:"healthStatus"` // "healthy", "unhealthy", "pending_check"
Restarts int `json:"restarts"` Restarts int `json:"restarts"`
} }
// NetworkInfo contains information about the node's overlay network // NetworkInfo contains information about the node's overlay network
@ -63,10 +63,10 @@ type Agent struct {
LeaderAPI string LeaderAPI string
AdvertiseAddr string AdvertiseAddr string
PKIDir string PKIDir string
// mTLS client for leader communication // mTLS client for leader communication
client *http.Client client *http.Client
// Heartbeat configuration // Heartbeat configuration
heartbeatInterval time.Duration heartbeatInterval time.Duration
stopHeartbeat chan struct{} stopHeartbeat chan struct{}
@ -77,7 +77,7 @@ func NewAgent(nodeName, nodeUID, leaderAPI, advertiseAddr, pkiDir string, heartb
if heartbeatIntervalSeconds <= 0 { if heartbeatIntervalSeconds <= 0 {
heartbeatIntervalSeconds = 15 // Default to 15 seconds heartbeatIntervalSeconds = 15 // Default to 15 seconds
} }
return &Agent{ return &Agent{
NodeName: nodeName, NodeName: nodeName,
NodeUID: nodeUID, NodeUID: nodeUID,
@ -125,29 +125,29 @@ func (a *Agent) SetupMTLSClient() error {
// Override the dial function to map any hostname to the leader's IP // Override the dial function to map any hostname to the leader's IP
DialTLS: func(network, addr string) (net.Conn, error) { DialTLS: func(network, addr string) (net.Conn, error) {
// Extract host and port from addr // Extract host and port from addr
host, port, err := net.SplitHostPort(addr) _, port, err := net.SplitHostPort(addr)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Extract host and port from LeaderAPI // Extract host and port from LeaderAPI
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI) leaderHost, _, err := net.SplitHostPort(a.LeaderAPI)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Use the leader's IP but keep the original port // Use the leader's IP but keep the original port
dialAddr := net.JoinHostPort(leaderHost, port) dialAddr := net.JoinHostPort(leaderHost, port)
// For logging purposes // For logging purposes
log.Printf("Dialing %s instead of %s", dialAddr, addr) log.Printf("Dialing %s instead of %s", dialAddr, addr)
// Create the TLS connection // Create the TLS connection
conn, err := tls.Dial(network, dialAddr, tlsConfig) conn, err := tls.Dial(network, dialAddr, tlsConfig)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return conn, nil return conn, nil
}, },
}, },
@ -211,8 +211,13 @@ func (a *Agent) sendHeartbeat() error {
return fmt.Errorf("failed to marshal node status: %w", err) return fmt.Errorf("failed to marshal node status: %w", err)
} }
leaderHost, leaderPort, err := net.SplitHostPort(a.LeaderAPI)
if err != nil {
return err
}
// Construct URL - use leader.kat.cluster.local as hostname to match certificate // Construct URL - use leader.kat.cluster.local as hostname to match certificate
url := fmt.Sprintf("https://leader.kat.cluster.local/v1alpha1/nodes/%s/status", a.NodeName) url := fmt.Sprintf("https://%s:%s/v1alpha1/nodes/%s/status", leaderHost, leaderPort, a.NodeName)
// Create request // Create request
req, err := http.NewRequest("POST", url, bytes.NewBuffer(statusJSON)) req, err := http.NewRequest("POST", url, bytes.NewBuffer(statusJSON))
@ -241,19 +246,19 @@ func (a *Agent) sendHeartbeat() error {
func (a *Agent) gatherNodeStatus() NodeStatus { func (a *Agent) gatherNodeStatus() NodeStatus {
// For now, just provide basic information // For now, just provide basic information
// In future phases, this will include actual resource usage, workload status, etc. // In future phases, this will include actual resource usage, workload status, etc.
// Get basic system info for initial capacity reporting // Get basic system info for initial capacity reporting
var m runtime.MemStats var m runtime.MemStats
runtime.ReadMemStats(&m) runtime.ReadMemStats(&m)
// Convert to human-readable format (very simplified for now) // Convert to human-readable format (very simplified for now)
cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU() * 1000) cpuCapacity := fmt.Sprintf("%dm", runtime.NumCPU()*1000)
memCapacity := fmt.Sprintf("%dMi", m.Sys / (1024 * 1024)) memCapacity := fmt.Sprintf("%dMi", m.Sys/(1024*1024))
// For allocatable, we'll just use 90% of capacity for this phase // For allocatable, we'll just use 90% of capacity for this phase
cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU() * 900) cpuAllocatable := fmt.Sprintf("%dm", runtime.NumCPU()*900)
memAllocatable := fmt.Sprintf("%dMi", (m.Sys / (1024 * 1024)) * 9 / 10) memAllocatable := fmt.Sprintf("%dMi", (m.Sys/(1024*1024))*9/10)
return NodeStatus{ return NodeStatus{
NodeName: a.NodeName, NodeName: a.NodeName,
NodeUID: a.NodeUID, NodeUID: a.NodeUID,

View File

@ -13,6 +13,8 @@ import (
"testing" "testing"
"time" "time"
"crypto/x509/pkix"
"git.dws.rip/dubey/kat/internal/pki" "git.dws.rip/dubey/kat/internal/pki"
) )
@ -98,7 +100,7 @@ func TestAgentHeartbeat(t *testing.T) {
t.Fatalf("Failed to read CA certificate: %v", err) t.Fatalf("Failed to read CA certificate: %v", err)
} }
server.TLS.ClientCAs.AppendCertsFromPEM(caCertData) server.TLS.ClientCAs.AppendCertsFromPEM(caCertData)
// Set the server certificate to use the test node name as CN // Set the server certificate to use the test node name as CN
// to match what our test agent will expect // to match what our test agent will expect
server.TLS.Certificates = []tls.Certificate{ server.TLS.Certificates = []tls.Certificate{

View File

@ -2,14 +2,12 @@ package api
import ( import (
"bytes" "bytes"
"context"
"encoding/json" "encoding/json"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"testing" "testing"
"time" "time"
"git.dws.rip/dubey/kat/internal/store"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock" "github.com/stretchr/testify/mock"
) )