From 5160a872901abb8a9e0947ae39ddd606ceb1595c Mon Sep 17 00:00:00 2001 From: Tanishq Dubey Date: Wed, 22 Oct 2025 16:37:20 -0400 Subject: [PATCH] Initial Commit --- .gitignore | 2 + CLAUDE.md | 79 ++++++++ frontend/Dockerfile | 19 ++ frontend/README.md | 0 frontend/main.py | 394 ++++++++++++++++++++++++++++++++++++++++ frontend/pyproject.toml | 9 + manifest.yaml | 126 +++++++++++++ reporter/Dockerfile | 17 ++ reporter/README.md | 0 reporter/main.py | 72 ++++++++ reporter/pyproject.toml | 9 + 11 files changed, 727 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 frontend/Dockerfile create mode 100644 frontend/README.md create mode 100644 frontend/main.py create mode 100644 frontend/pyproject.toml create mode 100644 manifest.yaml create mode 100644 reporter/Dockerfile create mode 100644 reporter/README.md create mode 100644 reporter/main.py create mode 100644 reporter/pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..15e1884 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.woff +*.woff2 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..3addea7 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,79 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a Kubernetes-based NTP (Network Time Protocol) monitoring system that consists of two main components: +- **Reporter**: A DaemonSet that runs on each node to collect NTP metrics via chronyc +- **Frontend**: A web interface that aggregates data from all reporters and displays it in a terminal-style UI + +## Architecture + +The system follows a two-tier architecture: + +1. 
**Reporter Service** (`reporter/`) + - Python Flask application running on port 9898 + - Deployed as a DaemonSet to collect metrics from every Kubernetes node + - Uses `chronyc` command to query NTP status from the local chrony daemon + - Exposes `/fragment.json` endpoint with node-specific NTP data + +2. **Frontend Service** (`frontend/`) + - Python Flask application running on port 8080 + - Single replica deployment that aggregates data from all reporter pods + - Renders a terminal-style UI showing NTP status across the entire cluster + - Provides both HTML interface and JSON API endpoints + +## Development Commands + +### Building and Deploying +```bash +# Build reporter image +docker build -t git.dws.rip/dws/ntp/reporter:v8 reporter/ + +# Build frontend image +docker build -t git.dws.rip/dws/ntp/frontend:v11 frontend/ + +# Deploy to Kubernetes +kubectl apply -f manifest.yaml +``` + +### Local Development +```bash +# Run reporter locally (requires chrony to be installed) +cd reporter && python main.py + +# Run frontend locally +cd frontend && python main.py +``` + +### Testing +```bash +# Test reporter endpoint +curl http://localhost:9898/fragment.json + +# Test frontend aggregation +curl http://localhost:8080/api/fragments +``` + +## Key Configuration + +- **Reporter Service**: Uses host network (hostNetwork: true) to access node's chrony daemon +- **Environment Variables**: + - `REPORTER_SERVICE`: Kubernetes service name for discovering reporter pods + - `NODE_ID`, `PUBLIC_IP`, `BIND_IP`: Node-specific values from Kubernetes downward API +- **Service Discovery**: Frontend uses DNS resolution to find all reporter pod IPs + +## Data Flow + +1. Each reporter pod runs `chronyc tracking` and `chronyc sources` commands +2. Reporter parses CSV output and exposes via `/fragment.json` +3. Frontend resolves all reporter IPs via Kubernetes DNS +4. Frontend aggregates data and renders ASCII tables in terminal UI +5. 
Real-time clock sync via JavaScript using server time API + +## Dependencies + +- **Reporter**: Flask, chrony (system package) +- **Frontend**: Flask, requests, texttable +- Both use Python 3.10 slim base images \ No newline at end of file diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..fdf56d4 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,19 @@ +# Use a slim Python base +FROM python:3.10-slim + +# Install Python dependencies +RUN pip install Flask requests tabulate texttable + +WORKDIR /app/static +RUN mkdir fonts +COPY fonts/* ./fonts/ + + +# Copy the app +WORKDIR /app +RUN mkdir fonts +COPY main.py . + +# Expose the port and run the app +EXPOSE 8080 +CMD ["python", "main.py"] diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000..e69de29 diff --git a/frontend/main.py b/frontend/main.py new file mode 100644 index 0000000..de0c800 --- /dev/null +++ b/frontend/main.py @@ -0,0 +1,394 @@ +import os +import requests +import subprocess +import json +from flask import Flask, render_template_string, jsonify +from socket import gethostbyname_ex +from datetime import datetime +from texttable import Texttable # For ASCII tables +import time # For timestamp conversion + +app = Flask(__name__) + +# Config +REPORTER_SERVICE = os.environ.get("REPORTER_SERVICE", "ntp-reporter-svc.default.svc.cluster.local") + +# Tracking table config +TRACKING_METRICS_ORDER = [ + "Reference ID", "Ref Source IP", "Stratum", "Ref time (UTC)", "System time", + "Last offset", "RMS offset", "Frequency", "Residual freq", "Skew", + "Root delay", "Root dispersion", "Update interval", "Leap status" +] +# Define max widths for tracking table columns (approximate) +TRACKING_COL_WIDTHS = [18] + [24] * 3 # Metric Name + 3 Nodes + +# Sources table config +SOURCES_COLUMNS_ORDER = [ + "DWS PEER", "ModeState", "Name/IP address", "Stratum", "Poll", "Reach", + "LastRx", "Last sample", "Std Dev" +] +# Define widths for sources table 
columns +SOURCES_COL_WIDTHS = [24, 10, 32, 10, 7, 7, 8, 15, 10] + +# +# HTML Template - Radically simplified for TUI output +# +HTML_TEMPLATE = """ + + + + DWS LLC NTP STATUS + + + + + +
+$> ./dws_ntp_report
+**INFO**: COLLECTING DWS NTP POOL INFORMATION
+CURRENT TIME
+=================================
+TIME: --:--:--
+DATE: ----------
+STATUS: Syncing...
+CLOCK OFFSET: ---
+
+
+**INFO**: DETAILED METRICS:
+**INFO**: COLLECTING TRACKING STATUS METRICS:
+
+
+TRACKING STATUS
+{{ tracking_table_ascii }}
+
+**INFO**: COLLECTING UPSTREAM SOURCES METRICS:
+
+UPSTREAM SOURCES
+{{ sources_table_ascii }}
+
+**INFO**: REPORT COMPLETE
+
+**INFO**: DEVELOPER INFO
+
+USE DWS AS YOUR NTP POOL BY SETTING time.dws.rip AS YOUR NTP SOURCE
+
+
+**INFO**: DWS LLC // "IT'S YOUR INTERNET, TAKE IT BACK" // https://dws.rip
+**INFO**: DWS LLC // UNITED STATES OF AMERICA // 2025
+**INFO**: DWS NTP REPORT COMPLETE {{ gen_time_utc }}
+
+ + + + +""" + +def get_reporter_ips(service_name): + try: _, _, ips = gethostbyname_ex(service_name); return ips + except Exception as e: print(f"Error resolving service IPs: {e}"); return [] + +# --- NEW: Helper to convert Ref time (UTC) --- +def format_ref_time(timestamp_str): + try: + # Input is like "1761142077.558355643" (Unix timestamp with fractions) + ts = float(timestamp_str) + dt = datetime.utcfromtimestamp(ts) + # Format like: Wed Oct 22 14:08:24 2025 (UTC) + return dt.strftime('%a %b %d %H:%M:%S %Y') + " (UTC)" + except: + return timestamp_str # Return original if conversion fails + +# --- NEW: Helper to format floats nicely --- +def format_float(value_str, precision=3): + try: + f_val = float(value_str) + return f"{f_val:.{precision}f}" + except: + return value_str # Return original if not a float + +@app.route('/api/time') +def get_server_time(): + return jsonify({"time_utc": datetime.utcnow().isoformat() + "Z"}) + +# --- NEW: Endpoint to just return the raw fragments --- +# The JavaScript will use this to get the latest offset data +@app.route('/api/fragments') +def get_fragments_json(): + fragments = [] + ips = get_reporter_ips(REPORTER_SERVICE) + for ip in ips: + try: + res = requests.get(f"http://{ip}:9898/fragment.json", timeout=1) # Shorter timeout + if res.status_code == 200: fragments.append(res.json()) + except: pass # Ignore errors fetching fragments for this endpoint + fragments.sort(key=lambda x: x.get("node_id", "z")) + return jsonify(fragments) + + +def format_value(value, max_len=25): + """Truncates long values for table display.""" + if value is None: return "N/A" + s_val = str(value) + if len(s_val) > max_len: + return s_val[:max_len-3] + "..." + return s_val + +@app.route('/') +def homepage(): + fragments = [] + error_msg = "No errors." + meta_offset_ms = "N/A" + meta_leap_status = "Unknown" + ips = get_reporter_ips(REPORTER_SERVICE) + if not ips: error_msg = f"Could not resolve IPs for service '{REPORTER_SERVICE}'." + + # 1. 
Fetch fragments + for ip in ips: + try: + res = requests.get(f"http://{ip}:9898/fragment.json", timeout=2) + if res.status_code == 200: fragments.append(res.json()) + else: print(f"Failed fetch from {ip}: Status {res.status_code}") + except Exception as e: print(f"Failed connect to {ip}: {e}"); error_msg = str(e) + + fragments.sort(key=lambda x: x.get("node_id", "z")) + nodes_list = [f.get("node_id", "unknown") for f in fragments] + + # 2. Generate ASCII Tracking Table + track_table = Texttable(max_width=0) + track_table.set_deco(Texttable.BORDER | Texttable.HEADER | Texttable.VLINES) + track_table.set_chars(['─', '│', '┬', '═', '─', '┼', '│', '┌', '┐', '└', '┘']) + track_table.set_cols_width(TRACKING_COL_WIDTHS[:len(nodes_list)+1]) # Dynamic width based on nodes found + track_table.header(["Metric"] + nodes_list) + track_table.set_cols_align(["l"] + ["r"] * len(nodes_list)) + + for metric in TRACKING_METRICS_ORDER: + row = [metric] + for node_id in nodes_list: + node_data = next((f for f in fragments if f.get("node_id") == node_id), None) + value = "N/A" + if node_data and isinstance(node_data.get("tracking"), dict): + raw_value = node_data["tracking"].get(metric, "N/A") + # Format specific fields for better readability + if metric == "Ref time (UTC)": value = format_ref_time(raw_value) + elif metric in ["System time", "Last offset", "RMS offset", "Residual freq", "Skew", "Root delay", "Root dispersion"]: value = format_float(raw_value, 6) # Higher precision + elif metric == "Frequency": value = format_float(raw_value, 3) + elif metric == "Update interval": value = format_float(raw_value, 1) + else: value = format_value(raw_value) # Use generic formatter + row.append(value) + track_table.add_row(row) + tracking_table_ascii = track_table.draw() + + total_offset_seconds = 0.0 + valid_offset_count = 0 + leap_statuses = set() + # 3. 
Generate Meta Description Summary (averaged across all reporting nodes)
+    if fragments:
+        for frag in fragments:
+            tracking = frag.get("tracking", {})
+            if isinstance(tracking, dict) and "Error" not in tracking:
+                # Collect Leap Status
+                leap = tracking.get("Leap status")
+                if leap:
+                    leap_statuses.add(leap)
+
+                # Collect Offset
+                offset_str = tracking.get("Last offset")
+                try:
+                    offset_seconds = float(offset_str)
+                    total_offset_seconds += offset_seconds
+                    valid_offset_count += 1
+                except (TypeError, ValueError):
+                    pass # Ignore if offset is missing or not a number
+
+        if valid_offset_count > 0:
+            avg_offset_seconds = total_offset_seconds / valid_offset_count
+            meta_offset_ms = f"~{(avg_offset_seconds * 1000):.1f}ms" # Use ~ for average
+
+        if len(leap_statuses) == 1:
+            meta_leap_status = leap_statuses.pop()
+        elif len(leap_statuses) > 1:
+            meta_leap_status = "Mixed"
+        # else remains "Unknown" if no valid status found
+
+    # 4. Generate ASCII Sources Table (Rotated/Concatenated)
+    source_table = Texttable(max_width=0)
+    source_table.set_deco(Texttable.BORDER | Texttable.HEADER | Texttable.VLINES)
+    source_table.set_chars(['─', '│', '┬', '═', '─', '┼', '│', '┌', '┐', '└', '┘'])
+    source_table.set_cols_width(SOURCES_COL_WIDTHS)
+    source_table.header(SOURCES_COLUMNS_ORDER)
+    source_table.set_cols_align(["l"] * 3 + ["r"] * 6) # Left align text, right align numbers
+
+    if not fragments:
+        sources_table_ascii = "ERROR: Could not fetch data from any reporter pods."
+ else: + for f in fragments: + node_id = f.get("node_id", "unknown") + sources = f.get("sources", []) + if not sources: + source_table.add_row([node_id, "N/A", "No sources reported", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"]) + else: + for source in sources: + row = [ + format_value(node_id, SOURCES_COL_WIDTHS[0]-2), + f"{source.get('Mode', '?')}{source.get('State', '?')}", + format_value(source.get("Name/IP address", "N/A"), SOURCES_COL_WIDTHS[2]-2), + format_value(source.get("Stratum", "N/A")), + format_value(source.get("Poll", "N/A")), + format_value(source.get("Reach", "N/A")), + format_value(source.get("LastRx", "N/A")), + format_float(source.get("Last sample", "N/A"), 6), # Format sample offset + format_float(source.get("Std Dev", "N/A"), 3) # Format Std Dev/Jitter + ] + source_table.add_row(row) + sources_table_ascii = source_table.draw() + + gen_time = subprocess.run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"], capture_output=True, text=True).stdout.strip() + + return render_template_string( + HTML_TEMPLATE, + gen_time_utc=gen_time, + tracking_table_ascii=tracking_table_ascii, + sources_table_ascii=sources_table_ascii, + meta_description=f"DWS NTP Pool: {meta_leap_status}. 
Avg Offset: {meta_offset_ms}.", + error=error_msg + ) + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) diff --git a/frontend/pyproject.toml b/frontend/pyproject.toml new file mode 100644 index 0000000..fecc023 --- /dev/null +++ b/frontend/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "frontend" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "flask>=3.1.2", +] diff --git a/manifest.yaml b/manifest.yaml new file mode 100644 index 0000000..5d04257 --- /dev/null +++ b/manifest.yaml @@ -0,0 +1,126 @@ +--- +# Create a namespace for this +apiVersion: v1 +kind: Namespace +metadata: + name: ntp-reporting +--- +# The headless service for the reporters +apiVersion: v1 +kind: Service +metadata: + name: ntp-reporter-svc + namespace: ntp-reporting +spec: + clusterIP: None # This makes it a headless service + selector: + app: ntp-reporter +--- +# The DaemonSet to run one reporter pod on each node +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: ntp-reporter + namespace: ntp-reporting + labels: + app: ntp-reporter +spec: + selector: + matchLabels: + app: ntp-reporter + template: + metadata: + labels: + app: ntp-reporter + spec: + hostNetwork: true + containers: + - name: reporter + image: git.dws.rip/dws/ntp/reporter:v8 + ports: + - containerPort: 9898 + env: + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_ID + valueFrom: + fieldRef: + fieldPath: spec.nodeName # e.g. 
"us-server" + - name: PUBLIC_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP # This is the host's *internal* IP + - name: BIND_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP +--- +# The frontend deployment (just one replica) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ntp-frontend + namespace: ntp-reporting + labels: + app: ntp-frontend +spec: + replicas: 1 + selector: + matchLabels: + app: ntp-frontend + template: + metadata: + labels: + app: ntp-frontend + spec: + containers: + - name: frontend + image: git.dws.rip/dws/ntp/frontend:v11 + ports: + - containerPort: 8080 + env: + - name: REPORTER_SERVICE + # This is the K8s service name: . + value: "ntp-reporter-svc.ntp-reporting" +--- +# The service to expose the frontend internally +apiVersion: v1 +kind: Service +metadata: + name: ntp-frontend-svc + namespace: ntp-reporting +spec: + selector: + app: ntp-frontend + ports: + - protocol: TCP + port: 80 + targetPort: 8080 +--- +# The Traefik Ingress to expose the frontend to the world +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ntp-report-ingress + namespace: ntp-reporting + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-production" +spec: + rules: + - host: "time.dws.rip" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: ntp-frontend-svc + port: + number: 80 + # Enable this block for automatic HTTPS with Let's Encrypt + tls: + - hosts: + - "time.dws.rip" + secretName: time-dws-rip-tls # Traefik/Cert-Manager will create this diff --git a/reporter/Dockerfile b/reporter/Dockerfile new file mode 100644 index 0000000..db96a57 --- /dev/null +++ b/reporter/Dockerfile @@ -0,0 +1,17 @@ +# Use a slim Python base +FROM python:3.10-slim + +# Install chrony (we only need the 'chronyc' client) +RUN apt-get update && apt-get install -y chrony && \ + rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +RUN pip install Flask + +# Copy the app +WORKDIR /app +COPY main.py . 
+
+# Expose the port and run the app
+EXPOSE 9898
+CMD ["python", "main.py"]
diff --git a/reporter/README.md b/reporter/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/reporter/main.py b/reporter/main.py
new file mode 100644
index 0000000..518cdb1
--- /dev/null
+++ b/reporter/main.py
@@ -0,0 +1,72 @@
+import os
+import subprocess
+import csv
+import io
+from flask import Flask, jsonify
+
+app = Flask(__name__)
+
+# Config
+NODE_ID = os.environ.get("NODE_ID", "unknown-node")
+PUBLIC_IP = os.environ.get("PUBLIC_IP", "unknown-ip")
+BIND_IP = os.environ.get("BIND_IP", "127.0.0.1")
+CHRONY_HOST = "127.0.0.1"
+
+def run_chronyc(command):
+    try:
+        result = subprocess.run(
+            ["chronyc", "-h", CHRONY_HOST, "-c"] + command,
+            capture_output=True, text=True, timeout=2, check=True
+        )
+        return result.stdout.strip()
+    except Exception as e:
+        print(f"Error running 'chronyc {command}': {e}")
+        return None
+
+def parse_tracking():
+    raw_csv = run_chronyc(["tracking"])
+    if not raw_csv: return {"Error": "Could not run tracking command"}
+    try:
+        reader = csv.reader(io.StringIO(raw_csv))
+        headers = [
+            "Reference ID", "Ref Source IP", "Stratum", "Ref time (UTC)", "System time",
+            "Last offset", "RMS offset", "Frequency", "Residual freq",
+            "Skew", "Root delay", "Root dispersion", "Update interval", "Leap status"
+        ]
+        values = next(reader)
+        return dict(zip(headers, values))
+    except Exception as e:
+        return {"Error": f"Failed to parse tracking CSV: {e}", "RawData": raw_csv}
+
+def parse_sources():
+    raw_csv = run_chronyc(["sources"])
+    if not raw_csv: return []
+    try:
+        reader = csv.reader(io.StringIO(raw_csv))
+        # --- FIX: Add 10th header 'Std Dev' ---
+        headers = [
+            "Mode", "State", "Name/IP address", "Stratum", "Poll", "Reach",
+            "LastRx", "Last sample", "Last sample original", "Std Dev" # Was: Last sample error
+        ]
+        sources_list = []
+        for row in reader:
+            if row: sources_list.append(dict(zip(headers, row)))
+        return sources_list
+    except Exception as e:
+        print(f"Error parsing sources: {e}")
+        return []
+
+@app.route('/fragment.json')
+def get_fragment():
+    tracking_data = parse_tracking()
+    sources_data = parse_sources()
+    return jsonify({
+        "node_id": NODE_ID,
+        "public_ip": PUBLIC_IP,
+        "report_generated_time": subprocess.run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"], capture_output=True, text=True).stdout.strip(),
+        "tracking": tracking_data,
+        "sources": sources_data
+    })
+
+if __name__ == '__main__':
+    app.run(host=BIND_IP, port=9898)
diff --git a/reporter/pyproject.toml b/reporter/pyproject.toml
new file mode 100644
index 0000000..14e02c1
--- /dev/null
+++ b/reporter/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "reporter"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "flask>=3.1.2",
+]