From 9b7abec506102e9dc2ea21625b63fb1b8a9ad743 Mon Sep 17 00:00:00 2001 From: Claude BM Date: Tue, 21 Apr 2026 03:25:31 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20TSHARPS-CI=20external=20runner=20?= =?UTF-8?q?=E2=80=94=20branch-independent=20CI=20pipeline?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete CI system that lives outside TSHARPS branches: - ci-webhook.py: HTTP server on port 9500, receives Gitea push webhooks - ci-runner.sh: runs feature manifests, pytest, package checks (read-only) - ci-notify.sh: sends results to Telegram CICD Pipeline topic (4706) - ci-config.json: branch→worktree mapping, tokens, timeouts - README.md: branch model, promotion workflow, switch-back plan Same tests for ALL branches. No drift. Runner self-monitors for crashes. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 67 +++++++++++++++++++++ ci-config.json | 18 ++++++ ci-notify.sh | 57 ++++++++++++++++++ ci-runner.sh | 135 ++++++++++++++++++++++++++++++++++++++++++ ci-webhook.py | 158 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 435 insertions(+) create mode 100644 README.md create mode 100644 ci-config.json create mode 100755 ci-notify.sh create mode 100755 ci-runner.sh create mode 100755 ci-webhook.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..356ced5 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# TSHARPS-CI — External CI/CD Pipeline + +Branch-independent CI runner for the TSHARPS flight training scheduler. + +## Branch Model + +``` +TestMain ──► TestStaging ──► TestProduction + (dev) (validate) (live CI) +``` + +- **TestMain**: Develop CI changes here +- **TestStaging**: Validate before going live +- **TestProduction**: Runs against ALL TSHARPS branches on every push + +## How It Works + +1. Developer pushes to any TSHARPS branch (mfg, rogue, spif, staging, main) +2. Gitea webhook fires POST to `http://127.0.0.1:9500/ci` +3. 
`ci-webhook.py` receives the event and spawns `ci-runner.sh` +4. `ci-runner.sh` runs against the pushed branch's worktree: + - Feature manifest verification + - Test suite (with quarantine for known failures) + - Package import check +5. `ci-notify.sh` sends results to Telegram CICD Pipeline topic + +## Files + +| File | Purpose | +|------|---------| +| ci-webhook.py | HTTP server listening for Gitea push webhooks | +| ci-runner.sh | Main CI script — tests, features, packages | +| ci-notify.sh | Telegram notification sender | +| ci-config.json | Configuration (tokens, paths, timeouts) | + +## Key Rules + +- **Read-only**: CI runner never writes to TSHARPS repos +- **Same tests for all branches**: No branch-specific CI logic +- **No results stored here**: Test results handled separately +- **Runner self-monitoring**: Crashes send distinct alerts + +## Promoting CI Changes + +```bash +# 1. Develop on TestMain +git checkout TestMain +# make changes +git commit -am "add new check" +git push origin TestMain + +# 2. Promote to TestStaging +git checkout TestStaging && git merge TestMain && git push + +# 3. Promote to live +git checkout TestProduction && git merge TestStaging && git push + +# 4. Update server +ssh server "cd /srv/tsharps-ci && git pull origin TestProduction" +``` + +## Switch-Back to Gitea Actions + +If this doesn't work out: +1. Re-enable Gitea Actions: TSHARPS repo → Settings → Actions → check "Active" +2. Stop runner: `sudo systemctl stop tsharps-ci` +3. 
Remove webhook: TSHARPS repo → Settings → Webhooks → Delete diff --git a/ci-config.json b/ci-config.json new file mode 100644 index 0000000..0f6c502 --- /dev/null +++ b/ci-config.json @@ -0,0 +1,18 @@ +{ + "webhook_port": 9500, + "webhook_secret": "tsharps-ci-2026", + "telegram_bot_token": "8740468747:AAELdmdufDFPbzkVrvMm8VfbC74NFbkHLRk", + "telegram_chat_id": "-1003889780301", + "telegram_topic_id": 4706, + "test_timeout": 120, + "branches": { + "mfg": {"worktree": "/srv/tsharps-dev/mfg"}, + "rogue": {"worktree": "/srv/tsharps-dev/rogue"}, + "spif": {"worktree": "/srv/tsharps-dev/spif"}, + "staging": {"worktree": "/srv/tsharps-dev/staging"}, + "main": {"worktree": "/srv/tsharps"} + }, + "notify_on_pass": true, + "notify_on_fail": true, + "tag_users_on_fail": ["mostfunguy", "A31S15"] +} diff --git a/ci-notify.sh b/ci-notify.sh new file mode 100755 index 0000000..2f9f385 --- /dev/null +++ b/ci-notify.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# TSHARPS CI — Telegram Notification Sender +# Sends CI results to the CICD Pipeline Telegram topic. +# Called by ci-runner.sh with results. 
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
CONFIG="$SCRIPT_DIR/ci-config.json"

# cfg_get KEY [DEFAULT] — print one top-level value from ci-config.json.
# The config path and key travel as argv instead of being pasted into the
# Python source, so quotes or spaces in them cannot break the snippet.
# Booleans are printed as "true"/"false"; a missing or null key prints DEFAULT.
cfg_get() {
    python3 -c '
import json, sys
value = json.load(open(sys.argv[1])).get(sys.argv[2])
if value is None:
    value = sys.argv[3]
elif isinstance(value, bool):
    value = "true" if value else "false"
print(value)
' "$CONFIG" "$1" "${2:-}"
}

BOT_TOKEN=$(cfg_get telegram_bot_token)
CHAT_ID=$(cfg_get telegram_chat_id)
TOPIC_ID=$(cfg_get telegram_topic_id)
NOTIFY_ON_PASS=$(cfg_get notify_on_pass true)
NOTIFY_ON_FAIL=$(cfg_get notify_on_fail true)
TAG_USERS=$(python3 -c 'import json, sys; print(" ".join("@" + u for u in json.load(open(sys.argv[1])).get("tag_users_on_fail", [])))' "$CONFIG")

BRANCH="$1"
COMMIT="$2"
STATUS="$3"     # "pass" or "fail" or "error"
SUMMARY="$4"    # e.g., "940 passed, 0 failed, 393 skipped"; for "error": what went wrong
FEATURES="$5"   # e.g., "31/31 features verified"
DURATION="$6"   # e.g., "12.3s"
ACTOR="$7"      # who pushed

# Honour the notify_on_pass / notify_on_fail switches from ci-config.json.
# Runner errors are always reported.
if [ "$STATUS" = "pass" ] && [ "$NOTIFY_ON_PASS" = "false" ]; then
    exit 0
fi
if [ "$STATUS" = "fail" ] && [ "$NOTIFY_ON_FAIL" = "false" ]; then
    exit 0
fi

case "$STATUS" in
    pass)
        ICON="✅"
        MSG="${ICON} CI PASSED on ${BRANCH} (${COMMIT})

Tests: ${SUMMARY}
Features: ${FEATURES}
Duration: ${DURATION}
Pushed by: ${ACTOR}"
        ;;
    fail)
        ICON="❌"
        MSG="${ICON} CI FAILED on ${BRANCH} (${COMMIT})

Tests: ${SUMMARY}
Features: ${FEATURES}
Duration: ${DURATION}
Pushed by: ${ACTOR}

${TAG_USERS} — this build needs attention."
        ;;
    error)
        ICON="🚨"
        # Callers pass the failure reason in $4; include it instead of dropping it.
        DETAIL=""
        if [ -n "$SUMMARY" ]; then
            DETAIL="
Details: ${SUMMARY}"
        fi
        MSG="${ICON} CI RUNNER ERROR on ${BRANCH} (${COMMIT})

The CI runner itself failed — not a test failure.${DETAIL}
Check logs: journalctl -u tsharps-ci --since '5 minutes ago'

${TAG_USERS} — runner needs attention."
        ;;
    *)
        # Never send an empty message for a status this script does not know.
        MSG="⚠️ CI reported unknown status '${STATUS}' on ${BRANCH} (${COMMIT})"
        ;;
esac

if [ -z "$BOT_TOKEN" ] || [ -z "$CHAT_ID" ]; then
    echo "ci-notify: telegram_bot_token / telegram_chat_id missing in $CONFIG — not sending" >&2
    exit 0
fi

# --data-urlencode keeps '&', '+', '%' and newlines in the message intact
# (plain -d sends them unescaped, which corrupts the form body).
# --max-time bounds the call so a stalled connection cannot hang the caller.
CURL_ARGS=(-s --fail --max-time 20 -o /dev/null -X POST
    "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage"
    --data-urlencode "chat_id=${CHAT_ID}"
    --data-urlencode "text=${MSG}")
if [ -n "$TOPIC_ID" ]; then
    CURL_ARGS+=(--data-urlencode "message_thread_id=${TOPIC_ID}")
fi

# Notification is best-effort: report a delivery failure but never fail the caller.
if ! curl "${CURL_ARGS[@]}" 2>/dev/null; then
    echo "ci-notify: failed to deliver Telegram notification" >&2
fi

exit 0
diff --git a/ci-runner.sh b/ci-runner.sh
new file mode 100755
index 0000000..932d10a
--- /dev/null
+++ b/ci-runner.sh
@@ -0,0 +1,135 @@
#!/bin/bash
# TSHARPS CI — External Test Runner
# Runs tests, feature manifests, and package checks against TSHARPS worktrees.
# READ-ONLY — never writes, commits, or pushes to TSHARPS.
+# Same checks for ALL branches — no branch-specific logic. + +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +CONFIG="$SCRIPT_DIR/ci-config.json" +BRANCH="$1" +COMMIT="$2" +ACTOR="${3:-unknown}" +START_TIME=$(date +%s) + +# Load config +TIMEOUT=$(python3 -c "import json; print(json.load(open('$CONFIG')).get('test_timeout', 120))") +WORKTREE=$(python3 -c "import json; b=json.load(open('$CONFIG'))['branches']; print(b.get('$BRANCH',{}).get('worktree',''))") + +if [ -z "$WORKTREE" ] || [ ! -d "$WORKTREE" ]; then + echo "ERROR: Unknown branch '$BRANCH' or worktree not found" + bash "$SCRIPT_DIR/ci-notify.sh" "$BRANCH" "$COMMIT" "error" "Unknown branch or missing worktree" "" "0s" "$ACTOR" + exit 1 +fi + +# Prevent concurrent runs on the same branch +LOCKFILE="/tmp/tsharps-ci-${BRANCH}.lock" +exec 200>"$LOCKFILE" +if ! flock -n 200; then + echo "SKIP: CI already running for branch $BRANCH" + exit 0 +fi + +echo "=== TSHARPS CI Runner ===" +echo "Branch: $BRANCH" +echo "Commit: $COMMIT" +echo "Worktree: $WORKTREE" +echo "Timeout: ${TIMEOUT}s" +echo "=========================" + +PASS_COUNT=0 +FAIL_COUNT=0 +SKIP_COUNT=0 +ERROR_COUNT=0 +FEATURES_RESULT="" +OVERALL="pass" + +# ─── Step 1: Feature Manifest Check ─── +echo "" +echo "--- Feature Manifest Check ---" +if [ -f "$WORKTREE/ops/verify-features.py" ] && [ -d "$WORKTREE/.features" ]; then + FEATURE_OUTPUT=$(cd "$WORKTREE" && python3 ops/verify-features.py --verbose 2>&1) + FEATURE_EXIT=$? + FEATURES_RESULT=$(echo "$FEATURE_OUTPUT" | grep "RESULT:" | tail -1) + if [ -z "$FEATURES_RESULT" ]; then + FEATURES_RESULT=$(echo "$FEATURE_OUTPUT" | tail -1) + fi + echo "$FEATURE_OUTPUT" + if [ $FEATURE_EXIT -ne 0 ]; then + OVERALL="fail" + echo "FEATURE CHECK FAILED" + fi +else + FEATURES_RESULT="No manifests found" + echo "No feature manifests — skipping" +fi + +# ─── Step 2: Run Test Suite ─── +echo "" +echo "--- Test Suite ---" +PYTHON="$WORKTREE/.venv/bin/python3" +if [ ! 
-f "$PYTHON" ]; then + PYTHON="python3" +fi + +TEST_OUTPUT=$($PYTHON -m pytest "$WORKTREE/backend/tests/" --tb=line -q --timeout="$TIMEOUT" 2>&1) +TEST_EXIT=$? + +PASS_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ passed' | grep -oP '\d+' || echo 0) +FAIL_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ failed' | grep -oP '\d+' || echo 0) +SKIP_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ skipped' | grep -oP '\d+' || echo 0) +ERROR_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ error' | grep -oP '\d+' || echo 0) + +echo "Tests: $PASS_COUNT passed, $FAIL_COUNT failed, $SKIP_COUNT skipped, $ERROR_COUNT errors" + +if [ "$FAIL_COUNT" != "0" ] && [ "$FAIL_COUNT" != "" ]; then + OVERALL="fail" + echo "TESTS FAILED" +fi +if [ "$ERROR_COUNT" != "0" ] && [ "$ERROR_COUNT" != "" ]; then + OVERALL="fail" + echo "TEST COLLECTION ERRORS" +fi + +# ─── Step 3: Package Check ─── +echo "" +echo "--- Package Check ---" +PKG_OUTPUT=$($PYTHON -c " +import sys +required = ['fastapi','uvicorn','sqlalchemy','psycopg2','bcrypt','jwt','pandas','openpyxl','pydantic','ortools','astral','requests','dotenv','httpx'] +missing = [] +for mod in required: + try: __import__(mod) + except ImportError: missing.append(mod) +if missing: + print(f'FAIL: {len(missing)} missing: {missing}') + sys.exit(1) +print(f'OK: All {len(required)} packages verified') +" 2>&1) +PKG_EXIT=$? 
+echo "$PKG_OUTPUT" +if [ $PKG_EXIT -ne 0 ]; then + OVERALL="fail" +fi + +# ─── Results ─── +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) +SUMMARY="${PASS_COUNT} passed, ${FAIL_COUNT} failed, ${SKIP_COUNT} skipped" + +echo "" +echo "=========================" +echo "Result: $OVERALL" +echo "Summary: $SUMMARY" +echo "Features: $FEATURES_RESULT" +echo "Duration: ${DURATION}s" +echo "=========================" + +# ─── Send Notification ─── +bash "$SCRIPT_DIR/ci-notify.sh" \ + "$BRANCH" "$COMMIT" "$OVERALL" "$SUMMARY" "$FEATURES_RESULT" "${DURATION}s" "$ACTOR" + +# Release lock +flock -u 200 +exit 0 diff --git a/ci-webhook.py b/ci-webhook.py new file mode 100755 index 0000000..bb44a68 --- /dev/null +++ b/ci-webhook.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +""" +TSHARPS CI — Webhook Receiver + +Lightweight HTTP server that listens for Gitea push webhooks and spawns +the CI runner as a background process. Returns 200 immediately. + +If the runner subprocess crashes, sends a distinct "CI RUNNER ERROR" alert. 
""" # (closes the module docstring opened on the preceding line) """
import hashlib
import hmac
import json
import os
import subprocess
import sys
import threading
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path

SCRIPT_DIR = Path(__file__).resolve().parent
CONFIG_PATH = SCRIPT_DIR / "ci-config.json"

# Upper bound for one ci-notify.sh invocation, so a stalled notification
# cannot block a request handler or monitor thread forever.
NOTIFY_TIMEOUT_SECONDS = 60


def load_config():
    """Load ci-config.json from the directory containing this script.

    Raises:
        OSError: the file is missing or unreadable.
        json.JSONDecodeError: the file is not valid JSON.
    """
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return json.load(f)


# The config is read once at import so the handler can use it as a constant.
# A load failure is remembered instead of raised here; main() refuses to start
# the server when it is set, so the service never runs on an empty config.
try:
    CONFIG = load_config()
    CONFIG_ERROR = None
except (OSError, ValueError) as exc:
    CONFIG = {}
    CONFIG_ERROR = exc
PORT = CONFIG.get("webhook_port", 9500)
SECRET = str(CONFIG.get("webhook_secret") or "").encode()


def _log(message):
    """Print one log line and flush it (stdout is block-buffered when not a TTY)."""
    print(message, flush=True)


def verify_signature(payload: bytes, signature: str) -> bool:
    """Verify the HMAC-SHA256 signature of a webhook payload.

    Accepts the bare hex digest (the form Gitea documents for the
    ``X-Gitea-Signature`` header) as well as the ``sha256=<hex>`` form.

    Args:
        payload: Raw request body exactly as received.
        signature: Value of the signature header; may be empty.

    Returns:
        True when the signature matches, or when no secret is configured.
    """
    if not SECRET:
        return True  # No secret configured — accept all
    expected = hmac.new(SECRET, payload, hashlib.sha256).hexdigest()
    received = signature.strip().lower()
    if received.startswith("sha256="):
        received = received[len("sha256="):]
    # Compare as bytes: compare_digest raises TypeError on non-ASCII str input.
    return hmac.compare_digest(
        expected.encode("ascii"), received.encode("utf-8", errors="replace")
    )


def _short_commit(data):
    """Return the 7-character abbreviation of the pushed head commit, or ""."""
    # `after` names the new head directly; prefer it over a position in
    # `commits`, so the result does not depend on how that list is ordered.
    after = data.get("after")
    if isinstance(after, str) and after:
        return after[:7]
    commits = data.get("commits")
    if isinstance(commits, list) and commits and isinstance(commits[-1], dict):
        return str(commits[-1].get("id", ""))[:7]
    return ""


def _notify_error(branch, commit, detail, actor):
    """Send a "CI RUNNER ERROR" alert through ci-notify.sh; never raises."""
    notify_path = SCRIPT_DIR / "ci-notify.sh"
    try:
        subprocess.run(
            ["bash", str(notify_path), branch, commit, "error",
             detail, "", "0s", actor],
            cwd=str(SCRIPT_DIR),
            timeout=NOTIFY_TIMEOUT_SECONDS,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        _log(f"[CI] Failed to send error notification: {exc}")


def _watch_runner(proc, branch, commit, actor):
    """Wait for the runner to finish; alert when it exits non-zero."""
    output, _ = proc.communicate()
    if proc.returncode == 0:
        _log(f"[CI] Runner completed for {branch} ({commit})")
        return
    _log(f"[CI] RUNNER CRASHED for {branch} ({commit}): exit {proc.returncode}")
    if output:
        # Keep the runner's output in the service log; otherwise it is lost.
        _log(output.decode("utf-8", errors="replace"))
    _notify_error(
        branch, commit, f"Runner crashed with exit code {proc.returncode}", actor
    )


class WebhookHandler(BaseHTTPRequestHandler):
    """Handle ``POST /ci`` (trigger a CI run) and ``GET /health``."""

    def _reply(self, status, body=b"", content_type="text/plain; charset=utf-8"):
        """Send a complete response with explicit length and content type."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def do_POST(self):
        """Validate a Gitea push event and spawn ci-runner.sh for its branch."""
        if self.path != "/ci":
            self._reply(404)
            return

        try:
            content_length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            content_length = -1
        if content_length < 0:
            # read(-1) would block until the client closes the connection.
            self._reply(400, b"Invalid Content-Length")
            return
        payload = self.rfile.read(content_length)

        # Verify signature if configured
        signature = self.headers.get("X-Gitea-Signature", "")
        if SECRET and not verify_signature(payload, signature):
            self._reply(403, b"Invalid signature")
            return

        # Parse push event
        try:
            data = json.loads(payload)
        except ValueError:  # JSONDecodeError and UnicodeDecodeError
            data = None
        if not isinstance(data, dict):
            self._reply(400, b"Invalid JSON")
            return

        # Extract branch, commit, actor
        ref = data.get("ref", "")
        if not isinstance(ref, str):
            ref = ""
        branch = ref[len("refs/heads/"):] if ref.startswith("refs/heads/") else ref
        commit = _short_commit(data)
        pusher = data.get("pusher")
        actor = str(pusher.get("login") or "unknown") if isinstance(pusher, dict) else "unknown"

        # Check if branch is in our config
        if branch not in CONFIG.get("branches", {}):
            self._reply(200, f"Branch '{branch}' not configured — skipping".encode())
            return

        _log(f"[CI] Push to {branch} ({commit}) by {actor} — spawning runner")

        # Spawn ci-runner.sh as a background process
        runner_path = SCRIPT_DIR / "ci-runner.sh"
        try:
            proc = subprocess.Popen(
                ["bash", str(runner_path), branch, commit, actor],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=str(SCRIPT_DIR),
            )
        except (OSError, ValueError) as exc:
            _log(f"[CI] Failed to spawn runner: {exc}")
            # Answer first so the sender is not kept waiting on the notification.
            self._reply(500, f"Failed to spawn runner for {branch} ({commit})".encode())
            _notify_error(branch, commit, f"Failed to spawn runner: {exc}", actor)
            return

        # Do not wait for completion; a daemon thread watches for a crash.
        threading.Thread(
            target=_watch_runner, args=(proc, branch, commit, actor), daemon=True
        ).start()

        self._reply(200, f"CI triggered for {branch} ({commit})".encode())

    def do_GET(self):
        """Health check endpoint."""
        if self.path == "/health":
            body = json.dumps({
                "status": "healthy",
                "branches": list(CONFIG.get("branches", {}).keys()),
                "port": PORT,
            }).encode()
            self._reply(200, body, content_type="application/json")
            return
        self._reply(404)

    def log_message(self, format, *args):
        """Route http.server's log lines to stdout with a service prefix."""
        # Expand the full message; args[0] alone drops the status code and
        # is not even a string for error log calls.
        _log(f"[CI-Webhook] {format % args}")


def main():
    """Start the webhook receiver on 127.0.0.1 and serve until interrupted."""
    if CONFIG_ERROR is not None:
        _log(f"[CI] Cannot load {CONFIG_PATH}: {CONFIG_ERROR}")
        sys.exit(1)

    _log(f"[CI] TSHARPS CI Webhook Receiver starting on port {PORT}")
    _log(f"[CI] Configured branches: {list(CONFIG.get('branches', {}).keys())}")
    _log(f"[CI] Signature validation: {'enabled' if SECRET else 'disabled'}")

    # The context manager calls server_close(), releasing the listening socket.
    with HTTPServer(("127.0.0.1", PORT), WebhookHandler) as server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            _log("[CI] Shutting down")


if __name__ == "__main__":
    main()