Add tag-targeted test runs to CI runner

ci-runner.sh now accepts an optional 4th parameter naming a pytest marker (or the special value "heavy"):
  ci-runner.sh mfg abc123 claude genlab  → runs only genlab tests
  ci-runner.sh mfg abc123 claude heavy   → runs full heavy suite
  ci-runner.sh mfg abc123 claude         → runs full light suite (default)

Changes:
- TAG parameter parsed from $4
- pytest -m flag applied when a tag other than "heavy" is provided; "heavy" exports RUN_HEAVY_TESTS=1 instead of filtering by marker
- RUN_MODE shown in Telegram notification (light/heavy/tag:name)
- Baseline check only enforced on full light suite runs (skipped for heavy and tag-targeted runs)
- Comments noting tags need periodic updates as features change

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude BM 2026-04-28 14:44:22 +00:00
parent 7605094efb
commit 76dfd3b3c0

View File

@ -3,6 +3,17 @@
# Runs tests, feature manifests, and package checks against TSHARPS worktrees. # Runs tests, feature manifests, and package checks against TSHARPS worktrees.
# READ-ONLY — never writes, commits, or pushes to TSHARPS. # READ-ONLY — never writes, commits, or pushes to TSHARPS.
# Same checks for ALL branches — no branch-specific logic. # Same checks for ALL branches — no branch-specific logic.
#
# Usage:
# ci-runner.sh <branch> <commit> <actor> [tag]
#
# Optional [tag] parameter runs only tests with that pytest marker ("heavy" is special and runs the full heavy suite):
# ci-runner.sh mfg abc123 claude genlab → runs only genlab-tagged tests
# ci-runner.sh mfg abc123 claude → runs full light suite (default)
# ci-runner.sh mfg abc123 claude heavy → runs full heavy suite
#
# NOTE: Feature tags (genlab, scheduling, auth, etc.) need updating from time
# to time as features change. See pytest.ini for the current list of markers.
set -o pipefail set -o pipefail
@ -11,6 +22,7 @@ CONFIG="$SCRIPT_DIR/ci-config.json"
BRANCH="$1" BRANCH="$1"
COMMIT="$2" COMMIT="$2"
ACTOR="${3:-unknown}" ACTOR="${3:-unknown}"
TAG="${4:-}"
START_TIME=$(date +%s) START_TIME=$(date +%s)
# Load config # Load config
@ -83,7 +95,21 @@ if $PYTHON -c "import pytest_timeout" 2>/dev/null; then
TIMEOUT_FLAG="--timeout=$TIMEOUT" TIMEOUT_FLAG="--timeout=$TIMEOUT"
fi fi
TEST_OUTPUT=$($PYTHON -m pytest "$WORKTREE/backend/tests/" --tb=line -q $TIMEOUT_FLAG 2>&1) # Build pytest command — use tag filter if provided
TAG_FLAG=""
RUN_MODE="light"
if [ -n "$TAG" ]; then
if [ "$TAG" = "heavy" ]; then
TAG_FLAG=""
RUN_MODE="heavy"
export RUN_HEAVY_TESTS=1
else
TAG_FLAG="-m $TAG"
RUN_MODE="tag:$TAG"
fi
fi
TEST_OUTPUT=$($PYTHON -m pytest "$WORKTREE/backend/tests/" --tb=line -q $TIMEOUT_FLAG $TAG_FLAG 2>&1)
TEST_EXIT=$? TEST_EXIT=$?
PASS_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ passed' | grep -oP '\d+' || echo 0) PASS_COUNT=$(echo "$TEST_OUTPUT" | grep -oP '\d+ passed' | grep -oP '\d+' || echo 0)
@ -115,8 +141,9 @@ if [ "$ERROR_COUNT" != "0" ] && [ "$ERROR_COUNT" != "" ]; then
fi fi
# ─── Test Count Baseline Check ─── # ─── Test Count Baseline Check ───
# Baseline check only applies to the default full light suite; heavy and tag-targeted runs skip it
BASELINE=$(python3 -c "import json; print(json.load(open('$CONFIG')).get('test_count_baseline', 0))") BASELINE=$(python3 -c "import json; print(json.load(open('$CONFIG')).get('test_count_baseline', 0))")
if [ "$BASELINE" -gt 0 ] && [ "$PASS_COUNT" -lt "$BASELINE" ]; then if [ "$RUN_MODE" = "light" ] && [ "$BASELINE" -gt 0 ] && [ "$PASS_COUNT" -lt "$BASELINE" ]; then
OVERALL="fail" OVERALL="fail"
echo "TEST COUNT REGRESSION: expected >= $BASELINE passed, got $PASS_COUNT" echo "TEST COUNT REGRESSION: expected >= $BASELINE passed, got $PASS_COUNT"
fi fi
@ -151,7 +178,13 @@ SUMMARY="${PASS_COUNT} passed, ${FAIL_COUNT} failed, ${SKIP_COUNT} skipped"
TAGS_USED=$(grep -rhoP 'pytest\.mark\.\K\w+' "$WORKTREE/backend/tests/test_"*.py 2>/dev/null | \ TAGS_USED=$(grep -rhoP 'pytest\.mark\.\K\w+' "$WORKTREE/backend/tests/test_"*.py 2>/dev/null | \
sort -u | grep -vxE 'skipif|skip|parametrize|fixture|unit|integration|pipeline|e2e|slow|heavy' | \ sort -u | grep -vxE 'skipif|skip|parametrize|fixture|unit|integration|pipeline|e2e|slow|heavy' | \
tr '\n' ', ' | sed 's/,$//') tr '\n' ', ' | sed 's/,$//')
SUITE_INFO="Tags: ${TAGS_USED} | ${DESELECTED_COUNT} not in scope" if [ "$RUN_MODE" = "light" ]; then
SUITE_INFO="Mode: light (full) | Tags: ${TAGS_USED} | ${DESELECTED_COUNT} not in scope"
elif [ "$RUN_MODE" = "heavy" ]; then
SUITE_INFO="Mode: heavy (all tests) | Tags: ${TAGS_USED}"
else
SUITE_INFO="Mode: ${RUN_MODE} | ${DESELECTED_COUNT} not in scope"
fi
echo "" echo ""
echo "=========================" echo "========================="