init: common-skills v1

This commit is contained in:
Team
2026-03-26 21:00:51 +08:00
commit 264dacf157
16 changed files with 859 additions and 0 deletions
+25
View File
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Pre-push hook: run regression check on changed skills only.
# Install: cp .githooks/pre-push .git/hooks/pre-push && chmod +x .git/hooks/pre-push
set -euo pipefail
# Find skills changed in this push.
# NOTE(review): this only diffs the LAST commit (HEAD~1..HEAD). A push that
# contains several commits can carry skill changes in earlier commits that
# this check never sees. A stricter hook would read the
# "<local-ref> <local-sha> <remote-ref> <remote-sha>" lines git supplies on
# stdin and diff <remote-sha>..<local-sha> instead — confirm before relying
# on this as a hard gate.
# NOTE(review): on a repo's very first commit HEAD~1 does not exist; the diff
# fails, `|| true` leaves changed_skills empty, and the hook silently passes.
changed_skills=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^skills/' | cut -d/ -f2 | sort -u || true)
# Nothing under skills/ changed — nothing to gate on.
if [[ -z "$changed_skills" ]]; then
exit 0
fi
echo "🔍 Running regression check for changed skills: $changed_skills"
for skill in $changed_skills; do
# Every skill must ship evals before it may be pushed at all.
if [[ ! -f "skills/$skill/evals/evals.json" ]]; then
echo "❌ Missing evals/evals.json for skill: $skill"
echo " Every skill must have evals before it can be pushed."
exit 1
fi
# run_evals.py exits non-zero on regression, and `set -e` aborts the push.
python scripts/run_evals.py "$skill" --check-regression
done
echo "✅ All checks passed."
+10
View File
@@ -0,0 +1,10 @@
{
"name": "main",
"description": "Eval agent for common-skills. Loads all skills for evaluation.",
"prompt": "You are an evaluation assistant. Load the relevant skill when the user's request matches its domain, then answer based on the skill's guidance.",
"tools": ["fs_read", "execute_bash", "grep", "glob"],
"allowedTools": ["fs_read", "execute_bash", "grep", "glob"],
"resources": [
"skill://skills/**/SKILL.md"
]
}
+67
View File
@@ -0,0 +1,67 @@
# common-skills
Shared Kiro agent skills for the team. All skills are evaluated before merge.
## Structure
```
skills/
├── codereview/
│ ├── SKILL.md
│ └── evals/evals.json
├── docs-rag/
│ ├── SKILL.md
│ ├── data/index.json
│ └── evals/evals.json
├── python/
├── testing/
└── typescript/
scripts/
├── run_evals.py ← eval runner with regression protection
└── sync.sh ← sync skills into a project
.githooks/
└── pre-push ← blocks push if changed skills regress
baselines.json ← recorded pass rates (committed to repo)
```
## Using Skills in Your Project
```bash
# Sync all skills
COMMON_SKILLS_DIR=~/common-skills bash scripts/sync.sh
# Sync specific skills only
COMMON_SKILLS_DIR=~/common-skills bash scripts/sync.sh codereview python
```
## Contributing a New Skill
1. Create `skills/<name>/SKILL.md` with YAML frontmatter (`name`, `description`)
2. Add `skills/<name>/evals/evals.json` with at least 3 eval cases
3. Run evals locally and update baseline:
```bash
python scripts/run_evals.py <name> --update-baseline
```
4. Push — the pre-push hook will verify no regressions on changed skills
## Running Evals
```bash
# Run all skills
python scripts/run_evals.py
# Run single skill
python scripts/run_evals.py codereview
# Check for regressions against baselines.json
python scripts/run_evals.py --check-regression
# After improving a skill, record new baseline
python scripts/run_evals.py codereview --update-baseline
```
## Install pre-push Hook
```bash
cp .githooks/pre-push .git/hooks/pre-push && chmod +x .git/hooks/pre-push
```
+142
View File
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Skill eval runner with regression protection.
Usage:
python scripts/run_evals.py # run all skills
python scripts/run_evals.py codereview # run single skill
python scripts/run_evals.py codereview --iter 2 # specify iteration
python scripts/run_evals.py --check-regression # fail if any skill regressed
"""
import argparse
import json
import re
import subprocess
import time
from pathlib import Path
SKILLS_DIR = Path("skills")  # root directory containing one subdirectory per skill
WORKSPACE_ROOT = Path("evals-workspace")  # all eval artifacts are written under here
BASELINE_FILE = Path("baselines.json")  # recorded pass rates, committed to the repo
def run_prompt(prompt: str, with_skill: bool) -> tuple[str, float]:
    """Send one prompt to kiro-cli and return (cleaned stdout, elapsed seconds).

    Uses the skill-enabled "main" agent when with_skill is true, otherwise the
    bare "default" agent, so graders can compare the two responses.
    """
    agent_name = "main" if with_skill else "default"
    started = time.time()
    proc = subprocess.run(
        ["kiro-cli", "chat", "--agent", agent_name, "--no-interactive", "--message", prompt],
        capture_output=True,
        text=True,
        timeout=90,
    )
    duration = round(time.time() - started, 2)
    # Strip ANSI escape sequences from the CLI output before it is graded.
    cleaned = re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', proc.stdout).strip()
    return cleaned, duration
def grade(response: str, expected_output: str) -> dict:
    """Keyword-overlap grader.

    Extracts every word of 5+ letters from expected_output, deduplicates
    case-insensitively, and scores the response by the fraction of those
    keywords it contains (substring match, case-insensitive).

    Returns a dict with "score" (0.0-1.0), "passed" (score >= 0.4), and up to
    ten of the matched keywords.
    """
    unique_keywords = list({w.lower() for w in re.findall(r'[a-zA-Z]{5,}', expected_output)})
    lowered = response.lower()
    hits = [kw for kw in unique_keywords if kw in lowered]
    if unique_keywords:
        score = round(len(hits) / len(unique_keywords), 2)
    else:
        # No gradeable keywords at all counts as a failure, not a free pass.
        score = 0.0
    return {"score": score, "passed": score >= 0.4, "matched_keywords": hits[:10]}
def load_baselines() -> dict:
    """Load recorded pass-rate baselines; empty dict if none have been saved yet."""
    if not BASELINE_FILE.exists():
        return {}
    return json.loads(BASELINE_FILE.read_text())
def save_baselines(baselines: dict) -> None:
    """Persist baselines to baselines.json as pretty-printed JSON."""
    payload = json.dumps(baselines, indent=2)
    BASELINE_FILE.write_text(payload)
def run_skill_evals(skill_name: str, iteration: int) -> dict:
    """Run every eval case for one skill, both with and without the skill loaded.

    For each case, writes response.txt / timing.json / grading.json under
    evals-workspace/<skill>/iteration-<n>/eval-<skill>-<id>/{with_skill,without_skill}/
    and finally a benchmark.json summary for the whole iteration.

    Args:
        skill_name: directory name under skills/.
        iteration: iteration number used to namespace the output directory.

    Returns:
        The benchmark summary dict, or {} when the skill has no evals.json.
    """
    evals_file = SKILLS_DIR / skill_name / "evals" / "evals.json"
    if not evals_file.exists():
        print(f" ⚠️ No evals.json: {evals_file}")
        return {}
    evals = json.loads(evals_file.read_text()).get("evals", [])
    iter_dir = WORKSPACE_ROOT / skill_name / f"iteration-{iteration}"
    # BUG FIX: create iter_dir up front — with an empty "evals" list the loop
    # never ran mkdir, and the benchmark.json write below crashed.
    iter_dir.mkdir(parents=True, exist_ok=True)
    results = []
    print(f"\n[Skill: {skill_name}] iteration-{iteration}")
    for case in evals:
        case_id, prompt, expected = case["id"], case["prompt"], case["expected_output"]
        case_dir = iter_dir / f"eval-{skill_name}-{case_id}"
        # Run each case twice — once with the skill-enabled agent, once
        # without — so the delta shows how much the skill actually helps.
        for mode in ("with_skill", "without_skill"):
            out_dir = case_dir / mode
            out_dir.mkdir(parents=True, exist_ok=True)
            response, elapsed = run_prompt(prompt, mode == "with_skill")
            grading = grade(response, expected)
            (out_dir / "response.txt").write_text(response)
            (out_dir / "timing.json").write_text(json.dumps({"duration_seconds": elapsed, "with_skill": mode == "with_skill"}, indent=2))
            (out_dir / "grading.json").write_text(json.dumps(grading, indent=2))
        with_grade = json.loads((case_dir / "with_skill" / "grading.json").read_text())
        without_grade = json.loads((case_dir / "without_skill" / "grading.json").read_text())
        delta = round(with_grade["score"] - without_grade["score"], 2)
        # BUG FIX: the pass/fail markers were empty strings and the summary
        # line carried an unmatched ")" where the delta label was lost.
        status = "✅" if with_grade["passed"] else "❌"
        print(f" Case {case_id}: {status} score={with_grade['score']} (Δ{delta:+.2f} vs without skill)")
        results.append({"id": case_id, "with_skill": with_grade, "without_skill": without_grade})
    passed = sum(1 for r in results if r["with_skill"]["passed"])
    pass_rate = round(passed / len(results), 2) if results else 0
    benchmark = {"skill": skill_name, "iteration": iteration, "total": len(results), "passed": passed, "pass_rate": pass_rate, "cases": results}
    (iter_dir / "benchmark.json").write_text(json.dumps(benchmark, indent=2))
    print(f" Pass rate: {passed}/{len(results)}")
    return benchmark
def check_regression(benchmark: dict, baselines: dict) -> list[str]:
    """Compare one benchmark result against the recorded baseline for its skill.

    Args:
        benchmark: summary dict from run_skill_evals; must carry "skill" and
            (normally) "pass_rate".
        baselines: mapping of skill name -> {"pass_rate": float, ...}.

    Returns:
        A one-element list with a human-readable regression message when the
        current pass rate fell below the baseline, otherwise an empty list.
        Skills without a recorded baseline never count as regressed.
    """
    skill = benchmark.get("skill")
    if not skill or skill not in baselines:
        return []
    baseline_rate = baselines[skill]["pass_rate"]
    current_rate = benchmark.get("pass_rate", 0)
    if current_rate < baseline_rate:
        # BUG FIX: the original message fused the two rates together
        # (e.g. "0.80.5") — the "→" separator had been lost.
        return [f"{skill}: pass_rate dropped {baseline_rate} → {current_rate}"]
    return []
def main():
    """CLI entry point: run evals and optionally update baselines or gate on regressions."""
    parser = argparse.ArgumentParser()
    parser.add_argument("skill", nargs="?", help="Skill name (default: all)")
    parser.add_argument("--iter", type=int, default=1)
    parser.add_argument("--update-baseline", action="store_true", help="Save current results as new baseline")
    parser.add_argument("--check-regression", action="store_true", help="Exit non-zero if any skill regressed")
    args = parser.parse_args()

    # One named skill, or every directory under skills/.
    if args.skill:
        selected = [args.skill]
    else:
        selected = [entry.name for entry in SKILLS_DIR.iterdir() if entry.is_dir()]

    baselines = load_baselines()
    regressions: list[str] = []
    benchmarks = []
    for name in selected:
        if not name:
            continue
        benchmark = run_skill_evals(name, args.iter)
        if not benchmark:
            continue
        benchmarks.append(benchmark)
        regressions += check_regression(benchmark, baselines)

    total_cases = sum(b.get("total", 0) for b in benchmarks)
    total_passed = sum(b.get("passed", 0) for b in benchmarks)
    print(f"\n{'=' * 40}")
    print(f"Overall: {total_passed}/{total_cases} cases passed")

    if args.update_baseline:
        for b in benchmarks:
            baselines[b["skill"]] = {"pass_rate": b["pass_rate"], "iteration": b["iteration"]}
        save_baselines(baselines)
        print(f"Baselines updated → {BASELINE_FILE}")

    # Regression gate: non-zero exit makes the pre-push hook abort the push.
    if args.check_regression and regressions:
        print("\n🚨 Regressions detected:")
        for msg in regressions:
            print(msg)
        raise SystemExit(1)

    print(f"Results saved to {WORKSPACE_ROOT}/")
# Script entry point: python scripts/run_evals.py [skill] [flags]
if __name__ == "__main__":
    main()
+45
View File
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# Sync skills from common-skills repo into the current project's .kiro/skills/
#
# Usage:
#   bash scripts/sync.sh                    # sync all skills
#   bash scripts/sync.sh codereview python  # sync specific skills
#
# Set COMMON_SKILLS_DIR to override the source path (default: ~/common-skills)
set -euo pipefail

COMMON_SKILLS_DIR="${COMMON_SKILLS_DIR:-$HOME/common-skills}"
TARGET_DIR=".kiro/skills"
SKILLS_SRC="$COMMON_SKILLS_DIR/skills"

if [[ ! -d "$SKILLS_SRC" ]]; then
echo "❌ common-skills not found at $COMMON_SKILLS_DIR"
echo " Clone it first: git clone <repo> ~/common-skills"
exit 1
fi

# Pull latest (best-effort: offline use still works with the local copy)
git -C "$COMMON_SKILLS_DIR" pull --ff-only 2>/dev/null || true

mkdir -p "$TARGET_DIR"

if [[ $# -gt 0 ]]; then
skills=("$@")
else
# BUG FIX: `skills=($(ls ...))` word-splits and glob-expands entries,
# breaking on names with spaces or glob characters. Enumerate the skill
# directories with a glob instead.
skills=()
shopt -s nullglob
for dir in "$SKILLS_SRC"/*/; do
skills+=("$(basename "$dir")")
done
shopt -u nullglob
fi

# Guard the empty case: under `set -u`, expanding an empty array errors on
# bash < 4.4, and there is nothing to sync anyway.
if [[ ${#skills[@]} -eq 0 ]]; then
echo "No skills found in $SKILLS_SRC"
exit 0
fi

for skill in "${skills[@]}"; do
src="$SKILLS_SRC/$skill"
dst="$TARGET_DIR/$skill"
if [[ ! -d "$src" ]]; then
echo "⚠️ Skill not found: $skill"
continue
fi
# Replace any previous copy wholesale so files deleted upstream don't linger.
rm -rf "$dst"
cp -r "$src" "$dst"
echo "✅ Synced: $skill"
done
echo "Done. Skills available in $TARGET_DIR/"
+44
View File
@@ -0,0 +1,44 @@
---
name: codereview-skill
description: Code review best practices and checklist. Use when reviewing PRs, analyzing code quality, or checking for bugs and anti-patterns.
---
# Code Review Skill
## Review Checklist
When reviewing code, check the following:
### Correctness
- Logic is correct and handles edge cases
- No off-by-one errors in loops
- Null/None checks where needed
### Readability
- Variable and function names are descriptive
- Functions do one thing (single responsibility)
- No magic numbers — use named constants
### Security
- No hardcoded secrets or credentials
- User inputs are validated/sanitized
- No SQL injection or command injection risks
## Example: Bad vs Good
```python
# Bad
def f(x):
return x * 86400 # magic number
# Good
SECONDS_PER_DAY = 86400
def to_seconds(days: int) -> int:
return days * SECONDS_PER_DAY
```
## Common Anti-patterns to Flag
- Functions longer than 40 lines → suggest splitting
- Deeply nested conditionals (>3 levels) → suggest early return
- Duplicate code blocks → suggest extracting to function
+20
View File
@@ -0,0 +1,20 @@
{
"skill_name": "codereview",
"evals": [
{
"id": 1,
"prompt": "Review this Python function for issues:\ndef calc(x): return x*86400",
"expected_output": "Identifies the magic number 86400 and suggests extracting it as a named constant like SECONDS_PER_DAY."
},
{
"id": 2,
"prompt": "Is this code okay?\ndef get_user(db, id):\n return db.execute('SELECT * FROM users WHERE id=' + id)",
"expected_output": "Flags SQL injection vulnerability and recommends parameterized queries."
},
{
"id": 3,
"prompt": "Review this function:\ndef process(a,b,c,d,e,f,g): return a+b+c+d+e+f+g",
"expected_output": "Flags too many parameters and suggests refactoring to use a data structure or fewer arguments."
}
]
}
+78
View File
@@ -0,0 +1,78 @@
---
name: docs-rag
description: >
3GPP technical specification document retrieval and index management. Use when
the user asks about 3GPP specs, Release 19 features, mission critical services,
ambient IoT, ISAC, UAV/drone support, network sharing, SNPN interconnect,
traffic steering/split, or any question answered by the local docs/ directory.
Also use when the user wants to update, rebuild, or refresh the docs index,
or when new documents have been added to the docs/ directory.
---
# 3GPP Docs RAG Skill
Lightweight retrieval-augmented generation over the local `docs/` directory of
3GPP Release 19 specifications and study reports.
## Directory Layout
```
.kiro/skills/docs-rag/
├── SKILL.md ← this file
├── data/
│ └── index.json ← document index (spec, title, keywords, summary, file path)
└── evals/
└── evals.json ← evaluation cases
scripts/
└── build_index.py ← index builder / updater
```
## Maintaining the Index
**Full rebuild** (after adding many new docs):
```bash
python scripts/build_index.py
```
**Incremental update** (add new docs or refresh changed ones):
```bash
python scripts/build_index.py --update
```
The script scans `docs/` for `.docx` / `.doc` files, extracts the scope section
and header metadata, and writes `data/index.json`. Unchanged files (same mtime)
are skipped in `--update` mode.
## How to Answer Queries
1. Read `.kiro/skills/docs-rag/data/index.json`.
2. Match the user's query against `keywords` and `summary` fields.
3. If the summary is sufficient, answer directly from it.
4. If deeper detail is needed, read the actual file at the `file` path.
## Document Index Summary
| Spec | Title | Key Topics |
|------|-------|------------|
| TS 22.280 | Mission Critical Services Common Requirements | MCPTT, MCData, MCVideo, public safety |
| TS 22.369 | Service Requirements for Ambient IoT | battery-less IoT, energy harvesting |
| TR 22.837 | Integrated Sensing and Communication (ISAC) | NR sensing, V2X, UAV, smart city |
| TR 22.840 | Study on Ambient Power-enabled IoT | ambient IoT, 5G service requirements |
| TR 22.841 | Traffic Steer/Switch/Split over Dual 3GPP Access | multi-access, PLMN, satellite NR |
| TR 22.843 | UAV Phase 3 | drone, flight management, UTM, QoS |
| TR 22.848 | Interconnect of SNPN | private network, SNPN, PLMN interconnect |
| TR 22.851 | Network Sharing Feasibility Study | NG-RAN sharing, mobility, charging |
## Retrieval Rules
- **Keyword match**: scan `keywords` array for overlap with user query terms.
- **Semantic match**: if no keyword hit, check `summary` for conceptual relevance.
- **Multi-doc**: return all relevant docs when the query spans multiple topics.
- **No match**: say so clearly rather than hallucinating.
## Answer Format
Always cite the spec number and version:
> According to **3GPP TR 22.843 V19.2.0**, the 5G system supports UAV flight path
> recommendation and inflight monitoring based on network QoS information.
+171
View File
@@ -0,0 +1,171 @@
{
"description": "3GPP technical specifications and study reports index",
"documents": [
{
"id": "22280-j50",
"file": "docs/22280-j50/22280-j50.doc",
"spec": "3GPP TS 22.280 V19.5.0",
"type": "Technical Specification",
"title": "3rd Generation Partnership Project; Technical Specification Group Services and System Aspects; Mission Critical Services Common Requirements (MCCoRe); Stage 1 (Release 19)",
"release": "Release 19",
"keywords": [
"MCPTT",
"MCData",
"MCVideo",
"mission critical",
"public safety",
"maritime"
],
"summary": "The present document provides the service requirements that are common across two or more mission critical services, that is MCPTT, MCData and MCVideo. The mission critical services make use of capabilities included in Group Communications System Enablers and Proximity Services, with additional requirements specific to the MCPTT Service as specified in 3GPP TS 22.179 [1], MCVideo Service as specified in 3GPP TS 22.281 [2], and MCData Service as specified in 3GPP TS 22.282 [3]. The mission critical services can be used for public safety applications and maritime safety applications and also for",
"_mtime": 1774526763.1365554
},
{
"id": "22369-j20",
"file": "docs/22369-j20/22369-j20.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"ambient IoT",
"energy harvesting",
"battery-less",
"ambient power",
"IoT"
],
"summary": "The present document describes service and performance requirements for ambient power-enabled Internet of Things (i.e. Ambient IoT) . In the context of the present document, Ambient IoT device is an IoT device powered by energy harvesting, being either battery-less or with limited energy storage capability (e.g. using a capacitor) and the energy is provided through the harvesting of radio waves, light, motion, heat, or any other power source that could be seen suitable. An A mbient IoT device has low complexity, small size and lower capabilities and lower power consumption than previously defi",
"_mtime": 1774526763.4285553
},
{
"id": "22837-j40",
"file": "docs/22837-j40/22837-j40.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"sensing",
"V2X",
"UAV",
"smart city",
"smart home",
"healthcare",
"maritime",
"NR",
"E-UTRA",
"5G"
],
"summary": "The present document describes use cases and potential requirements for enhancement of the 5G system to provide sensing services addressing different target verticals/applications, e.g. autonomous/assisted driving, V2X, UAVs, 3D map reconstruction, smart city, smart home, factories, healthcare, maritime sector. Use cases focus on NR-based sensing, while some use cases might make use of information already available in EPC and E-UTRA (e.g. cell/UE measurements, location updates). This study will not lead to impacts on EPC and E-UTRA. Some use cases could also include non-3GPP type sensors (e.g.",
"_mtime": 1774526764.1485553
},
{
"id": "22840-j00",
"file": "docs/22840-j00/22840-j00.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"ambient IoT",
"energy harvesting",
"battery-less",
"ambient power",
"5G",
"IoT"
],
"summary": "The present document provides Stage 1 potential 5G service requirements for ambient power-enabled Internet of Things (i.e., Ambient IoT). In the context of the present document, an Ambient power-enabled IoT device is an IoT device powered by energy harvesting, being either battery-less or with limited energy storage capability (e.g., using a capacitor) and the energy is provided through the harvesting of radio waves, light, motion, heat, or any other suitable power source. An ambient IoT device is expected to have low er complexity, small er size and reduced capabilities and lower power consum",
"_mtime": 1774526766.1005547
},
{
"id": "22841-j00",
"file": "docs/22841-j00/22841-j00.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"PLMN",
"NPN",
"satellite",
"GEO",
"MEO",
"LEO",
"NR",
"E-UTRA",
"5G",
"5GS"
],
"summary": "The scope of this TR is to document use cases, gap analysis and potential service requirements related to 5GS support of enhanced mechanisms for steering, split ting and switch ing of user data , pertaining to a UE data session, across two 3GPP networks. The following scenarios are covered, where only one single PLMN subscription is assumed : Single PLMN; PLMN and NPN ; two PLMNs. The two 3GPP networks may use same or different RAT, i.e. NR plus NR or E-UTRA, where NR RAT can be terrestrial or satellite NR access (including different staellite orbits, e.g., GEO/MEO/LEO). For the PLMN plus PLMN",
"_mtime": 1774526766.9005547
},
{
"id": "22843-j20",
"file": "docs/22843-j20/22843-j20.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"UAV",
"5G",
"QoS",
"security"
],
"summary": "The present document provides additional use cases of UAV and identifies potential requirements to improve 5G systems support of UAV applications, UAV operations and management, including: Provide additional information to the UAV operator/USS to execute pre-flight preparations and inflight operation (e.g, flight mission application, flight path recommendation, flight monitoring and control); Use 5G system to support enhancing the UAV flight/route management based on network capacity and QoS information along the planned route; Use 5G system to further enhance the safety and security of UAV o",
"_mtime": 1774526761.9005556
},
{
"id": "22848-j00",
"file": "docs/22848-j00/22848-j00.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"PLMN",
"NPN",
"SNPN",
"interconnect"
],
"summary": "The present document describes use cases and aspec t s rela t ed t o interconnect of SNPNs as well as Scalable SNPN Interconnect with dynamic connections . Potential service requirements are derived for these use cases and are consolidated in a dedicated chapter. The report ends with recommendations regarding the continuation of the work. NOTE: T here is no requirement for a PLMN to enhance their interconnect with SNPNs or operate an identity provider.",
"_mtime": 1774526762.3485556
},
{
"id": "22851-j10",
"file": "docs/22851-j10/22851-j10.docx",
"spec": "",
"type": "",
"title": "",
"release": "Release 1",
"keywords": [
"interconnect",
"network sharing",
"NG-RAN",
"mobility",
"charging",
"5G",
"security"
],
"summary": "The present document investigates use cases and potential new requirements related to 3GPP system enhanced support of specific 5G network sharing deployment scenarios , in particular where there is no direct interconnection between the shared NG-RAN and participating operators core networks. It includes the following aspects: - Mobility and service continuity, e.g., when moving from a non-shared 4G/5G network to a shared 5G network and vice versa, with focus on CN aspects. - Potential security requirements. - Charging requirements (e.g. , based on traffic differentiation in specific network s",
"_mtime": 1774526762.6245556
},
{
"id": "22890-j00",
"file": "docs/22890-j00/22890-j00.doc",
"spec": "3GPP TR 22.890 V19.0.0",
"type": "Technical Report",
"title": "Study on Supporting of Railway Smart Station Services",
"release": "Release 19",
"keywords": [
"railway",
"smart station",
"station monitoring",
"passenger services",
"railway IoT",
"5G railway"
],
"summary": "The present document analyses use cases of smart railway station such as station operation monitoring and control, passenger supporting services and evolution use cases of business and performance applications currently included in TR22.989 in order to derive potential requirements.",
"_mtime": 1774528383.0923867
}
]
}
+30
View File
@@ -0,0 +1,30 @@
{
"skill_name": "docs-rag",
"evals": [
{
"id": 1,
"prompt": "What are the 3GPP Release 19 requirements for UAV flight path management?",
"expected_output": "References TR 22.843 and describes 5G system support for UAV flight path recommendation, inflight monitoring/control, and route management based on network capacity and QoS."
},
{
"id": 2,
"prompt": "What is Ambient IoT and what specs cover it?",
"expected_output": "Identifies TS 22.369 and TR 22.840 as the relevant specs. Explains that Ambient IoT devices are battery-less or limited-energy-storage IoT devices powered by energy harvesting (radio waves, light, motion, heat)."
},
{
"id": 3,
"prompt": "Which 3GPP spec covers ISAC — Integrated Sensing and Communication?",
"expected_output": "Identifies TR 22.837 V19.4.0 and describes its scope: use cases and requirements for 5G sensing services targeting autonomous driving, V2X, UAVs, smart city, healthcare, and maritime sectors."
},
{
"id": 4,
"prompt": "What does 3GPP say about SNPN interconnect with PLMNs?",
"expected_output": "References TR 22.848 and explains it covers SNPN interconnect use cases and scalable SNPN interconnect with dynamic connections, noting PLMNs are not required to enhance interconnect with SNPNs."
},
{
"id": 5,
"prompt": "What are the mission critical service requirements in Release 19?",
"expected_output": "References TS 22.280 V19.5.0 and describes common requirements for MCPTT, MCData, and MCVideo, covering public safety, group communications, proximity services, and commercial applications."
}
]
}
+68
View File
@@ -0,0 +1,68 @@
---
name: python-skill
description: Python coding best practices and patterns. Use when writing, reviewing, or debugging Python code.
---
# Python Skill
## Type Hints (Python 3.10+)
```python
# Bad
def process(data, callback):
return callback(data)
# Good
from typing import Callable
def process(data: dict, callback: Callable[[dict], str]) -> str:
return callback(data)
```
## Dataclasses Over Plain Dicts
```python
from dataclasses import dataclass
@dataclass
class GameFrame:
timestamp: float
objects: list[str]
confidence: float = 1.0
```
## Context Managers for Resources
```python
# Bad
f = open("log.txt")
data = f.read()
f.close()
# Good
with open("log.txt") as f:
data = f.read()
```
## List Comprehensions vs Loops
```python
# Prefer comprehension for simple transforms
enemies = [obj for obj in objects if obj.type == "enemy"]
# Use loop when logic is complex (>2 conditions)
results = []
for obj in objects:
if obj.type == "enemy" and obj.visible and obj.distance < 100:
results.append(obj.position)
```
## Error Handling
```python
# Be specific — never catch bare Exception silently
try:
frame = capture_screen()
except ScreenCaptureError as e:
logger.error("Screen capture failed: %s", e)
raise
```
+20
View File
@@ -0,0 +1,20 @@
{
"skill_name": "python",
"evals": [
{
"id": 1,
"prompt": "What's the best way to handle file reading in Python?",
"expected_output": "Recommends 'with' statement (context manager) for automatic resource cleanup, shows open() usage with proper mode."
},
{
"id": 2,
"prompt": "How should I add type hints to a Python function?",
"expected_output": "Shows parameter type annotations, return type with ->, use of dataclasses or TypedDict for complex types."
},
{
"id": 3,
"prompt": "When should I use a list comprehension vs a for loop in Python?",
"expected_output": "Recommends comprehensions for simple transforms, for loops when logic is complex (multiple conditions/side effects), with examples of each."
}
]
}
+52
View File
@@ -0,0 +1,52 @@
---
name: testing-skill
description: Testing best practices for Python and general projects. Use when writing unit tests, debugging test failures, or improving test coverage.
---
# Testing Skill
## Test Structure (AAA Pattern)
```python
def test_decision_layer_returns_action():
# Arrange
state = {"health": 80, "enemy_visible": True}
# Act
action = decision_layer.decide(state)
# Assert
assert action == "attack"
```
## Pytest Tips
### Parametrize to Avoid Duplication
```python
import pytest
@pytest.mark.parametrize("health,expected", [
(100, "idle"),
(30, "flee"),
(0, "dead"),
])
def test_state_by_health(health, expected):
assert get_state(health) == expected
```
### Use Fixtures for Shared Setup
```python
@pytest.fixture
def mock_vision():
return {"objects": ["enemy", "wall"], "confidence": 0.95}
def test_understanding_layer(mock_vision):
result = understanding_layer.parse(mock_vision)
assert "enemy" in result["threats"]
```
## What to Test
- ✅ Happy path (normal input)
- ✅ Edge cases (empty, None, boundary values)
- ✅ Error paths (invalid input raises expected exception)
- ❌ Don't test implementation details — test behavior
+20
View File
@@ -0,0 +1,20 @@
{
"skill_name": "testing",
"evals": [
{
"id": 1,
"prompt": "How should I write a pytest test for a function that returns game state?",
"expected_output": "Shows AAA pattern (Arrange/Act/Assert), uses pytest assertions, and demonstrates clear test structure."
},
{
"id": 2,
"prompt": "Show me how to use pytest fixtures for shared test setup",
"expected_output": "Demonstrates @pytest.fixture decorator, fixture injection into test functions, and explains reuse across multiple tests."
},
{
"id": 3,
"prompt": "What should I test in a unit test? What should I avoid testing?",
"expected_output": "Recommends testing behavior/outputs not implementation details, covering happy path and edge cases, avoiding testing private internals."
}
]
}
+47
View File
@@ -0,0 +1,47 @@
---
name: typescript-skill
description: TypeScript coding patterns and type safety guide. Use when writing, reviewing, or debugging TypeScript code.
---
# TypeScript Skill
## Key Rules
### Always Prefer Explicit Types
```typescript
// Bad
const process = (data: any) => data.value;
// Good
interface GameState { value: number; }
const process = (data: GameState): number => data.value;
```
### Use `unknown` Instead of `any`
```typescript
// Bad
function parse(input: any) { return input.name; }
// Good
function parse(input: unknown): string {
if (typeof input === 'object' && input !== null && 'name' in input) {
return String((input as { name: unknown }).name);
}
throw new Error('Invalid input');
}
```
### Prefer `const` Assertions for Literals
```typescript
const DIRECTIONS = ['up', 'down', 'left', 'right'] as const;
type Direction = typeof DIRECTIONS[number]; // 'up' | 'down' | 'left' | 'right'
```
## Common Patterns
| Pattern | Use Case |
|---------|----------|
| `type` | Unions, intersections, primitives |
| `interface` | Object shapes (extendable) |
| `enum` | Named constants (prefer `as const` for simple cases) |
| `generic <T>` | Reusable, type-safe utilities |
+20
View File
@@ -0,0 +1,20 @@
{
"skill_name": "typescript",
"evals": [
{
"id": 1,
"prompt": "How do I avoid using 'any' type in TypeScript?",
"expected_output": "Explains using 'unknown' instead of 'any', with type guards, and shows interface/type definitions as alternatives."
},
{
"id": 2,
"prompt": "Show me how to create a type-safe function in TypeScript that processes a list of items",
"expected_output": "Demonstrates a generic function with <T> type parameter, proper return type annotation, and no use of 'any'."
},
{
"id": 3,
"prompt": "What's the difference between type and interface in TypeScript?",
"expected_output": "Explains that interfaces are extendable and better for object shapes, types support unions/intersections, with concrete examples of each."
}
]
}