Add BMAD commands, skills, and module files from piSetup
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic pre-pass for execution efficiency scanner.
|
||||
|
||||
Extracts dependency graph data and execution patterns from a BMad skill
|
||||
so the LLM scanner can evaluate efficiency from compact structured data.
|
||||
|
||||
Covers:
|
||||
- Dependency graph from skill structure
|
||||
|
||||
- Circular dependency detection
|
||||
- Transitive dependency redundancy
|
||||
- Parallelizable stage groups (independent nodes)
|
||||
- Sequential pattern detection in prompts (numbered Read/Grep/Glob steps)
|
||||
- Subagent-from-subagent detection
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def detect_cycles(graph: dict[str, list[str]]) -> list[list[str]]:
    """Return the dependency cycles found by a depth-first walk of ``graph``.

    Args:
        graph: Adjacency mapping of node name -> names it depends on. Names
            that only appear as dependencies are treated as having no edges.

    Returns:
        One list per back edge met during the walk. Each list starts at the
        node where the loop begins, follows the active DFS path, and ends
        with that same node repeated (for example ``['a', 'b', 'a']``).
    """
    found: list[list[str]] = []
    seen: set[str] = set()
    trail: list[str] = []        # nodes on the active DFS path, in visit order
    on_trail: set[str] = set()   # same nodes, kept as a set for fast lookups

    def walk(current: str) -> None:
        if current in on_trail:
            # Back edge: everything from the first occurrence onward is the loop.
            found.append(trail[trail.index(current):] + [current])
        elif current not in seen:
            seen.add(current)
            trail.append(current)
            on_trail.add(current)
            for successor in graph.get(current, []):
                walk(successor)
            on_trail.discard(current)
            trail.pop()

    for start in graph:
        walk(start)
    return found
|
||||
|
||||
|
||||
def find_transitive_redundancy(graph: dict[str, list[str]]) -> list[dict]:
    """Find direct dependencies that are already implied by a sibling dependency.

    For every node ``A`` with direct dependencies ``B`` and ``C``, ``C`` is
    reported as redundant when it is reachable from ``B`` (``A -> B -> ... -> C``).

    Args:
        graph: Adjacency mapping of node name -> names it directly depends on.

    Returns:
        One dict per (node, redundant dependency, sibling) combination with the
        keys ``node``, ``redundant_dep``, ``already_via`` and ``issue``, in the
        iteration order of ``graph`` and of each dependency list.
    """
    # Reachability sets are memoised per start node: the same sibling is
    # queried once for every other dependency of every node that lists it.
    closures: dict[str, set[str]] = {}

    def get_transitive(start: str) -> set[str]:
        """Return every node reachable from ``start`` through one or more edges."""
        if start not in closures:
            reachable: set[str] = set()
            # Explicit stack instead of recursion so long dependency chains
            # cannot exhaust the interpreter's recursion limit.
            pending = list(graph.get(start, []))
            while pending:
                dep = pending.pop()
                if dep not in reachable:
                    reachable.add(dep)
                    pending.extend(graph.get(dep, []))
            closures[start] = reachable
        return closures[start]

    redundancies = []
    for node, direct_deps in graph.items():
        for dep in direct_deps:
            # Check whether dep is reachable through any *other* direct dep.
            for other in direct_deps:
                if other != dep and dep in get_transitive(other):
                    redundancies.append({
                        'node': node,
                        'redundant_dep': dep,
                        'already_via': other,
                        'issue': f'"{node}" declares "{dep}" as dependency, but already reachable via "{other}"',
                    })

    return redundancies
|
||||
|
||||
|
||||
def find_parallel_groups(graph: dict[str, list[str]], all_nodes: set[str]) -> list[list[str]]:
    """Group nodes into waves whose members do not depend on one another.

    The nodes are peeled off level by level: a node joins the current wave
    once none of its dependencies is still waiting. Only waves holding more
    than one node are reported, since a single node offers nothing to run in
    parallel.

    Args:
        graph: Adjacency mapping of node name -> names it depends on.
        all_nodes: Every node to schedule.

    Returns:
        Sorted name lists, one per multi-node wave, in scheduling order. If
        the remaining nodes all wait on each other (a cycle), processing
        stops and the waves found so far are returned.
    """
    waves: list[list[str]] = []
    pending = set(all_nodes)

    while pending:
        # Runnable = no dependency is still pending.
        runnable = {
            name for name in pending
            if pending.isdisjoint(graph.get(name, []))
        }
        if not runnable:
            # Everything left is blocked on something else that is left.
            break
        if len(runnable) > 1:
            waves.append(sorted(runnable))
        pending -= runnable

    return waves
|
||||
|
||||
|
||||
def scan_sequential_patterns(filepath: Path, rel_path: str) -> list[dict]:
    """Flag wording in one file that suggests serial work which could be parallel.

    Three checks run against the file text:
      * three or more numbered list steps that mention Read/Grep/Glob,
      * "read all / for each / analyze each / scan ... / review ..." phrasing,
      * a mention of spawning/launching/creating a subagent in a file that is
        neither ``SKILL.md`` nor a numbered (``NN-``) prompt.

    Args:
        filepath: File to read (decoded as UTF-8).
        rel_path: Name recorded in each finding and used for the subagent check.

    Returns:
        A list of dicts with the keys ``file``, ``type``, ``count`` and ``issue``.
    """
    text = filepath.read_text(encoding='utf-8')
    hits: list[dict] = []

    def record(kind: str, count: int, issue: str) -> None:
        hits.append({'file': rel_path, 'type': kind, 'count': count, 'issue': issue})

    # Numbered steps ("1. ...") whose text names a Read/Grep/Glob tool.
    numbered_tool_steps = re.findall(
        r'^\s*\d+\.\s+.*?\b(Read|Grep|Glob|read|grep|glob)\b.*$',
        text, re.MULTILINE
    )
    step_total = len(numbered_tool_steps)
    if step_total >= 3:
        record(
            'sequential-tool-calls',
            step_total,
            f'{step_total} sequential tool call steps found — check if independent calls can be parallel',
        )

    # Phrases that describe looping over a collection one item at a time.
    loop_phrases = (
        (r'[Rr]ead all (?:files|documents|prompts)', 'read-all'),
        (r'[Ff]or each (?:file|document|prompt|stage)', 'for-each-loop'),
        (r'[Aa]nalyze each', 'analyze-each'),
        (r'[Ss]can (?:through|all|each)', 'scan-all'),
        (r'[Rr]eview (?:all|each)', 'review-all'),
    )
    for regex, kind in loop_phrases:
        found = re.findall(regex, text)
        if found:
            record(
                kind,
                len(found),
                f'"{found[0]}" pattern found — consider parallel subagent delegation',
            )

    # SKILL.md and numbered prompts are exempt from the subagent-chain check;
    # any other file is treated as a subagent.
    exempt = rel_path == 'SKILL.md' or re.match(r'^\d+-', rel_path)
    mentions_spawning = re.search(
        r'(?i)spawn.*subagent|launch.*subagent|create.*subagent', text
    )
    if mentions_spawning and not exempt:
        record(
            'subagent-chain-violation',
            1,
            'Subagent file references spawning other subagents — subagents cannot spawn subagents',
        )

    return hits
|
||||
|
||||
|
||||
def scan_execution_deps(skill_path: Path) -> dict:
    """Run every deterministic execution-efficiency check for one skill directory.

    Args:
        skill_path: Skill root directory; its ``.md`` files are inspected.

    Returns:
        A JSON-serialisable report with scanner metadata, the dependency graph
        section, the sequential patterns found, the derived issue list and a
        per-severity summary. ``status`` is ``'fail'`` when any critical issue
        exists, ``'warning'`` when any medium issue exists, else ``'pass'``.
    """
    # NOTE: both dependency maps start empty and nothing in this function
    # fills them, so the graph-based checks below currently see no edges.
    hard_deps: dict[str, list[str]] = {}
    soft_deps: dict[str, list[str]] = {}

    # Every root-level markdown file other than SKILL.md counts as a stage.
    root_prompts = [
        entry for entry in sorted(skill_path.iterdir())
        if entry.is_file() and entry.suffix == '.md' and entry.name != 'SKILL.md'
    ]
    stage_names: set[str] = {entry.stem for entry in root_prompts}

    cycles = detect_cycles(hard_deps)
    redundancies = find_transitive_redundancy(hard_deps)
    parallel_groups = find_parallel_groups(hard_deps, stage_names)

    # Pattern scan: root prompt files first (sorted), SKILL.md last.
    sequential_patterns = []
    for entry in root_prompts:
        sequential_patterns.extend(scan_sequential_patterns(entry, entry.name))
    skill_md = skill_path / 'SKILL.md'
    if skill_md.exists():
        sequential_patterns.extend(scan_sequential_patterns(skill_md, 'SKILL.md'))

    # Translate the raw findings into uniform issue records.
    issues = [
        {
            'severity': 'critical',
            'category': 'circular-dependency',
            'issue': f'Circular dependency detected: {" → ".join(cycle)}',
        }
        for cycle in cycles
    ]
    issues.extend(
        {
            'severity': 'medium',
            'category': 'dependency-bloat',
            'issue': redundancy['issue'],
        }
        for redundancy in redundancies
    )
    issues.extend(
        {
            'file': pattern['file'],
            'severity': 'critical' if pattern['type'] == 'subagent-chain-violation' else 'medium',
            'category': pattern['type'],
            'issue': pattern['issue'],
        }
        for pattern in sequential_patterns
    )

    by_severity = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    for level in (record['severity'] for record in issues):
        if level in by_severity:
            by_severity[level] += 1

    if by_severity['critical'] > 0:
        status = 'fail'
    elif by_severity['medium'] > 0:
        status = 'warning'
    else:
        status = 'pass'

    return {
        'scanner': 'execution-efficiency-prepass',
        'script': 'prepass-execution-deps.py',
        'version': '1.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'status': status,
        'dependency_graph': {
            'stages': sorted(stage_names),
            'hard_dependencies': hard_deps,
            'soft_dependencies': soft_deps,
            'cycles': cycles,
            'transitive_redundancies': redundancies,
            'parallel_groups': parallel_groups,
        },
        'sequential_patterns': sequential_patterns,
        'issues': issues,
        'summary': {
            'total_issues': len(issues),
            'by_severity': by_severity,
        },
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: scan a skill directory and emit the JSON report.

    Returns:
        ``2`` when the given path is not a directory, otherwise ``0``. The
        report goes to stdout unless ``--output`` names a file, in which case
        missing parent directories are created first.
    """
    cli = argparse.ArgumentParser(
        description='Extract execution dependency graph and patterns for LLM scanner pre-pass',
    )
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument(
        '--output', '-o',
        type=Path,
        help='Write JSON output to file instead of stdout',
    )
    options = cli.parse_args()

    target = options.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    rendered = json.dumps(scan_execution_deps(target), indent=2)

    destination = options.output
    if not destination:
        print(rendered)
        return 0

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered)
    # Status goes to stderr so stdout stays clean for callers that pipe it.
    print(f"Results written to {destination}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic pre-pass for prompt craft scanner.
|
||||
|
||||
Extracts metrics and flagged patterns from SKILL.md and prompt files
|
||||
so the LLM scanner can work from compact data instead of reading raw files.
|
||||
|
||||
Covers:
|
||||
- SKILL.md line count and section inventory
|
||||
- Overview section size
|
||||
- Inline data detection (tables, fenced code blocks)
|
||||
- Defensive padding pattern grep
|
||||
- Meta-explanation pattern grep
|
||||
- Back-reference detection ("as described above")
|
||||
- Config header and progression condition presence per prompt
|
||||
- File-level token estimates (chars / 4 rough approximation)
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Defensive padding / filler patterns
|
||||
# Each entry is (regex, category, human-readable label). The regexes are
# matched case-sensitively; the leading character class covers sentence case.
WASTE_PATTERNS = [
    # --- defensive padding -------------------------------------------------
    (r'\b[Mm]ake sure (?:to|you)\b', 'defensive-padding', 'Defensive: "make sure to/you"'),
    (r"\b[Dd]on'?t forget (?:to|that)\b", 'defensive-padding', "Defensive: \"don't forget\""),
    (r'\b[Rr]emember (?:to|that)\b', 'defensive-padding', 'Defensive: "remember to/that"'),
    (r'\b[Bb]e sure to\b', 'defensive-padding', 'Defensive: "be sure to"'),
    (r'\b[Pp]lease ensure\b', 'defensive-padding', 'Defensive: "please ensure"'),
    (r'\b[Ii]t is important (?:to|that)\b', 'defensive-padding', 'Defensive: "it is important"'),
    # --- meta explanation --------------------------------------------------
    (r'\b[Yy]ou are an AI\b', 'meta-explanation', 'Meta: "you are an AI"'),
    (r'\b[Aa]s a language model\b', 'meta-explanation', 'Meta: "as a language model"'),
    (r'\b[Aa]s an AI assistant\b', 'meta-explanation', 'Meta: "as an AI assistant"'),
    (
        r'\b[Tt]his (?:workflow|skill|process) is designed to\b',
        'meta-explanation',
        'Meta: "this workflow is designed to"',
    ),
    (
        r'\b[Tt]he purpose of this (?:section|step) is\b',
        'meta-explanation',
        'Meta: "the purpose of this section is"',
    ),
    # --- filler ------------------------------------------------------------
    (r"\b[Ll]et'?s (?:think about|begin|start)\b", 'filler', "Filler: \"let's think/begin\""),
    (r'\b[Nn]ow we(?:\'ll| will)\b', 'filler', "Filler: \"now we'll\""),
]
|
||||
|
||||
# Back-reference patterns (self-containment risk)
|
||||
# Each entry is (regex, human-readable label) for a phrase that points the
# reader at text elsewhere in the document.
BACKREF_PATTERNS = [
    (
        r'\bas described above\b',
        'Back-reference: "as described above"',
    ),
    (
        r'\bper the overview\b',
        'Back-reference: "per the overview"',
    ),
    (
        r'\bas mentioned (?:above|in|earlier)\b',
        'Back-reference: "as mentioned above/in/earlier"',
    ),
    (
        r'\bsee (?:above|the overview)\b',
        'Back-reference: "see above/the overview"',
    ),
    (
        r'\brefer to (?:the )?(?:above|overview|SKILL)\b',
        'Back-reference: "refer to above/overview"',
    ),
]
|
||||
|
||||
|
||||
def count_tables(content: str) -> tuple[int, int]:
    """Count markdown tables and the rows they span.

    A row is any line whose first non-whitespace character is ``|``; a table
    is a maximal run of consecutive rows.

    Returns:
        ``(number_of_tables, total_row_lines)``.
    """
    tables = 0
    rows = 0
    previous_was_row = False
    for text in content.split('\n'):
        is_row = re.match(r'^\s*\|', text) is not None
        if is_row:
            rows += 1
            if not previous_was_row:
                # First row after a non-row line opens a new table.
                tables += 1
        previous_was_row = is_row
    return tables, rows
|
||||
|
||||
|
||||
def count_fenced_blocks(content: str) -> tuple[int, int]:
    """Count fenced code blocks and the lines inside them.

    Any line that starts with three backticks (after stripping whitespace)
    toggles the fence state; the fence lines themselves are not counted.

    Returns:
        ``(number_of_blocks_opened, lines_inside_blocks)``.
    """
    opened = 0
    inner_lines = 0
    inside = False
    for text in content.split('\n'):
        if text.strip().startswith('```'):
            inside = not inside
            if inside:
                opened += 1
        elif inside:
            inner_lines += 1
    return opened, inner_lines
|
||||
|
||||
|
||||
def extract_overview_size(content: str) -> int:
    """Return the number of lines that belong to the ``## Overview`` section.

    Counting starts on the line after the ``## Overview`` heading and stops at
    the next level-2 heading (``## ``). Deeper headings such as ``###`` are
    counted as part of the section. Returns 0 when there is no such heading.
    """
    overview_heading = re.compile(r'^##\s+Overview\b')
    any_h2 = re.compile(r'^##\s')

    counted = 0
    active = False
    for text in content.split('\n'):
        if overview_heading.match(text):
            active = True
        elif active:
            if any_h2.match(text):
                break
            counted += 1
    return counted
|
||||
|
||||
|
||||
def scan_file_patterns(filepath: Path, rel_path: str) -> dict:
    """Collect size metrics and flagged phrases for a single markdown file.

    Args:
        filepath: File to read (decoded as UTF-8).
        rel_path: Name stored under ``file`` in the result.

    Returns:
        A dict with line count, a rough token estimate (characters // 4), the
        H2/H3 section inventory, table and fenced-block counts, waste-pattern
        and back-reference matches (each with line number and up to 100
        characters of context), and the config-header / progression flags.
    """
    text = filepath.read_text(encoding='utf-8')
    rows = text.split('\n')

    def locate(match: re.Match) -> tuple[int, str]:
        """Return the 1-based line number of a match and a trimmed excerpt of that line."""
        number = text.count('\n', 0, match.start()) + 1
        return number, rows[number - 1].strip()[:100]

    # Section inventory: level-2 and level-3 headings only.
    heading = re.compile(r'^(#{2,3})\s+(.+)$')
    sections = []
    for number, row in enumerate(rows, 1):
        found = heading.match(row)
        if found:
            sections.append({
                'level': len(found.group(1)),
                'title': found.group(2).strip(),
                'line': number,
            })

    table_count, table_lines = count_tables(text)
    block_count, block_lines = count_fenced_blocks(text)

    # Waste patterns are case-sensitive as written in the table.
    waste_matches = []
    for regex, category, label in WASTE_PATTERNS:
        for hit in re.finditer(regex, text):
            number, excerpt = locate(hit)
            waste_matches.append({
                'line': number,
                'category': category,
                'pattern': label,
                'context': excerpt,
            })

    # Back-references are matched case-insensitively.
    backref_matches = []
    for regex, label in BACKREF_PATTERNS:
        for hit in re.finditer(regex, text, re.IGNORECASE):
            number, excerpt = locate(hit)
            backref_matches.append({
                'line': number,
                'pattern': label,
                'context': excerpt,
            })

    config_vars = ('{communication_language}', '{document_output_language}')
    has_config_header = any(variable in text for variable in config_vars)

    lowered = text.lower()
    progression_keywords = (
        'progress', 'advance', 'move to', 'next stage',
        'when complete', 'proceed to', 'transition', 'completion criteria',
    )
    has_progression = any(keyword in lowered for keyword in progression_keywords)

    return {
        'file': rel_path,
        'line_count': len(rows),
        'token_estimate': len(text) // 4,
        'sections': sections,
        'table_count': table_count,
        'table_lines': table_lines,
        'fenced_block_count': block_count,
        'fenced_block_lines': block_lines,
        'waste_patterns': waste_matches,
        'back_references': backref_matches,
        'has_config_header': has_config_header,
        'has_progression': has_progression,
    }
|
||||
|
||||
|
||||
def scan_prompt_metrics(skill_path: Path) -> dict:
    """Gather prompt-craft metrics for SKILL.md, root prompt files and resources.

    Args:
        skill_path: Skill root directory.

    Returns:
        A JSON-serialisable report: scanner metadata, a SKILL.md summary
        (all zeros when SKILL.md is absent), prompt health counters, aggregate
        totals, per-resource sizes and the per-file metric dicts.
    """
    files_data = []
    skill_md_data = None

    # SKILL.md comes first and additionally carries the Overview size.
    skill_md = skill_path / 'SKILL.md'
    if skill_md.exists():
        skill_md_data = scan_file_patterns(skill_md, 'SKILL.md')
        skill_md_data['overview_lines'] = extract_overview_size(
            skill_md.read_text(encoding='utf-8')
        )
        skill_md_data['is_skill_md'] = True
        files_data.append(skill_md_data)

    # Every other markdown file directly under the skill root is a prompt.
    for entry in sorted(skill_path.iterdir()):
        if not (entry.is_file() and entry.suffix == '.md'):
            continue
        if entry.name == 'SKILL.md':
            continue
        metrics = scan_file_patterns(entry, entry.name)
        metrics['is_skill_md'] = False
        files_data.append(metrics)

    # Resources are only sized, not pattern-scanned.
    resource_sizes = {}
    resources_dir = skill_path / 'resources'
    if resources_dir.exists():
        for entry in sorted(resources_dir.iterdir()):
            if entry.is_file() and entry.suffix in ('.md', '.json', '.yaml', '.yml'):
                body = entry.read_text(encoding='utf-8')
                resource_sizes[entry.name] = {
                    'lines': body.count('\n') + 1,
                    'tokens': len(body) // 4,
                }

    prompts = [item for item in files_data if not item.get('is_skill_md')]

    if skill_md_data:
        skill_md_summary = {
            'line_count': skill_md_data['line_count'],
            'token_estimate': skill_md_data['token_estimate'],
            'overview_lines': skill_md_data.get('overview_lines', 0),
            'table_count': skill_md_data['table_count'],
            'table_lines': skill_md_data['table_lines'],
            'fenced_block_count': skill_md_data['fenced_block_count'],
            'fenced_block_lines': skill_md_data['fenced_block_lines'],
            'section_count': len(skill_md_data['sections']),
        }
    else:
        skill_md_summary = dict.fromkeys(
            (
                'line_count', 'token_estimate', 'overview_lines', 'table_count',
                'table_lines', 'fenced_block_count', 'fenced_block_lines',
                'section_count',
            ),
            0,
        )

    return {
        'scanner': 'prompt-craft-prepass',
        'script': 'prepass-prompt-metrics.py',
        'version': '1.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'status': 'info',
        'skill_md_summary': skill_md_summary,
        'prompt_health': {
            'total_prompts': len(prompts),
            'prompts_with_config_header': sum(1 for item in prompts if item['has_config_header']),
            'prompts_with_progression': sum(1 for item in prompts if item['has_progression']),
        },
        'aggregate': {
            'total_files_scanned': len(files_data),
            'total_token_estimate': sum(item['token_estimate'] for item in files_data),
            'total_waste_patterns': sum(len(item['waste_patterns']) for item in files_data),
            'total_back_references': sum(len(item['back_references']) for item in files_data),
        },
        'resource_sizes': resource_sizes,
        'files': files_data,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: scan a skill directory and emit the JSON report.

    Returns:
        ``2`` when the given path is not a directory, otherwise ``0``. The
        report goes to stdout unless ``--output`` names a file, in which case
        missing parent directories are created first.
    """
    cli = argparse.ArgumentParser(
        description='Extract prompt craft metrics for LLM scanner pre-pass',
    )
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument(
        '--output', '-o',
        type=Path,
        help='Write JSON output to file instead of stdout',
    )
    options = cli.parse_args()

    target = options.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    rendered = json.dumps(scan_prompt_metrics(target), indent=2)

    destination = options.output
    if not destination:
        print(rendered)
        return 0

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered)
    # Status goes to stderr so stdout stays clean for callers that pipe it.
    print(f"Results written to {destination}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
@@ -0,0 +1,480 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic pre-pass for workflow integrity scanner.
|
||||
|
||||
Extracts structural metadata from a BMad skill that the LLM scanner
|
||||
can use instead of reading all files itself. Covers:
|
||||
- Frontmatter parsing and validation
|
||||
- Section inventory (H2/H3 headers)
|
||||
- Template artifact detection
|
||||
- Stage file cross-referencing
|
||||
- Stage numbering validation
|
||||
- Config header detection in prompts
|
||||
- Language/directness pattern grep
|
||||
- On Exit / Exiting section detection (invalid)
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Template artifacts that should NOT appear in finalized skills
|
||||
# Regexes (braces escaped) for builder-template markers.
TEMPLATE_ARTIFACTS = [
    # Conditional open/close tags
    r'\{if-complex-workflow\}',
    r'\{/if-complex-workflow\}',
    r'\{if-simple-workflow\}',
    r'\{/if-simple-workflow\}',
    r'\{if-simple-utility\}',
    r'\{/if-simple-utility\}',
    r'\{if-module\}',
    r'\{/if-module\}',
    r'\{if-headless\}',
    r'\{/if-headless\}',
    # Unfilled placeholders
    r'\{displayName\}',
    r'\{skillName\}',
]
|
||||
# Runtime variables that ARE expected (not artifacts)
|
||||
# Literal placeholder strings (not regexes) that are legitimate at runtime.
RUNTIME_VARS = {
    '{user_name}',
    '{communication_language}',
    '{document_output_language}',
    '{project-root}',
    '{output_folder}',
    '{planning_artifacts}',
}
|
||||
|
||||
# Directness anti-patterns
|
||||
# Each entry is (regex, message reported for every match).
DIRECTNESS_PATTERNS = [
    (
        r'\byou should\b',
        'Suggestive "you should" — use direct imperative',
    ),
    (
        # "please note" is deliberately excluded by the negative lookahead.
        r'\bplease\b(?! note)',
        'Polite "please" — use direct imperative',
    ),
    (
        r'\bhandle appropriately\b',
        'Ambiguous "handle appropriately" — specify how',
    ),
    (
        r'\bwhen ready\b',
        'Vague "when ready" — specify testable condition',
    ),
]
|
||||
|
||||
# Invalid sections
|
||||
# Each entry is (heading regex, message reported when the heading is present).
INVALID_SECTIONS = [
    (
        r'^##\s+On\s+Exit\b',
        'On Exit section found — no exit hooks exist in the system, this will never run',
    ),
    (
        r'^##\s+Exiting\b',
        'Exiting section found — no exit hooks exist in the system, this will never run',
    ),
]
|
||||
|
||||
|
||||
def parse_frontmatter(content: str) -> tuple[dict | None, list[dict]]:
    """Parse and validate the ``---`` delimited frontmatter at the top of SKILL.md.

    The parser is deliberately minimal: it reads flat ``key: value`` lines,
    skips blank lines and ``#`` comments, splits on the first colon and strips
    surrounding quotes. Nested or multi-line YAML values are not interpreted.

    Args:
        content: Full text of SKILL.md.

    Returns:
        ``(frontmatter, findings)``. ``frontmatter`` is the parsed mapping, or
        ``None`` when no frontmatter block is present. ``findings`` is a list
        of dicts with the keys ``file``, ``line``, ``severity``, ``category``
        and ``issue``.
    """
    findings: list[dict] = []

    def report(severity: str, issue: str) -> None:
        findings.append({
            'file': 'SKILL.md', 'line': 1,
            'severity': severity, 'category': 'frontmatter',
            'issue': issue,
        })

    # The closing fence may be followed by a newline OR be the very last line
    # of the file; a missing trailing newline must not hide the frontmatter.
    fm_match = re.match(r'^---\s*\n(.*?)\n---\s*(?:\n|$)', content, re.DOTALL)
    if not fm_match:
        report('critical', 'No YAML frontmatter found')
        return None, findings

    fm: dict[str, str] = {}
    for line in fm_match.group(1).strip().split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if ':' in line:
            key, _, value = line.partition(':')
            fm[key.strip()] = value.strip().strip('"').strip("'")

    # name: required, kebab-case, bmad- prefix (first failing rule wins).
    name = fm.get('name')
    if not name:
        report('critical', 'Missing "name" field in frontmatter')
    elif not re.match(r'^[a-z0-9]+(-[a-z0-9]+)*$', name):
        report('high', f'Name "{name}" is not kebab-case')
    elif not name.startswith('bmad-'):
        report('medium', f'Name "{name}" does not follow bmad-* naming convention')

    # description: required and must carry a "Use when" trigger phrase.
    desc = fm.get('description')
    if not desc:
        report('high', 'Missing "description" field in frontmatter')
    elif 'Use when' not in desc and 'use when' not in desc:
        report('medium', 'Description missing "Use when..." trigger phrase')

    # Anything outside the known keys is reported, not rejected.
    allowed = {'name', 'description', 'menu-code'}
    extra = set(fm) - allowed
    if extra:
        report('low', f'Extra frontmatter fields: {", ".join(sorted(extra))}')

    return fm, findings
|
||||
|
||||
|
||||
def extract_sections(content: str) -> list[dict]:
    """List the level-2 and level-3 markdown headings with their line numbers.

    Returns:
        One dict per heading, in document order, with ``level`` (2 or 3),
        ``title`` (whitespace-trimmed) and ``line`` (1-based).
    """
    heading = re.compile(r'^(#{2,3})\s+(.+)$')
    candidates = (
        (number, heading.match(text))
        for number, text in enumerate(content.split('\n'), 1)
    )
    return [
        {
            'level': len(found.group(1)),
            'title': found.group(2).strip(),
            'line': number,
        }
        for number, found in candidates
        if found
    ]
|
||||
|
||||
|
||||
def check_required_sections(sections: list[dict]) -> list[dict]:
    """Report missing mandatory H2 sections and H2 sections that must not exist.

    Args:
        sections: Heading records with ``level``, ``title`` and ``line`` keys.

    Returns:
        Findings (all severity ``high``): first any missing ``Overview`` /
        ``On Activation`` section, then one entry per level-2 heading that
        matches an ``INVALID_SECTIONS`` pattern.
    """
    top_level = [entry for entry in sections if entry['level'] == 2]
    present = {entry['title'] for entry in top_level}

    mandatory = (
        ('Overview', 'Missing ## Overview section'),
        ('On Activation', 'Missing ## On Activation section'),
    )
    findings = [
        {
            'file': 'SKILL.md', 'line': 1,
            'severity': 'high', 'category': 'sections',
            'issue': message,
        }
        for title, message in mandatory
        if title not in present
    ]

    for entry in top_level:
        # The patterns are written against the full heading line.
        heading_line = f"## {entry['title']}"
        for regex, message in INVALID_SECTIONS:
            if re.match(regex, heading_line):
                findings.append({
                    'file': 'SKILL.md', 'line': entry['line'],
                    'severity': 'high', 'category': 'invalid-section',
                    'issue': message,
                })

    return findings
|
||||
|
||||
|
||||
def find_template_artifacts(filepath: Path, rel_path: str) -> list[dict]:
    """Report every ``TEMPLATE_ARTIFACTS`` match found in a file.

    Args:
        filepath: File to read (decoded as UTF-8).
        rel_path: Name stored under ``file`` in each finding.

    Returns:
        One ``high`` severity finding per match, grouped by pattern in table
        order, each with the 1-based line of the match. Matches whose text is
        listed in ``RUNTIME_VARS`` are skipped.
    """
    text = filepath.read_text(encoding='utf-8')
    findings: list[dict] = []

    for regex in TEMPLATE_ARTIFACTS:
        for hit in re.finditer(regex, text):
            token = hit.group()
            if token in RUNTIME_VARS:
                continue
            findings.append({
                'file': rel_path,
                'line': text.count('\n', 0, hit.start()) + 1,
                'severity': 'high', 'category': 'artifacts',
                'issue': f'Orphaned template artifact: {token}',
                'fix': 'Resolve or remove this template conditional/placeholder',
            })

    return findings
|
||||
|
||||
|
||||
def cross_reference_stages(skill_path: Path, skill_content: str) -> tuple[dict, list[dict]]:
    """Cross-check numbered stage files on disk against references in SKILL.md.

    Args:
        skill_path: Skill root directory; ``NN-*.md`` files directly inside it
            (except ``SKILL.md``) are the actual stages.
        skill_content: Text of SKILL.md, searched for ``NN-name.md`` references
            with or without a legacy ``prompts/`` prefix.

    Returns:
        ``(stage_summary, findings)``. The summary lists referenced, actual,
        missing and orphaned stage file names. Findings cover referenced files
        that do not exist (critical), files that are never referenced (medium)
        and gaps in the numeric prefixes (medium).
    """
    findings = []

    # Numbered prompt files that really exist at the skill root.
    actual_files = {
        entry.name for entry in skill_path.iterdir()
        if entry.is_file() and entry.suffix == '.md'
        and entry.name != 'SKILL.md' and re.match(r'^\d+-', entry.name)
    }

    # `prompts/XX-name.md` (legacy) and bare `XX-name.md` both count; only the
    # file name part is kept.
    referenced = set(re.findall(r'(?:prompts/)?(\d+-[^\s)`]+\.md)', skill_content))

    missing = referenced - actual_files
    for name in sorted(missing):
        findings.append({
            'file': 'SKILL.md', 'line': 0,
            'severity': 'critical', 'category': 'missing-stage',
            'issue': f'Referenced stage file does not exist: {name}',
        })

    orphaned = actual_files - referenced
    for name in sorted(orphaned):
        findings.append({
            'file': name, 'line': 0,
            'severity': 'medium', 'category': 'naming',
            'issue': f'Stage file exists but not referenced in SKILL.md: {name}',
        })

    # Stage numbering check. Gaps are measured between the lowest and highest
    # prefix actually used, so every hole is reported and a duplicated prefix
    # is not mistaken for a missing number.
    # NOTE(review): assumes prefixes are small ordinals (01-, 02-, ...); a very
    # large prefix would make the reported list correspondingly long.
    stage_numbers = set()
    for name in actual_files:
        numbered = re.match(r'^(\d+)-(.+)\.md$', name)
        if numbered:
            stage_numbers.add(int(numbered.group(1)))

    ordered = sorted(stage_numbers)
    gaps = [
        number
        for lower, upper in zip(ordered, ordered[1:])
        for number in range(lower + 1, upper)
    ]
    if gaps:
        findings.append({
            'file': skill_path.name, 'line': 0,
            'severity': 'medium', 'category': 'naming',
            'issue': f'Stage numbering has gaps: missing {gaps}',
        })

    stage_summary = {
        'total_stages': len(actual_files),
        'referenced': sorted(referenced),
        'actual': sorted(actual_files),
        'missing_stages': sorted(missing),
        'orphaned_stages': sorted(orphaned),
    }

    return stage_summary, findings
|
||||
|
||||
|
||||
def check_prompt_basics(skill_path: Path) -> tuple[list[dict], list[dict]]:
    """Inspect every numbered stage prompt (``NN-*.md``) at the skill root.

    Each prompt is checked for a config header (language variables), for
    progression wording, for directness-pattern hits and for template
    artifacts.

    Returns:
        ``(prompt_details, findings)`` — one detail dict per prompt file and
        the flat list of findings raised across all of them.
    """
    issues = []
    details = []

    # Numbered prompt files live directly at the skill root.
    numbered_prompts = sorted(
        entry for entry in skill_path.iterdir()
        if entry.is_file()
        and entry.suffix == '.md'
        and entry.name != 'SKILL.md'
        and re.match(r'^\d+-', entry.name)
    )

    progression_terms = ['progress', 'advance', 'move to', 'next stage', 'when complete',
                         'proceed to', 'transition', 'completion criteria']

    for prompt in numbered_prompts:
        text = prompt.read_text(encoding='utf-8')
        name = prompt.name

        # Config header: either language variable counts.
        has_header = '{communication_language}' in text or '{document_output_language}' in text
        if not has_header:
            issues.append({
                'file': name, 'line': 1,
                'severity': 'medium', 'category': 'config-header',
                'issue': 'No config header with language variables found',
            })

        # Progression: any keyword anywhere in the (lower-cased) prompt.
        lowered = text.lower()
        has_progression = any(term in lowered for term in progression_terms)
        if not has_progression:
            issues.append({
                # Reported against the last line of the file.
                'file': name, 'line': text.count('\n') + 1,
                'severity': 'high', 'category': 'progression',
                'issue': 'No progression condition keywords found',
            })

        # Directness patterns, one finding per match.
        for pattern, message in DIRECTNESS_PATTERNS:
            for hit in re.finditer(pattern, text, re.IGNORECASE):
                issues.append({
                    'file': name, 'line': text.count('\n', 0, hit.start()) + 1,
                    'severity': 'low', 'category': 'language',
                    'issue': message,
                })

        # Leftover template artifacts.
        issues.extend(find_template_artifacts(prompt, name))

        details.append({
            'file': name,
            'has_config_header': has_header,
            'has_progression': has_progression,
        })

    return details, issues
|
||||
|
||||
|
||||
def detect_workflow_type(skill_content: str, has_prompts: bool) -> str:
    """Classify a skill from its SKILL.md text.

    Returns ``'complex'`` when SKILL.md references a numbered stage file, or
    when numbered prompt files exist and the text mentions routing/stage/
    branch/path wording; ``'simple-workflow'`` when a line starts with a
    numbered list item; otherwise ``'simple-utility'``.
    """
    # A reference such as `prompts/01-init.md` or bare `01-init.md`.
    if re.search(r'(?:prompts/)?\d+-\S+\.md', skill_content) is not None:
        return 'complex'

    mentions_routing = re.search(r'(?i)(rout|stage|branch|path)', skill_content) is not None
    if has_prompts and mentions_routing:
        return 'complex'

    # Any line beginning with "<digits>. " marks a numbered step list.
    if re.search(r'(?m)^\d+\.\s', skill_content):
        return 'simple-workflow'

    return 'simple-utility'
|
||||
|
||||
|
||||
def scan_workflow_integrity(skill_path: Path) -> dict:
    """Run all deterministic workflow integrity checks on one skill directory.

    Args:
        skill_path: Directory expected to contain ``SKILL.md``.

    Returns:
        A JSON-serialisable report. ``status`` is ``'fail'`` when any critical
        finding exists, ``'warning'`` when any high finding exists, otherwise
        ``'pass'``. The report always carries the same top-level keys, so
        consumers can read ``metadata``, ``stage_summary`` and
        ``prompt_details`` even when SKILL.md is missing (they are empty then).
    """
    skill_md = skill_path / 'SKILL.md'

    # is_file() rather than exists(): a directory named SKILL.md must be
    # reported as a finding instead of crashing read_text() below.
    if not skill_md.is_file():
        return {
            'scanner': 'workflow-integrity-prepass',
            'script': 'prepass-workflow-integrity.py',
            'version': '1.0.0',
            'skill_path': str(skill_path),
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'status': 'fail',
            # Empty placeholders keep the report shape uniform.
            'metadata': {},
            'stage_summary': {},
            'prompt_details': [],
            'issues': [{'file': 'SKILL.md', 'line': 1, 'severity': 'critical',
                        'category': 'missing-file', 'issue': 'SKILL.md does not exist'}],
            'summary': {'total_issues': 1, 'by_severity': {'critical': 1, 'high': 0, 'medium': 0, 'low': 0}},
        }

    all_findings = []
    skill_content = skill_md.read_text(encoding='utf-8')

    # Frontmatter
    frontmatter, fm_findings = parse_frontmatter(skill_content)
    all_findings.extend(fm_findings)

    # Sections
    sections = extract_sections(skill_content)
    all_findings.extend(check_required_sections(sections))

    # Template artifacts in SKILL.md
    all_findings.extend(find_template_artifacts(skill_md, 'SKILL.md'))

    # Directness checks in SKILL.md
    for pattern, message in DIRECTNESS_PATTERNS:
        for m in re.finditer(pattern, skill_content, re.IGNORECASE):
            line_num = skill_content[:m.start()].count('\n') + 1
            all_findings.append({
                'file': 'SKILL.md', 'line': line_num,
                'severity': 'low', 'category': 'language',
                'issue': message,
            })

    # Workflow type: numbered prompt files at the root influence the verdict.
    has_prompts = any(
        f.is_file() and f.suffix == '.md' and f.name != 'SKILL.md' and re.match(r'^\d+-', f.name)
        for f in skill_path.iterdir()
    )
    workflow_type = detect_workflow_type(skill_content, has_prompts)

    # Stage cross-reference
    stage_summary, stage_findings = cross_reference_stages(skill_path, skill_content)
    all_findings.extend(stage_findings)

    # Prompt basics
    prompt_details, prompt_findings = check_prompt_basics(skill_path)
    all_findings.extend(prompt_findings)

    # Severity tally; unknown severities are still listed in `issues`
    # but do not contribute to the counters.
    by_severity = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    for f in all_findings:
        sev = f['severity']
        if sev in by_severity:
            by_severity[sev] += 1

    status = 'pass'
    if by_severity['critical'] > 0:
        status = 'fail'
    elif by_severity['high'] > 0:
        status = 'warning'

    return {
        'scanner': 'workflow-integrity-prepass',
        'script': 'prepass-workflow-integrity.py',
        'version': '1.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'status': status,
        'metadata': {
            'frontmatter': frontmatter,
            'sections': sections,
            'workflow_type': workflow_type,
        },
        'stage_summary': stage_summary,
        'prompt_details': prompt_details,
        'issues': all_findings,
        'summary': {
            'total_issues': len(all_findings),
            'by_severity': by_severity,
        },
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point for the workflow integrity pre-pass.

    Returns the process exit code: 2 when the given path is not a directory,
    0 when the scan status is ``'pass'``, 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Deterministic pre-pass for workflow integrity scanning',
    )
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument('--output', '-o', type=Path, help='Write JSON output to file instead of stdout')
    opts = cli.parse_args()

    target = opts.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    report = scan_workflow_integrity(target)
    rendered = json.dumps(report, indent=2)

    destination = opts.output
    if not destination:
        # No --output given: the JSON report goes to stdout.
        print(rendered)
    else:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered)
        print(f"Results written to {destination}", file=sys.stderr)

    if report['status'] == 'pass':
        return 0
    return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)
|
||||
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic path standards scanner for BMad skills.
|
||||
|
||||
Validates all .md and .json files against BMad path conventions:
|
||||
1. {project-root} only valid before /_bmad
|
||||
2. Bare _bmad references must have {project-root} prefix
|
||||
3. Config variables used directly (no double-prefix)
|
||||
4. Skill-internal paths must use ./ prefix (references/, scripts/, assets/)
|
||||
5. No ../ parent directory references
|
||||
6. No absolute paths
|
||||
7. Frontmatter allows only name and description
|
||||
8. No .md files at skill root except SKILL.md
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# --- Detection patterns ------------------------------------------------------

# `{project-root}/` followed by anything other than `_bmad`.
PROJECT_ROOT_NOT_BMAD_RE = re.compile(r'\{project-root\}/(?!_bmad)')

# `_bmad` followed by `/` or whitespace and NOT immediately preceded by
# `{project-root}/`. Note there is no word-boundary assertion here, so
# `_bmad` at the end of a longer identifier (e.g. `foo_bmad/`) also matches.
BARE_BMAD_RE = re.compile(r'(?<!\{project-root\}/)_bmad[/\s]')

# Machine-specific absolute paths under well-known roots; group 1 is the path.
# The path must start a line or follow whitespace, a quote, a backtick or `(`.
ABSOLUTE_PATH_RE = re.compile(r'(?:^|[\s"`\'(])(/(?:Users|home|opt|var|tmp|etc|usr)/\S+)', re.M)

# Home-directory paths (`~/...`); group 1 is the path.
HOME_PATH_RE = re.compile(r'(?:^|[\s"`\'(])(~/\S+)', re.M)

# Parent-directory references (`../...`); group 1 is the path.
RELATIVE_DOT_RE = re.compile(r'(?:^|[\s"`\'(])(\.\./\S+)', re.M)

# Skill-internal folders (references/, scripts/, assets/) written without the
# `./` prefix; group 1 is the path. The leading delimiter requirement already
# keeps `./references/...` from matching, because `/` is not a delimiter.
BARE_INTERNAL_RE = re.compile(r'(?:^|[\s"`\'(])(?<!\./)((?:references|scripts|assets)/\S+)', re.M)

# A line that opens or closes a fenced code block (examples inside fences may
# intentionally show wrong patterns and are skipped by default).
FENCE_RE = re.compile(r'^```', re.M)

# The only keys SKILL.md frontmatter may contain.
VALID_FRONTMATTER_KEYS = {'name', 'description'}
|
||||
|
||||
|
||||
def is_in_fenced_block(content: str, pos: int) -> bool:
    """Return True when offset *pos* lies inside a fenced code block.

    Counts the fence lines that start before *pos*: an odd count means a
    fence has been opened and not yet closed.
    """
    fence_count = len(FENCE_RE.findall(content[:pos]))
    return fence_count % 2 == 1
|
||||
|
||||
|
||||
def get_line_number(content: str, pos: int) -> int:
    """Translate character offset *pos* into a 1-based line number."""
    newlines_before = content.count('\n', 0, pos)
    return newlines_before + 1
|
||||
|
||||
|
||||
def check_frontmatter(content: str, filepath: Path) -> list[dict]:
    """Validate that SKILL.md frontmatter contains only allowed keys.

    Args:
        content: Full text of the file.
        filepath: Path of the file; anything not named ``SKILL.md`` is ignored.

    Returns:
        A list of findings: a single critical finding when the frontmatter
        block is missing or never closed, otherwise one high finding per
        top-level key outside ``VALID_FRONTMATTER_KEYS``.
    """
    findings = []
    if filepath.name != 'SKILL.md':
        return findings

    if not content.startswith('---'):
        findings.append({
            'file': filepath.name,
            'line': 1,
            'severity': 'critical',
            'category': 'frontmatter',
            'title': 'SKILL.md missing frontmatter block',
            'detail': 'SKILL.md must start with --- frontmatter containing name and description',
            'action': 'Add frontmatter with name and description fields',
        })
        return findings

    # Find closing ---
    end = content.find('\n---', 3)
    if end == -1:
        findings.append({
            'file': filepath.name,
            'line': 1,
            'severity': 'critical',
            'category': 'frontmatter',
            'title': 'SKILL.md frontmatter block not closed',
            'detail': 'Missing closing --- for frontmatter',
            'action': 'Add closing --- after frontmatter fields',
        })
        return findings

    frontmatter = content[4:end]
    # The opening --- is line 1, so the first frontmatter line is line 2.
    for i, raw_line in enumerate(frontmatter.split('\n'), start=2):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        # Indented lines are continuations of a multi-line value or nested
        # content of the key above; a ':' inside them is not a top-level key.
        if raw_line[0] in ' \t':
            continue
        if ':' in line:
            key = line.split(':', 1)[0].strip()
            if key not in VALID_FRONTMATTER_KEYS:
                findings.append({
                    'file': filepath.name,
                    'line': i,
                    'severity': 'high',
                    'category': 'frontmatter',
                    'title': f'Invalid frontmatter key: {key}',
                    'detail': f'Only {", ".join(sorted(VALID_FRONTMATTER_KEYS))} are allowed in frontmatter',
                    'action': f'Remove {key} from frontmatter — use as content field in SKILL.md body instead',
                })

    return findings
|
||||
|
||||
|
||||
def check_root_md_files(skill_path: Path) -> list[dict]:
    """Report every Markdown file at the skill root other than SKILL.md.

    Findings are emitted in sorted path order so repeated scans of the same
    tree produce identical output, and only regular files are reported (a
    directory whose name happens to end in ``.md`` is not a prompt file).
    """
    findings = []
    for md_file in sorted(skill_path.glob('*.md')):
        if md_file.name == 'SKILL.md' or not md_file.is_file():
            continue
        findings.append({
            'file': md_file.name,
            'line': 0,
            'severity': 'high',
            'category': 'structure',
            'title': f'Prompt file at skill root: {md_file.name}',
            'detail': 'All progressive disclosure content must be in ./references/ — only SKILL.md belongs at root',
            'action': f'Move {md_file.name} to references/{md_file.name}',
        })
    return findings
|
||||
|
||||
|
||||
def scan_file(filepath: Path, skip_fenced: bool = True) -> list[dict]:
    """Scan a single file for path standard violations.

    Args:
        filepath: File to read (UTF-8).
        skip_fenced: When True, matches inside fenced code blocks are ignored.

    Returns:
        One finding per violation. ``file`` is the bare file name, ``line`` is
        the 1-based line of the offending path, and ``detail`` holds that
        line's stripped text truncated to 120 characters.
    """
    findings = []
    content = filepath.read_text(encoding='utf-8')
    rel_path = filepath.name
    # Split once; every finding looks its line up in this list.
    lines = content.split('\n')

    checks = [
        (PROJECT_ROOT_NOT_BMAD_RE, 'project-root-not-bmad', 'critical',
         '{project-root} used for non-_bmad path — only valid use is {project-root}/_bmad/...'),
        (ABSOLUTE_PATH_RE, 'absolute-path', 'high',
         'Absolute path found — not portable across machines'),
        (HOME_PATH_RE, 'absolute-path', 'high',
         'Home directory path (~/) found — environment-specific'),
        (RELATIVE_DOT_RE, 'relative-prefix', 'high',
         'Parent directory reference (../) found — fragile, breaks with reorganization'),
        (BARE_INTERNAL_RE, 'bare-internal-path', 'high',
         'Bare skill-internal path without ./ prefix — use ./references/, ./scripts/, ./assets/ to distinguish from {project-root} paths'),
    ]

    for pattern, category, severity, message in checks:
        for match in pattern.finditer(content):
            # Patterns with a capture group begin with a delimiter that may be
            # the newline ending the PREVIOUS line; anchoring on the captured
            # path (group 1) keeps the reported line on the path itself.
            pos = match.start(1) if pattern.groups else match.start()
            if skip_fenced and is_in_fenced_block(content, pos):
                continue
            line_num = get_line_number(content, pos)
            findings.append({
                'file': rel_path,
                'line': line_num,
                'severity': severity,
                'category': category,
                'title': message,
                'detail': lines[line_num - 1].strip()[:120],
                'action': '',
            })

    # Bare _bmad check. A `_bmad` segment deeper inside a path that itself
    # starts with {project-root}/ is fine, so inspect only the path token that
    # contains the match. A fixed-width look-back would also hide a genuinely
    # bare `_bmad/` that merely follows a correct reference on the same line.
    token_breaks = '"\'`()[]<>,;'
    for match in BARE_BMAD_RE.finditer(content):
        pos = match.start()
        if skip_fenced and is_in_fenced_block(content, pos):
            continue
        token_start = pos
        while token_start > 0:
            prev = content[token_start - 1]
            if prev.isspace() or prev in token_breaks:
                break
            token_start -= 1
        if '{project-root}/' in content[token_start:pos]:
            continue
        line_num = get_line_number(content, pos)
        findings.append({
            'file': rel_path,
            'line': line_num,
            'severity': 'high',
            'category': 'bare-bmad',
            'title': 'Bare _bmad reference without {project-root} prefix',
            'detail': lines[line_num - 1].strip()[:120],
            'action': '',
        })

    return findings
|
||||
|
||||
|
||||
def scan_skill(skill_path: Path, skip_fenced: bool = True) -> dict:
    """Scan every .md and .json file under *skill_path* and build the report.

    The report lists the files scanned, all findings, and counts of findings
    by severity and by category. ``status`` is ``'pass'`` only when there are
    no findings at all.
    """
    collected = []

    # Structural rule: only SKILL.md may sit at the skill root.
    collected += check_root_md_files(skill_path)

    # Frontmatter rule, when SKILL.md is present.
    skill_md = skill_path / 'SKILL.md'
    if skill_md.exists():
        collected += check_frontmatter(skill_md.read_text(encoding='utf-8'), skill_md)

    # Content rules over all Markdown and JSON files, recursively.
    targets = sorted([*skill_path.rglob('*.md'), *skill_path.rglob('*.json')])
    if not targets:
        print(f"Warning: No .md or .json files found in {skill_path}", file=sys.stderr)

    files_scanned = []
    for target in targets:
        relative = str(target.relative_to(skill_path))
        files_scanned.append(relative)
        per_file = scan_file(target, skip_fenced)
        # scan_file reports bare names; replace them with skill-relative paths.
        for finding in per_file:
            finding['file'] = relative
        collected += per_file

    # Tallies. Categories use underscores in the summary, hyphens in findings.
    by_severity = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    by_category = {
        'project_root_not_bmad': 0,
        'bare_bmad': 0,
        'double_prefix': 0,
        'absolute_path': 0,
        'relative_prefix': 0,
        'bare_internal_path': 0,
        'frontmatter': 0,
        'structure': 0,
    }
    for finding in collected:
        severity = finding['severity']
        if severity in by_severity:
            by_severity[severity] += 1
        category_key = finding['category'].replace('-', '_')
        if category_key in by_category:
            by_category[category_key] += 1

    summary = {
        'total_findings': len(collected),
        'by_severity': by_severity,
        'by_category': by_category,
        'assessment': 'Path standards scan complete',
    }
    return {
        'scanner': 'path-standards',
        'script': 'scan-path-standards.py',
        'version': '2.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'files_scanned': files_scanned,
        'status': 'fail' if collected else 'pass',
        'findings': collected,
        'assessments': {},
        'summary': summary,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point for the path standards scanner.

    Returns the process exit code: 2 when the given path is not a directory,
    0 when the scan status is ``'pass'``, 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Scan BMad skill for path standard violations',
    )
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument('--output', '-o', type=Path, help='Write JSON output to file instead of stdout')
    cli.add_argument(
        '--include-fenced',
        action='store_true',
        help='Also check inside fenced code blocks (by default they are skipped)',
    )
    opts = cli.parse_args()

    target = opts.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    # --include-fenced is the inverse of scan_skill's skip_fenced flag.
    report = scan_skill(target, skip_fenced=not opts.include_fenced)
    rendered = json.dumps(report, indent=2)

    destination = opts.output
    if not destination:
        print(rendered)
    else:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered)
        print(f"Results written to {destination}", file=sys.stderr)

    if report['status'] == 'pass':
        return 0
    return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)
|
||||
+745
@@ -0,0 +1,745 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic scripts scanner for BMad skills.
|
||||
|
||||
Validates scripts in a skill's scripts/ folder for:
|
||||
- PEP 723 inline dependencies (Python)
|
||||
- Shebang, set -e, portability (Shell)
|
||||
- Version pinning for npx/uvx
|
||||
- Agentic design: no input(), has argparse/--help, JSON output, exit codes
|
||||
- Unit test existence
|
||||
- Over-engineering signals (line count, simple-op imports)
|
||||
- External lint: ruff (Python), shellcheck (Bash), biome (JS/TS)
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.9"
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# External Linter Integration
|
||||
# =============================================================================
|
||||
|
||||
def _run_command(cmd: list[str], timeout: int = 30) -> tuple[int, str, str]:
|
||||
"""Run a command and return (returncode, stdout, stderr)."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=timeout,
|
||||
)
|
||||
return result.returncode, result.stdout, result.stderr
|
||||
except FileNotFoundError:
|
||||
return -1, '', f'Command not found: {cmd[0]}'
|
||||
except subprocess.TimeoutExpired:
|
||||
return -2, '', f'Command timed out after {timeout}s: {" ".join(cmd)}'
|
||||
|
||||
|
||||
def _find_uv() -> str | None:
|
||||
"""Find uv binary on PATH."""
|
||||
return shutil.which('uv')
|
||||
|
||||
|
||||
def _find_npx() -> str | None:
|
||||
"""Find npx binary on PATH."""
|
||||
return shutil.which('npx')
|
||||
|
||||
|
||||
def lint_python_ruff(filepath: Path, rel_path: str) -> list[dict]:
    """Run ruff on a Python file via uv and convert its report to findings.

    Args:
        filepath: Script to lint.
        rel_path: Path to show in findings.

    Returns:
        One finding per ruff diagnostic, or a single ``lint-setup`` / ``lint``
        finding when ruff could not be run, timed out, or produced output that
        cannot be parsed.
    """
    uv = _find_uv()
    if not uv:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': 'uv not found on PATH — cannot run ruff for Python linting',
            'detail': '',
            'action': 'Install uv: https://docs.astral.sh/uv/getting-started/installation/',
        }]

    rc, stdout, stderr = _run_command([
        uv, 'run', 'ruff', 'check', '--output-format', 'json', str(filepath),
    ])

    if rc == -1:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': f'Failed to run ruff via uv: {stderr.strip()}',
            'detail': '',
            'action': 'Ensure uv can install and run ruff: uv run ruff --version',
        }]

    if rc == -2:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'medium', 'category': 'lint',
            'title': f'ruff timed out on {rel_path}',
            'detail': '',
            'action': '',
        }]

    # A non-zero exit with nothing on stdout means ruff never produced a
    # report (e.g. uv could not start it). Reporting "no findings" here would
    # make an unlinted file look clean.
    if rc != 0 and not stdout.strip():
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': f'ruff exited with code {rc} without producing a report',
            'detail': stderr.strip()[:200],
            'action': 'Ensure uv can install and run ruff: uv run ruff --version',
        }]

    # ruff outputs JSON array on stdout (even on rc=1 when issues found)
    try:
        issues = json.loads(stdout) if stdout.strip() else []
    except json.JSONDecodeError:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'medium', 'category': 'lint',
            'title': f'Failed to parse ruff output for {rel_path}',
            'detail': '',
            'action': '',
        }]

    findings = []
    for issue in issues:
        # `code`, `fix` and `location` may be present but null, so `or`
        # fallbacks are used instead of .get() defaults.
        code = issue.get('code') or ''
        fix_msg = (issue.get('fix') or {}).get('message', '')
        location = issue.get('location') or {}
        # Prefer a documentation link supplied in the report, if any.
        docs_url = issue.get('url') or f'https://docs.astral.sh/ruff/rules/{code}'
        findings.append({
            'file': rel_path,
            'line': location.get('row', 0),
            'severity': 'high',
            'category': 'lint',
            'title': f'[{code or "?"}] {issue.get("message", "")}',
            'detail': '',
            'action': fix_msg or f'See {docs_url}',
        })

    return findings
|
||||
|
||||
|
||||
def lint_shell_shellcheck(filepath: Path, rel_path: str) -> list[dict]:
    """Run shellcheck (via ``uv run --with shellcheck-py``) on a shell script.

    Returns one finding per shellcheck diagnostic, or a single finding that
    explains why linting could not be performed.
    """

    def single(severity: str, category: str, title: str, action: str = '') -> list[dict]:
        # One-element result used for every "could not lint" outcome.
        return [{
            'file': rel_path, 'line': 0,
            'severity': severity, 'category': category,
            'title': title,
            'detail': '',
            'action': action,
        }]

    uv = _find_uv()
    if not uv:
        return single(
            'high', 'lint-setup',
            'uv not found on PATH — cannot run shellcheck for shell linting',
            'Install uv: https://docs.astral.sh/uv/getting-started/installation/',
        )

    command = [uv, 'run', '--with', 'shellcheck-py', 'shellcheck', '--format', 'json', str(filepath)]
    rc, stdout, stderr = _run_command(command)

    if rc == -1:
        return single(
            'high', 'lint-setup',
            f'Failed to run shellcheck via uv: {stderr.strip()}',
            'Ensure uv can install shellcheck-py: uv run --with shellcheck-py shellcheck --version',
        )
    if rc == -2:
        return single('medium', 'lint', f'shellcheck timed out on {rel_path}')

    # The JSON report is read from stdout, falling back to stderr when stdout
    # is empty (rc=1 simply means issues were found).
    payload = stdout.strip() or stderr.strip()
    try:
        diagnostics = json.loads(payload) if payload else []
    except json.JSONDecodeError:
        return single('medium', 'lint', f'Failed to parse shellcheck output for {rel_path}')

    # shellcheck level -> scanner severity; unknown levels count as high.
    severity_for_level = {'error': 'high', 'warning': 'high', 'info': 'high', 'style': 'medium'}

    results = []
    for diagnostic in diagnostics:
        code = diagnostic.get('code', '')
        level = diagnostic.get('level', '')
        results.append({
            'file': rel_path,
            'line': diagnostic.get('line', 0),
            'severity': severity_for_level.get(level, 'high'),
            'category': 'lint',
            'title': f'[SC{code}] {diagnostic.get("message", "")}',
            'detail': '',
            'action': f'See https://www.shellcheck.net/wiki/SC{code}',
        })
    return results
|
||||
|
||||
|
||||
def lint_node_biome(filepath: Path, rel_path: str) -> list[dict]:
    """Run biome on a JS/TS file via npx and convert its report to findings.

    Args:
        filepath: Script to lint.
        rel_path: Path to show in findings.

    Returns:
        One finding per biome diagnostic, or a single ``lint-setup`` / ``lint``
        finding when biome could not be run, timed out, or produced output that
        cannot be parsed.
    """
    npx = _find_npx()
    if not npx:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': 'npx not found on PATH — cannot run biome for JS/TS linting',
            'detail': '',
            'action': 'Install Node.js 20+: https://nodejs.org/',
        }]

    rc, stdout, stderr = _run_command([
        npx, '--yes', '@biomejs/biome', 'lint', '--reporter', 'json', str(filepath),
    ], timeout=60)

    if rc == -1:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': f'Failed to run biome via npx: {stderr.strip()}',
            'detail': '',
            'action': 'Ensure npx can run biome: npx @biomejs/biome --version',
        }]

    if rc == -2:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'medium', 'category': 'lint',
            'title': f'biome timed out on {rel_path}',
            'detail': '',
            'action': '',
        }]

    # biome outputs JSON on stdout
    raw = stdout.strip()

    # A non-zero exit with nothing on stdout means no report was produced
    # (e.g. npx could not fetch or start biome). Returning "no findings" here
    # would make an unlinted file look clean.
    if rc != 0 and not raw:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'high', 'category': 'lint-setup',
            'title': f'biome exited with code {rc} without producing a report',
            'detail': stderr.strip()[:200],
            'action': 'Ensure npx can run biome: npx @biomejs/biome --version',
        }]

    try:
        result = json.loads(raw) if raw else {}
    except json.JSONDecodeError:
        return [{
            'file': rel_path, 'line': 0,
            'severity': 'medium', 'category': 'lint',
            'title': f'Failed to parse biome output for {rel_path}',
            'detail': '',
            'action': '',
        }]

    diagnostics = (result.get('diagnostics') or []) if isinstance(result, dict) else []

    findings = []
    for diag in diagnostics:
        # Fields may be present but null, or shaped differently than a flat
        # string/list (NOTE(review): the exact report schema appears to vary
        # between biome versions — confirm against the pinned version).
        loc = diag.get('location') or {}
        start = loc.get('start') or {}

        message = diag.get('message', '')
        if isinstance(message, list):
            # Structured message: concatenate the text fragments.
            message = ''.join(
                part.get('content', '') for part in message if isinstance(part, dict)
            )
        elif not isinstance(message, str):
            message = str(message)
        if not message:
            message = diag.get('description') or ''

        # Only a list of advice objects carries a directly usable message;
        # indexing any other shape would raise.
        advices = diag.get('advices')
        action = ''
        if isinstance(advices, list) and advices and isinstance(advices[0], dict):
            action = advices[0].get('message', '')

        findings.append({
            'file': rel_path,
            'line': start.get('line', 0),
            'severity': 'high',
            'category': 'lint',
            'title': f'[{diag.get("category", "?")}] {message}',
            'detail': '',
            'action': action,
        })

    return findings
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# BMad Pattern Checks (Existing)
|
||||
# =============================================================================
|
||||
|
||||
def scan_python_script(filepath: Path, rel_path: str) -> list[dict]:
    """Check a Python script for standards compliance.

    Checks performed: PEP 723 inline metadata, references to
    requirements.txt / pip install, parseability, interactive ``input()``
    calls, presence of argparse / JSON output / exit codes (scripts longer
    than 20 lines only), and over-engineering signals for very short scripts.

    Args:
        filepath: Script to read (UTF-8).
        rel_path: Path to show in findings.

    Returns:
        The list of findings; if the script cannot be parsed, the list ends
        with a critical finding and the AST-based checks are skipped.
    """
    findings = []

    def add(line: int, severity: str, category: str, title: str, action: str = '') -> None:
        # All findings from this scanner share one shape.
        findings.append({
            'file': rel_path, 'line': line,
            'severity': severity, 'category': category,
            'title': title,
            'detail': '',
            'action': action,
        })

    content = filepath.read_text(encoding='utf-8')
    line_count = len(content.split('\n'))

    # PEP 723 check
    if '# /// script' not in content:
        # Only flag if the script has imports (not a trivial script)
        if 'import ' in content:
            add(1, 'medium', 'dependencies',
                'No PEP 723 inline dependency block (# /// script)',
                'Add PEP 723 block with requires-python and dependencies')
    elif 'requires-python' not in content:
        add(1, 'low', 'dependencies',
            'PEP 723 block exists but missing requires-python constraint',
            'Add requires-python = ">=3.9" or appropriate version')

    # requirements.txt reference
    if 'requirements.txt' in content or 'pip install' in content:
        add(1, 'high', 'dependencies',
            'References requirements.txt or pip install — use PEP 723 inline deps',
            'Replace with PEP 723 inline dependency block')

    # Agentic design checks via AST. ast.parse can also raise ValueError
    # (e.g. source containing NUL bytes on some Python versions).
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        add(1, 'critical', 'error-handling',
            'Python syntax error — script cannot be parsed')
        return findings

    has_argparse = False
    has_json_dumps = False
    has_sys_exit = False
    imports = set()
    # Local names that refer to argparse.ArgumentParser after a from-import.
    parser_names = {'ArgumentParser'}
    names_used = set()

    for node in ast.walk(tree):
        # Track imports
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.add(alias.name)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                imports.add(node.module)
            if node.module == 'argparse':
                for alias in node.names:
                    if alias.name == 'ArgumentParser' and alias.asname:
                        parser_names.add(alias.asname)
        elif isinstance(node, ast.Name):
            names_used.add(node.id)

        if isinstance(node, ast.Call):
            func = node.func
            # input() calls
            if isinstance(func, ast.Name) and func.id == 'input':
                add(node.lineno, 'critical', 'agentic-design',
                    'input() call found — blocks in non-interactive agent execution',
                    'Use argparse with required flags instead of interactive prompts')
            # json.dumps
            if isinstance(func, ast.Attribute) and func.attr == 'dumps':
                has_json_dumps = True
            # sys.exit / exit / SystemExit(...) all set the exit status
            if isinstance(func, ast.Attribute) and func.attr == 'exit':
                has_sys_exit = True
            if isinstance(func, ast.Name) and func.id in ('exit', 'SystemExit'):
                has_sys_exit = True

        # argparse.ArgumentParser accessed as an attribute
        if isinstance(node, ast.Attribute) and node.attr == 'ArgumentParser':
            has_argparse = True

    # ArgumentParser used by bare name (from argparse import ArgumentParser).
    if parser_names & names_used:
        has_argparse = True

    if not has_argparse and line_count > 20:
        add(1, 'medium', 'agentic-design',
            'No argparse found — script lacks --help self-documentation',
            'Add argparse with description and argument help text')

    if not has_json_dumps and line_count > 20:
        add(1, 'medium', 'agentic-design',
            'No json.dumps found — output may not be structured JSON',
            'Use json.dumps for structured output parseable by workflows')

    if not has_sys_exit and line_count > 20:
        add(1, 'low', 'agentic-design',
            'No sys.exit() calls — may not return meaningful exit codes',
            'Return 0=success, 1=fail, 2=error via sys.exit()')

    # Over-engineering: simple file ops in Python
    simple_op_imports = {'shutil', 'glob', 'fnmatch'}
    over_eng = imports & simple_op_imports
    if over_eng and line_count < 30:
        # sorted() keeps the message stable across runs (set order is not).
        add(1, 'low', 'over-engineered',
            f'Short script ({line_count} lines) imports {", ".join(sorted(over_eng))} — may be simpler as bash',
            'Consider if cp/mv/find shell commands would suffice')

    # Very short script
    if line_count < 5:
        add(1, 'medium', 'over-engineered',
            f'Script is only {line_count} lines — could be an inline command',
            'Consider inlining this command directly in the prompt')

    return findings
|
||||
|
||||
|
||||
def scan_shell_script(filepath: Path, rel_path: str) -> list[dict]:
    """Check a shell script for standards compliance.

    The script is read as UTF-8 and inspected with line-based text
    heuristics, in this order: shebang form, presence of ``set -e`` (or the
    equivalent ``set -o errexit``), hardcoded interpreter paths, ``g``-prefixed
    GNU tool names, unquoted ``$variables``, unpinned ``npx``/``uvx``
    invocations, and overall script length.

    Args:
        filepath: Location of the shell script to read.
        rel_path: Path stored in the ``file`` field of every finding.

    Returns:
        A list of finding dicts, each with the keys ``file``, ``line``,
        ``severity``, ``category``, ``title``, ``detail`` and ``action``.
        Findings appear in the order the checks are listed above.
    """
    findings = []
    content = filepath.read_text(encoding='utf-8')
    # str.split always yields at least one element, so lines[0] is safe even
    # for an empty file.
    lines = content.split('\n')
    line_count = len(lines)

    def report(line_no, severity, category, title, action):
        # Single place that fixes the shape (and key order) of a finding.
        findings.append({
            'file': rel_path, 'line': line_no,
            'severity': severity, 'category': category,
            'title': title,
            'detail': '',
            'action': action,
        })

    # Shebang
    first_line = lines[0]
    if not first_line.startswith('#!'):
        report(1, 'high', 'portability',
               'Missing shebang line',
               'Add #!/usr/bin/env bash or #!/usr/bin/env sh')
    elif '/usr/bin/env' not in first_line:
        report(1, 'medium', 'portability',
               f'Shebang uses hardcoded path: {first_line.strip()}',
               'Use #!/usr/bin/env bash for cross-platform compatibility')

    # set -e
    # 'set -e' is a prefix of 'set -eu', 'set -euo pipefail', etc., so one
    # substring test covers all short-option spellings that start with -e.
    # 'set -o errexit' is the long-form spelling of the same option.
    if 'set -e' not in content and 'set -o errexit' not in content:
        report(1, 'medium', 'error-handling',
               'Missing set -e — errors will be silently ignored',
               'Add set -e (or set -euo pipefail) near the top')

    # Hardcoded interpreter paths
    # The optional [\d.]* lets versioned binaries (python3, python3.11,
    # perl5.30) match; a bare \b after the name would reject them because a
    # letter followed by a digit is not a word boundary.
    hardcoded_re = re.compile(r'/usr/bin/(python|ruby|node|perl)[\d.]*\b')
    for i, line in enumerate(lines, 1):
        if hardcoded_re.search(line):
            report(i, 'medium', 'portability',
                   f'Hardcoded interpreter path: {line.strip()}',
                   'Use /usr/bin/env or PATH-based lookup')

    # GNU-only tools
    gnu_re = re.compile(r'\b(gsed|gawk|ggrep|gfind)\b')
    for i, line in enumerate(lines, 1):
        m = gnu_re.search(line)
        if m:
            report(i, 'medium', 'portability',
                   f'GNU-only tool: {m.group()} — not available on all platforms',
                   'Use POSIX-compatible equivalent')

    # Unquoted variables (basic check)
    unquoted_re = re.compile(r'(?<!")\$\w+(?!")')
    for i, line in enumerate(lines, 1):
        if line.strip().startswith('#'):
            continue
        for m in unquoted_re.finditer(line):
            # Skip inside double-quoted strings (rough heuristic): an odd
            # number of '"' before the match means a quote is still open.
            before = line[:m.start()]
            if before.count('"') % 2 == 1:
                continue
            report(i, 'low', 'portability',
                   f'Potentially unquoted variable: {m.group()} — breaks with spaces in paths',
                   f'Use "{m.group()}" with double quotes')

    # npx/uvx without version pinning
    # The negative lookahead rejects any package token that contains '@'.
    no_pin_re = re.compile(r'\b(npx|uvx)\s+([a-zA-Z][\w-]+)(?!\S*@)')
    for i, line in enumerate(lines, 1):
        if line.strip().startswith('#'):
            continue
        m = no_pin_re.search(line)
        if m:
            report(i, 'medium', 'dependencies',
                   f'{m.group(1)} {m.group(2)} without version pinning',
                   f'Pin version: {m.group(1)} {m.group(2)}@<version>')

    # Very short script
    if line_count < 5:
        report(1, 'medium', 'over-engineered',
               f'Script is only {line_count} lines — could be an inline command',
               'Consider inlining this command directly in the prompt')

    return findings
|
||||
|
||||
|
||||
def scan_node_script(filepath: Path, rel_path: str) -> list[dict]:
    """Check a JS/TS script for standards compliance.

    Two checks are applied to the UTF-8 text of the file: every line is
    searched for an ``npx``/``uvx`` invocation whose package token carries no
    ``@``, and the file as a whole is flagged when it has fewer than five
    lines.

    Args:
        filepath: Location of the script to read.
        rel_path: Path stored in the ``file`` field of every finding.

    Returns:
        A list of finding dicts (``file``, ``line``, ``severity``,
        ``category``, ``title``, ``detail``, ``action``); pinning findings
        come first in line order, followed by the length finding if any.
    """
    source_lines = filepath.read_text(encoding='utf-8').split('\n')
    unpinned = re.compile(r'\b(npx|uvx)\s+([a-zA-Z][\w-]+)(?!\S*@)')

    findings = []

    # npx/uvx without version pinning (at most one finding per line)
    for lineno, text in enumerate(source_lines, 1):
        hit = unpinned.search(text)
        if hit is None:
            continue
        runner, package = hit.group(1), hit.group(2)
        findings.append({
            'file': rel_path, 'line': lineno,
            'severity': 'medium', 'category': 'dependencies',
            'title': f'{runner} {package} without version pinning',
            'detail': '',
            'action': f'Pin version: {runner} {package}@<version>',
        })

    # Very short script
    total = len(source_lines)
    if total < 5:
        findings.append({
            'file': rel_path, 'line': 1,
            'severity': 'medium', 'category': 'over-engineered',
            'title': f'Script is only {total} lines — could be an inline command',
            'detail': '',
            'action': 'Consider inlining this command directly in the prompt',
        })

    return findings
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Main Scanner
|
||||
# =============================================================================
|
||||
|
||||
def scan_skill_scripts(skill_path: Path) -> dict:
    """Scan all scripts in a skill directory.

    Looks at the files directly inside ``<skill_path>/scripts`` (no recursion)
    whose suffix is one of ``.py``, ``.sh``, ``.bash``, ``.js``, ``.ts`` or
    ``.mjs``. Each file is passed to the matching per-language scanner and
    linter, then checked for a companion test file under ``scripts/tests``.

    Args:
        skill_path: Root directory of the skill.

    Returns:
        A report dict with the keys ``scanner``, ``script``, ``version``,
        ``skill_path``, ``timestamp``, ``status``, ``findings``,
        ``assessments`` and ``summary``. ``status`` is ``'fail'`` when any
        critical finding exists, ``'warning'`` when any high finding exists,
        and ``'pass'`` otherwise. The same schema is returned when the
        ``scripts/`` directory does not exist; in that case the only entry in
        ``findings`` is an informational notice that is not counted in
        ``summary``.
    """
    scripts_dir = skill_path / 'scripts'
    all_findings = []
    lint_findings = []
    script_inventory = {'python': [], 'shell': [], 'node': [], 'other': []}
    missing_tests = []

    if not scripts_dir.exists():
        return {
            'scanner': 'scripts',
            'script': 'scan-scripts.py',
            'version': '2.0.0',
            'skill_path': str(skill_path),
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'status': 'pass',
            'findings': [{
                'file': 'scripts/',
                # Directory-level finding: line 0, like the missing tests/
                # finding further down.
                'line': 0,
                'severity': 'info',
                'category': 'none',
                'title': 'No scripts/ directory found — nothing to scan',
                'detail': '',
                'action': '',
            }],
            'assessments': {
                'lint_summary': {
                    'tools_used': [],
                    'files_linted': 0,
                    'lint_issues': 0,
                },
                'script_summary': {
                    'total_scripts': 0,
                    # Counts, not lists, so the shape matches the normal
                    # return path below.
                    'by_type': {k: len(v) for k, v in script_inventory.items()},
                    'scripts': {},
                    'missing_tests': [],
                },
            },
            'summary': {
                'total_findings': 0,
                'by_severity': {'critical': 0, 'high': 0, 'medium': 0, 'low': 0},
                'by_category': {},
                'assessment': '',
            },
        }

    # Find all script files. iterdir() is not recursive, so anything under
    # tests/ or __pycache__ is never visited.
    script_files = []
    for f in sorted(scripts_dir.iterdir()):
        if f.is_file() and f.suffix in ('.py', '.sh', '.bash', '.js', '.ts', '.mjs'):
            script_files.append(f)

    tests_dir = scripts_dir / 'tests'
    lint_tools_used = set()

    for script_file in script_files:
        rel_path = f'scripts/{script_file.name}'
        ext = script_file.suffix

        # NOTE(review): a linter is recorded in lint_tools_used only when it
        # returned at least one finding and none of them is 'lint-setup'. If
        # the lint helpers return an empty list for a clean file, that tool is
        # not recorded — confirm this is intended.
        if ext == '.py':
            script_inventory['python'].append(script_file.name)
            findings = scan_python_script(script_file, rel_path)
            lf = lint_python_ruff(script_file, rel_path)
            lint_findings.extend(lf)
            if lf and not any(f['category'] == 'lint-setup' for f in lf):
                lint_tools_used.add('ruff')
        elif ext in ('.sh', '.bash'):
            script_inventory['shell'].append(script_file.name)
            findings = scan_shell_script(script_file, rel_path)
            lf = lint_shell_shellcheck(script_file, rel_path)
            lint_findings.extend(lf)
            if lf and not any(f['category'] == 'lint-setup' for f in lf):
                lint_tools_used.add('shellcheck')
        elif ext in ('.js', '.ts', '.mjs'):
            script_inventory['node'].append(script_file.name)
            findings = scan_node_script(script_file, rel_path)
            lf = lint_node_biome(script_file, rel_path)
            lint_findings.extend(lf)
            if lf and not any(f['category'] == 'lint-setup' for f in lf):
                lint_tools_used.add('biome')
        else:
            # Not reachable with the suffix filter above; kept so a widened
            # filter still produces a well-formed inventory.
            script_inventory['other'].append(script_file.name)
            findings = []

        # Check for unit tests: accept the script's own extension in four
        # naming styles, plus a Python test for a script of any language.
        if tests_dir.exists():
            stem = script_file.stem
            test_patterns = [
                f'test_{stem}{ext}', f'test-{stem}{ext}',
                f'{stem}_test{ext}', f'{stem}-test{ext}',
                f'test_{stem}.py', f'test-{stem}.py',
            ]
            has_test = any((tests_dir / t).exists() for t in test_patterns)
        else:
            has_test = False

        if not has_test:
            missing_tests.append(script_file.name)
            findings.append({
                'file': rel_path, 'line': 1,
                'severity': 'medium', 'category': 'tests',
                'title': f'No unit test found for {script_file.name}',
                'detail': '',
                'action': f'Create scripts/tests/test-{script_file.stem}{ext} with test cases',
            })

        all_findings.extend(findings)

    # Check if tests/ directory exists at all
    if script_files and not tests_dir.exists():
        all_findings.append({
            'file': 'scripts/tests/',
            'line': 0,
            'severity': 'high',
            'category': 'tests',
            'title': 'scripts/tests/ directory does not exist — no unit tests',
            'detail': '',
            'action': 'Create scripts/tests/ with test files for each script',
        })

    # Merge lint findings into all findings
    all_findings.extend(lint_findings)

    # Build summary. Severities outside the four tracked levels are left out
    # of by_severity but are still counted in by_category and total_findings.
    by_severity = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    by_category: dict[str, int] = {}
    for f in all_findings:
        sev = f['severity']
        if sev in by_severity:
            by_severity[sev] += 1
        cat = f['category']
        by_category[cat] = by_category.get(cat, 0) + 1

    total_scripts = sum(len(v) for v in script_inventory.values())
    status = 'pass'
    if by_severity['critical'] > 0:
        status = 'fail'
    elif by_severity['high'] > 0:
        status = 'warning'

    lint_issue_count = sum(1 for f in lint_findings if f['category'] == 'lint')

    return {
        'scanner': 'scripts',
        'script': 'scan-scripts.py',
        'version': '2.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'status': status,
        'findings': all_findings,
        'assessments': {
            'lint_summary': {
                'tools_used': sorted(lint_tools_used),
                'files_linted': total_scripts,
                'lint_issues': lint_issue_count,
            },
            'script_summary': {
                'total_scripts': total_scripts,
                'by_type': {k: len(v) for k, v in script_inventory.items()},
                'scripts': {k: v for k, v in script_inventory.items() if v},
                'missing_tests': missing_tests,
            },
        },
        'summary': {
            'total_findings': len(all_findings),
            'by_severity': by_severity,
            'by_category': by_category,
            'assessment': '',
        },
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point for the script scanner.

    Parses a required ``skill_path`` and an optional ``--output``/``-o`` file,
    runs the scan, and emits the report as indented JSON either to that file
    (creating parent directories as needed) or to stdout.

    Returns:
        ``2`` when ``skill_path`` is not a directory, ``0`` when the report
        status is ``'pass'``, and ``1`` for any other status.
    """
    cli = argparse.ArgumentParser(
        description='Scan BMad skill scripts for quality, portability, agentic design, and lint issues',
    )
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument('--output', '-o', type=Path, help='Write JSON output to file instead of stdout')
    opts = cli.parse_args()

    target = opts.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    report = scan_skill_scripts(target)
    rendered = json.dumps(report, indent=2)

    destination = opts.output
    if not destination:
        print(rendered)
    else:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered)
        # Status message goes to stderr so stdout stays free of non-JSON text.
        print(f"Results written to {destination}", file=sys.stderr)

    if report['status'] == 'pass':
        return 0
    return 1
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit code.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user