suricata/scripts/check-doc-rules.py
Philippe Antoine d84b6789c9 ci: make doc rules validation strict
not finding anymore problems now
2026-05-05 20:56:25 +02:00

215 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""Extract Suricata rule examples from documentation RST files.
This script scans a documentation tree for ``.. container:: example-rule`` blocks,
converts role markup such as ``:example-rule-emphasis:`any``` back to plain text,
and tries to load them with Suricata, outputting invalid rules.
"""
from __future__ import annotations
import argparse
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Iterable, Iterator, List, Optional, Tuple
# Matches the custom RST roles wrapped around rule fragments, e.g.
# :example-rule-emphasis:`any`, optionally preceded by a backtick.
# Group 1 captures the plain text between the role's backticks.
ROLE_RE = re.compile(
    r"`?:example-rule-(?:action|header|options|emphasis):`([^`]*)`"
)
# (rule text, RST file the rule came from, line number reported for it)
RuleWithOrigin = Tuple[str, Path, int]
def indent_width(line: str) -> int:
    """Return the number of leading space characters in *line*.

    Only the space character counts; a tab or any other character ends the
    indentation.
    """
    width = 0
    for char in line:
        if char != " ":
            break
        width += 1
    return width
def clean_rule_text(text: str) -> str:
    """Convert the RST body of an example-rule block into a one-line rule.

    Role markup matched by ``ROLE_RE`` is replaced by the text it wraps,
    backslash line continuations are joined, escaped ``|`` and ``*``
    characters are unescaped, and every run of whitespace is collapsed to a
    single space.

    Returns:
        The cleaned rule with no leading or trailing whitespace.
    """
    # Keep only the text inside each custom role.
    rule = ROLE_RE.sub(r"\1", text)
    # A trailing backslash wraps a long rule onto the next line; rejoin them.
    rule = re.sub(r"\\\s*\n\s*", " ", rule)
    # Undo the RST escaping of pipe and asterisk characters.
    for escaped, plain in (("\\|", "|"), ("\\*", "*")):
        rule = rule.replace(escaped, plain)
    # Flatten remaining newlines and repeated blanks.
    return re.sub(r"\s+", " ", rule).strip()
def collect_container_body(lines: List[str], start_idx: int) -> Tuple[str, int]:
    """Gather the indented body that follows a directive line.

    Args:
        lines: All lines of the document.
        start_idx: Index in *lines* of the directive line.

    Returns:
        A ``(body, next_idx)`` tuple. ``body`` is the block's text with the
        common indentation removed and surrounding whitespace stripped (empty
        if the block has no content). ``next_idx`` is the index of the first
        line that is not part of the block, or ``len(lines)`` if the block
        runs to the end of the document.
    """
    directive_indent = indent_width(lines[start_idx])
    collected: List[str] = []
    cursor = start_idx + 1
    total = len(lines)
    while cursor < total:
        current = lines[cursor]
        is_blank = not current.strip()
        # The first non-blank line indented no deeper than the directive
        # ends the block; blank lines never end it.
        if not is_blank and indent_width(current) <= directive_indent:
            break
        collected.append("" if is_blank else current)
        cursor += 1

    indents = [indent_width(entry) for entry in collected if entry.strip()]
    if not indents:
        return "", cursor

    # Strip the indentation shared by every non-blank body line.
    margin = min(indents)
    body = "\n".join(
        entry[margin:] if entry.strip() else "" for entry in collected
    )
    return body.strip(), cursor
def extract_rules_from_rst(path: Path) -> Iterator[Tuple[str, int]]:
    """Yield every example rule found in an RST file with its location.

    The file is read as UTF-8 and scanned for lines that are exactly
    ``.. container:: example-rule`` once surrounding whitespace is removed.
    The indented body of each such directive is cleaned into a single-line
    rule; directives with an empty body are skipped.

    Args:
        path: RST file to scan.

    Yields:
        ``(rule, line_number)`` tuples, where ``line_number`` is the 1-based
        line of the ``.. container:: example-rule`` directive that introduced
        the rule.
    """
    lines = path.read_text(encoding="utf-8").splitlines()
    i = 0
    while i < len(lines):
        if lines[i].strip() != ".. container:: example-rule":
            i += 1
            continue
        # Remember where the block starts before ``i`` is moved past it, so
        # the reported location points at the directive, not at the line
        # following the block.
        directive_line = i + 1
        block_text, i = collect_container_body(lines, i)
        if block_text:
            yield clean_rule_text(block_text), directive_line
def iter_rst_files(path: Path) -> Iterable[Path]:
    """Return the RST files designated by *path*.

    A directory yields all ``*.rst`` files below it (recursively) in sorted
    order, a regular file with the ``.rst`` suffix yields itself, and
    anything else yields nothing.
    """
    if path.is_dir():
        return sorted(path.rglob("*.rst"))
    is_rst_file = path.is_file() and path.suffix == ".rst"
    return [path] if is_rst_file else []
def resolve_suricata_bin(repo_root: Path, configured: Optional[str]) -> Path:
    """Locate the Suricata executable to use.

    Lookup order: the explicitly configured path (returned as-is, without
    checking that it exists), then ``suricata`` on ``PATH``, then
    ``<repo_root>/src/suricata`` and ``<repo_root>/suricata``.

    Args:
        repo_root: Directory searched for a locally built binary.
        configured: Path given by the user; a falsy value means "not given".

    Raises:
        SystemExit: If none of the lookups finds a binary.
    """
    if configured:
        return Path(configured)

    on_path = shutil.which("suricata")
    if on_path:
        return Path(on_path)

    fallbacks = (repo_root / "src" / "suricata", repo_root / "suricata")
    found = next((entry for entry in fallbacks if entry.exists()), None)
    if found is None:
        raise SystemExit(
            "Unable to find Suricata binary. Use --suricata-bin to provide it."
        )
    return found
def check_rule_with_suricata(
    rule: str,
    suricata_bin: Path,
    suricata_yaml: Path,
) -> Tuple[bool, str]:
    """Run Suricata in test mode (``-T``) against a single rule.

    The rule is written to ``rule.rules`` in a fresh temporary directory,
    which is also passed to Suricata as its data directory and log
    directory. The directory is removed when the check finishes.

    Args:
        rule: Rule text to validate.
        suricata_bin: Suricata executable to run.
        suricata_yaml: Configuration file passed with ``-c``.

    Returns:
        ``(is_valid, stderr)``: ``is_valid`` is true when the process exits
        with status 0, and ``stderr`` is its stripped standard error output.
    """
    with tempfile.TemporaryDirectory(prefix="doc-rule-check-") as workdir:
        rules_path = Path(workdir) / "rule.rules"
        rules_path.write_text(rule + "\n", encoding="utf-8")
        command = [
            str(suricata_bin),
            "-T",
            "-c",
            str(suricata_yaml),
            f"--data-dir={workdir}",
            "-S",
            str(rules_path),
            "--strict-rule-keywords=all",
            "-l",
            workdir,
        ]
        # A non-zero exit is an expected outcome here, hence check=False.
        result = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
        )
    succeeded = result.returncode == 0
    return succeeded, result.stderr.strip()
def main() -> int:
    """Validate every documented example rule with Suricata.

    Parses the command line, collects the rules from the ``example-rule``
    containers under the requested documentation path, and runs each one
    through ``check_rule_with_suricata``. Every rejected rule is printed with
    its origin and Suricata's stderr, followed by a summary line.

    Returns:
        ``1`` if at least one rule was rejected, ``0`` otherwise.

    Raises:
        SystemExit: If the doc path or the configuration file does not exist,
            or if no Suricata binary can be located.
    """
    parser = argparse.ArgumentParser(
        description="Check Suricata rules from doc RST example-rule containers."
    )
    parser.add_argument(
        "doc_path",
        nargs="?",
        default="doc",
        help="Path to doc directory or .rst file (default: doc)",
    )
    parser.add_argument(
        "--suricata-bin",
        default=None,
        help="Path to Suricata binary (default: auto-detect)",
    )
    parser.add_argument(
        "--suricata-yaml",
        default=None,
        # Keep this in sync with the fallback path computed below.
        help=(
            "Path to suricata.yaml "
            "(default: <repo>/scripts/docrules/docrules.yaml)"
        ),
    )
    args = parser.parse_args()

    doc_path = Path(args.doc_path)
    if not doc_path.exists():
        raise SystemExit(f"Invalid doc path: {doc_path}")

    # The repository root is the parent of the directory holding this script.
    repo_root = Path(__file__).resolve().parents[1]
    suricata_bin = resolve_suricata_bin(repo_root, args.suricata_bin)
    suricata_yaml = (
        Path(args.suricata_yaml)
        if args.suricata_yaml
        else (repo_root / "scripts" / "docrules" / "docrules.yaml")
    )
    if not suricata_yaml.exists():
        raise SystemExit(
            f"suricata.yaml not found: {suricata_yaml}. Use --suricata-yaml."
        )

    rules_with_origin: List[RuleWithOrigin] = [
        (rule, rst_file, line_number)
        for rst_file in iter_rst_files(doc_path)
        for rule, line_number in extract_rules_from_rst(rst_file)
    ]

    invalid_rules = 0
    for index, (rule, source_file, line_number) in enumerate(
        rules_with_origin, start=1
    ):
        is_valid, output_text = check_rule_with_suricata(
            rule,
            suricata_bin,
            suricata_yaml,
        )
        if is_valid:
            continue
        invalid_rules += 1
        print(
            f"Invalid rule at #{index} ({source_file}:{line_number})\n"
            f"Rule: {rule}\n"
            f"Suricata stderr:\n{output_text}\n"
        )

    if invalid_rules:
        print(
            f"Found {invalid_rules} invalid rule(s) out of {len(rules_with_origin)} checked.",
            end="\n\n",
        )
        return 1
    print(
        f"Found no invalid rule out of {len(rules_with_origin)} checked.",
        end="\n\n",
    )
    return 0
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())