232 lines
6.2 KiB
Python
232 lines
6.2 KiB
Python
"""
Parse Gherkin .feature files.

A simple parser with no external dependencies; it handles only the subset of
Gherkin that we need. For full Gherkin support, the gherkin-python package
could be used later.
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass
class GherkinScenario:
    """A single scenario (or scenario outline) taken from a feature file."""

    # Title text written after the scenario keyword.
    name: str
    # Free-form description text for the scenario.
    description: str
    # Tags attached to the scenario; every instance gets its own list.
    tags: list[str] = field(default_factory=list)
    # Step lines belonging to the scenario, in file order.
    steps: list[str] = field(default_factory=dict.fromkeys(()).__class__.__call__ if False else list)
    # Examples data for outlines (the dict layout is not defined here).
    examples: dict = field(default_factory=dict)
    # Keyword that introduced the scenario, e.g. "Scenario",
    # "Scenario Outline" or "Esquema del escenario".
    scenario_type: str = "Scenario"
|
|
|
|
|
|
@dataclass
class GherkinFeature:
    """The result of parsing one Gherkin feature file."""

    # Title text written after the feature keyword.
    name: str
    # Free-form description text for the feature.
    description: str
    # Path of the file the feature came from, stored as a string.
    file_path: str
    # Language code of the keywords used: "en" (default) or "es".
    language: str = "en"
    # Tags attached to the feature; every instance gets its own list.
    tags: list[str] = field(default_factory=list)
    # Background section data, if any (the dict layout is not defined here).
    background: Optional[dict] = None
    # Scenarios that belong to the feature.
    scenarios: list[GherkinScenario] = field(default_factory=list)
|
|
|
|
|
|
def parse_feature_file(file_path: Path) -> Optional[GherkinFeature]:
    """Parse a Gherkin ``.feature`` file.

    English keywords are used unless the file contains a ``# language: es``
    marker, in which case the Spanish keywords are used. The parser extracts
    the feature name, description and tags, and for every scenario its name,
    keyword, tags and step lines. Background steps and Examples tables are
    skipped rather than stored.

    Args:
        file_path: Location of the feature file.

    Returns:
        The parsed feature, or ``None`` when the file does not exist, cannot
        be read or decoded, or has no named feature line.
    """
    if not file_path.exists():
        return None

    try:
        # "utf-8-sig" decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which would otherwise hide a first-line keyword.
        content = file_path.read_text(encoding='utf-8-sig')
    except (OSError, ValueError):
        # Unreadable or undecodable files are reported as "no feature".
        return None

    # Detect language
    language = "es" if re.search(r"#\s*language:\s*es", content) else "en"

    # Keywords by language
    if language == "es":
        feature_kw = r"Característica"
        scenario_kw = r"Escenario"
        outline_kw = r"Esquema del escenario"
        background_kw = r"Antecedentes"
        examples_kw = r"Ejemplos"
        given_kw = r"Dado"
        when_kw = r"Cuando"
        then_kw = r"Entonces"
        and_kw = r"Y"
        but_kw = r"Pero"
    else:
        feature_kw = r"Feature"
        scenario_kw = r"Scenario"
        outline_kw = r"Scenario Outline"
        background_kw = r"Background"
        examples_kw = r"Examples"
        given_kw = r"Given"
        when_kw = r"When"
        then_kw = r"Then"
        and_kw = r"And"
        but_kw = r"But"

    # Compile the patterns once; they do not change while scanning lines.
    feature_re = re.compile(rf"^{feature_kw}:\s*(.+)")
    scenario_re = re.compile(rf"^({scenario_kw}|{outline_kw}):\s*(.+)")
    examples_re = re.compile(rf"^{examples_kw}:")
    description_end_re = re.compile(
        rf"^({scenario_kw}|{outline_kw}|{background_kw}):"
    )
    scenario_end_re = re.compile(
        rf"^({scenario_kw}|{outline_kw}|{examples_kw}):"
    )
    step_re = re.compile(
        rf"^({given_kw}|{when_kw}|{then_kw}|{and_kw}|{but_kw})\s+"
    )

    lines = content.split('\n')

    feature_name = None
    feature_desc = []
    feature_tags = []
    scenarios = []
    # Tags seen since the last element that consumed them.
    current_tags = []

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        # Skip comments and empty lines
        if not line or line.startswith('#'):
            i += 1
            continue

        # Tags: collect them for the next feature or scenario
        if line.startswith('@'):
            current_tags.extend(re.findall(r'@[\w-]+', line))
            i += 1
            continue

        # Examples: tags placed above an Examples block belong to that block,
        # so drop them instead of letting them leak onto the next scenario.
        if examples_re.match(line):
            current_tags = []
            i += 1
            continue

        # Feature
        feature_match = feature_re.match(line)
        if feature_match:
            feature_name = feature_match.group(1).strip()
            feature_tags = current_tags.copy()
            current_tags = []

            # Read feature description
            i += 1
            while i < len(lines):
                line = lines[i].strip()
                if not line or line.startswith('#'):
                    i += 1
                    continue
                # Stop at a scenario or background, and also at a tag line:
                # tags here belong to the scenario that follows and must be
                # handled by the outer loop, not absorbed into the description.
                if line.startswith('@') or description_end_re.match(line):
                    break
                feature_desc.append(line)
                i += 1
            continue

        # Scenario
        scenario_match = scenario_re.match(line)
        if scenario_match:
            scenario = GherkinScenario(
                name=scenario_match.group(2).strip(),
                description="",
                tags=current_tags.copy(),
                steps=[],
                scenario_type=scenario_match.group(1),
            )
            # Register right away; steps are appended to the same object below.
            scenarios.append(scenario)
            current_tags = []

            # Read scenario steps
            i += 1
            while i < len(lines):
                line = lines[i].strip()

                # Empty or comment
                if not line or line.startswith('#'):
                    i += 1
                    continue

                # Next scenario, an Examples block, or tags for the next
                # element: hand the line back to the outer loop.
                if line.startswith('@') or scenario_end_re.match(line):
                    break

                # Keep step lines; anything else (free text, tables) is skipped
                if step_re.match(line):
                    scenario.steps.append(line)

                i += 1
            continue

        # Anything else at this level (e.g. background steps, table rows)
        i += 1

    if not feature_name:
        return None

    return GherkinFeature(
        name=feature_name,
        description=" ".join(feature_desc),
        file_path=str(file_path),
        language=language,
        tags=feature_tags,
        scenarios=scenarios,
    )
|
|
|
|
|
|
def discover_features(features_dir: Path) -> list[GherkinFeature]:
    """
    Recursively collect every parseable .feature file under a directory.

    Returns an empty list when the directory does not exist. Files for which
    parsing yields no feature are left out.
    """
    if not features_dir.exists():
        return []

    parsed_files = (
        parse_feature_file(path) for path in features_dir.rglob("*.feature")
    )
    return [feature for feature in parsed_files if feature]
|
|
|
|
|
|
def extract_tags_from_features(features: list[GherkinFeature]) -> set[str]:
    """Return the set of distinct tags used by the features and their scenarios."""
    unique_tags = set()

    for feat in features:
        # Feature-level tags first, then the tags of each scenario.
        unique_tags.update(feat.tags, *(sc.tags for sc in feat.scenarios))

    return unique_tags
|
|
|
|
|
|
def get_feature_names(features: list[GherkinFeature]) -> list[str]:
    """Return the name of each feature, in input order."""
    return [feature.name for feature in features]
|
|
|
|
|
|
def get_scenario_names(features: list[GherkinFeature]) -> list[str]:
    """Return every scenario name, feature by feature, in file order."""
    return [
        scenario.name
        for feature in features
        for scenario in feature.scenarios
    ]
|