""" Parse Gherkin .feature files. Simple parser without external dependencies - parses the subset we need. For full Gherkin support, could use gherkin-python package later. """ import re from pathlib import Path from typing import Optional from dataclasses import dataclass, field @dataclass class GherkinScenario: """A Gherkin scenario.""" name: str description: str tags: list[str] = field(default_factory=list) steps: list[str] = field(default_factory=list) examples: dict = field(default_factory=dict) scenario_type: str = "Scenario" # or "Scenario Outline" / "Esquema del escenario" @dataclass class GherkinFeature: """A parsed Gherkin feature file.""" name: str description: str file_path: str language: str = "en" # or "es" tags: list[str] = field(default_factory=list) background: Optional[dict] = None scenarios: list[GherkinScenario] = field(default_factory=list) def parse_feature_file(file_path: Path) -> Optional[GherkinFeature]: """ Parse a Gherkin .feature file. Supports both English and Spanish keywords. Extracts: Feature name, scenarios, tags, steps. """ if not file_path.exists(): return None try: content = file_path.read_text(encoding='utf-8') except Exception: return None # Detect language language = "en" if re.search(r"#\s*language:\s*es", content): language = "es" # Keywords by language if language == "es": feature_kw = r"Característica" scenario_kw = r"Escenario" outline_kw = r"Esquema del escenario" background_kw = r"Antecedentes" examples_kw = r"Ejemplos" given_kw = r"Dado" when_kw = r"Cuando" then_kw = r"Entonces" and_kw = r"Y" but_kw = r"Pero" else: feature_kw = r"Feature" scenario_kw = r"Scenario" outline_kw = r"Scenario Outline" background_kw = r"Background" examples_kw = r"Examples" given_kw = r"Given" when_kw = r"When" then_kw = r"Then" and_kw = r"And" but_kw = r"But" lines = content.split('\n') # Extract feature feature_name = None feature_desc = [] feature_tags = [] scenarios = [] current_scenario = None current_tags = [] i = 0 while i < len(lines): line = lines[i].strip() # Skip comments and empty lines if not line or line.startswith('#'): i += 1 continue # Tags if line.startswith('@'): tags = re.findall(r'@[\w-]+', line) current_tags.extend(tags) i += 1 continue # Feature feature_match = re.match(rf"^{feature_kw}:\s*(.+)", line) if feature_match: feature_name = feature_match.group(1).strip() feature_tags = current_tags.copy() current_tags = [] # Read feature description i += 1 while i < len(lines): line = lines[i].strip() if not line or line.startswith('#'): i += 1 continue # Stop at scenario or background if re.match(rf"^({scenario_kw}|{outline_kw}|{background_kw}):", line): break feature_desc.append(line) i += 1 continue # Scenario scenario_match = re.match(rf"^({scenario_kw}|{outline_kw}):\s*(.+)", line) if scenario_match: # Save previous scenario if current_scenario: scenarios.append(current_scenario) scenario_type = scenario_match.group(1) scenario_name = scenario_match.group(2).strip() current_scenario = GherkinScenario( name=scenario_name, description="", tags=current_tags.copy(), steps=[], scenario_type=scenario_type ) current_tags = [] # Read scenario steps i += 1 while i < len(lines): line = lines[i].strip() # Empty or comment if not line or line.startswith('#'): i += 1 continue # New scenario or feature-level element if re.match(rf"^({scenario_kw}|{outline_kw}|{examples_kw}):", line): break # Tags (start of next scenario) if line.startswith('@'): break # Step keywords if re.match(rf"^({given_kw}|{when_kw}|{then_kw}|{and_kw}|{but_kw})\s+", line): current_scenario.steps.append(line) i += 1 continue i += 1 # Add last scenario if current_scenario: scenarios.append(current_scenario) if not feature_name: return None return GherkinFeature( name=feature_name, description=" ".join(feature_desc), file_path=str(file_path), language=language, tags=feature_tags, scenarios=scenarios ) def discover_features(features_dir: Path) -> list[GherkinFeature]: """ Discover all .feature files in the features directory. """ if not features_dir.exists(): return [] features = [] for feature_file in features_dir.rglob("*.feature"): parsed = parse_feature_file(feature_file) if parsed: features.append(parsed) return features def extract_tags_from_features(features: list[GherkinFeature]) -> set[str]: """Extract all unique tags from features.""" tags = set() for feature in features: tags.update(feature.tags) for scenario in feature.scenarios: tags.update(scenario.tags) return tags def get_feature_names(features: list[GherkinFeature]) -> list[str]: """Get list of feature names.""" return [f.name for f in features] def get_scenario_names(features: list[GherkinFeature]) -> list[str]: """Get list of all scenario names across all features.""" scenarios = [] for feature in features: for scenario in feature.scenarios: scenarios.append(scenario.name) return scenarios