Coverage for pytest_beehave/feature_parser.py: 100%
158 statements
« prev ^ index » next coverage.py v7.8.0, created at 2026-04-21 04:49 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2026-04-21 04:49 +0000
1"""Gherkin feature file parser for pytest-beehave."""
3from __future__ import annotations
5import re
6from dataclasses import dataclass
7from pathlib import Path
8from typing import Any, Protocol, cast
10from gherkin import Parser as _GherkinParserImpl
12from pytest_beehave.models import ExampleId, FeatureSlug, RuleSlug
# Pattern for an "@id:" tag; group 1 captures the eight characters from
# [a-f0-9] that follow it. The pattern carries no anchors of its own.
_ID_TAG_RE: re.Pattern[str] = re.compile(r"@id:([a-f0-9]{8})")
class GherkinParserProtocol(Protocol):
    """Structural interface for objects able to parse Gherkin text."""

    def parse(self, text: str) -> dict[str, Any]:  # pragma: no cover
        """Parse Gherkin text into an AST dict.

        Args:
            text: The Gherkin source text to parse.

        Returns:
            The parsed document as a dict.
        """
        ...
class GherkinParser:
    """Thin adapter exposing the gherkin library parser via GherkinParserProtocol."""

    def __init__(self) -> None:
        """Create the wrapped gherkin parser instance."""
        self._impl = _GherkinParserImpl()

    def parse(self, text: str) -> dict[str, Any]:
        """Delegate parsing to the wrapped gherkin parser.

        Args:
            text: The Gherkin feature file content.

        Returns:
            The AST produced by the wrapped parser, typed as a dict.
        """
        document = self._impl.parse(text)
        return cast(dict[str, Any], document)
44@dataclass(frozen=True, slots=True)
45class ParsedStep:
46 """A single step line parsed from a Gherkin Example.
48 Attributes:
49 keyword: The step keyword (Given, When, Then, And, But, *).
50 text: The step text.
51 doc_string: Optional attached doc string content.
52 data_table: Optional rendered data table string.
53 """
55 keyword: str
56 text: str
57 doc_string: str | None
58 data_table: str | None
61@dataclass(frozen=True, slots=True)
62class ParsedExample:
63 """A single Example parsed from a .feature file.
65 Attributes:
66 example_id: The @id hex identifier.
67 steps: Tuple of parsed steps.
68 background_sections: Background step tuples (feature-level then rule-level).
69 outline_examples: Rendered Examples table string, if Scenario Outline.
70 is_deprecated: True if tagged @deprecated.
71 """
73 example_id: ExampleId
74 steps: tuple[ParsedStep, ...]
75 background_sections: tuple[tuple[ParsedStep, ...], ...]
76 outline_examples: str | None
77 is_deprecated: bool
80@dataclass(frozen=True, slots=True)
81class ParsedRule:
82 """A Rule block parsed from a feature file.
84 Attributes:
85 title: The Rule: title text.
86 rule_slug: Slugified rule title (underscore-separated).
87 examples: Tuple of parsed examples in this rule.
88 is_deprecated: True if the rule itself is deprecated.
89 """
91 title: str
92 rule_slug: RuleSlug
93 examples: tuple[ParsedExample, ...]
94 is_deprecated: bool
97@dataclass(frozen=True, slots=True)
98class ParsedFeature:
99 """A fully parsed .feature file.
101 Attributes:
102 path: Path to the .feature file.
103 feature_slug: Slugified feature folder name (underscore-separated).
104 rules: Tuple of parsed rules (may be empty if no Rule blocks).
105 top_level_examples: Examples not inside any Rule block.
106 is_deprecated: True if the feature is deprecated.
107 """
109 path: Path
110 feature_slug: FeatureSlug
111 rules: tuple[ParsedRule, ...]
112 top_level_examples: tuple[ParsedExample, ...]
113 is_deprecated: bool
115 def all_example_ids(self) -> set[ExampleId]:
116 """Collect all example IDs from rules and top-level examples.
118 Returns:
119 Set of ExampleId objects.
120 """
121 ids: set[ExampleId] = {ex.example_id for ex in self.top_level_examples}
122 for rule in self.rules:
123 ids.update(ex.example_id for ex in rule.examples)
124 return ids
127def _compute_col_widths(all_cells: list[list[str]]) -> list[int]:
128 """Compute maximum column widths across all rows.
130 Args:
131 all_cells: List of rows, each row is a list of cell value strings.
133 Returns:
134 List of column widths.
135 """
136 col_count = max(len(row) for row in all_cells)
137 return [
138 max(len(row[col]) for row in all_cells if col < len(row))
139 for col in range(col_count)
140 ]
143def _render_padded_row(row_cells: list[str], col_widths: list[int]) -> str:
144 """Render a table row with padded cells.
146 Args:
147 row_cells: Cell values for this row.
148 col_widths: Maximum width for each column.
150 Returns:
151 Pipe-delimited row string.
152 """
153 padded = [
154 row_cells[col].ljust(col_widths[col])
155 if col < len(row_cells)
156 else " " * col_widths[col]
157 for col in range(len(col_widths))
158 ]
159 return "| " + " | ".join(padded) + " |"
162def _render_data_table(rows: list[dict[str, Any]]) -> str:
163 """Render a Gherkin data table as a multi-line string.
165 Args:
166 rows: List of row dicts with 'cells' lists.
168 Returns:
169 Rendered table string.
170 """
171 if not rows:
172 return ""
173 all_cells = [
174 [cell.get("value", "") for cell in row.get("cells", [])] for row in rows
175 ]
176 col_widths = _compute_col_widths(all_cells)
177 return "\n".join(_render_padded_row(row, col_widths) for row in all_cells)
180def _render_examples_table(examples: list[dict[str, Any]]) -> str:
181 """Render the Examples table from a Scenario Outline.
183 Args:
184 examples: List of examples dicts from the Gherkin AST.
186 Returns:
187 Rendered Examples table, or empty string if none.
188 """
189 if not examples:
190 return ""
191 header = examples[0].get("tableHeader")
192 body = examples[0].get("tableBody", [])
193 all_rows: list[list[str]] = []
194 if header:
195 all_rows.append([cell.get("value", "") for cell in header.get("cells", [])])
196 for row in body:
197 all_rows.append([cell.get("value", "") for cell in row.get("cells", [])])
198 if not all_rows:
199 return "Examples:"
200 col_widths = _compute_col_widths(all_rows)
201 lines = ["Examples:"] + [
202 " " + _render_padded_row(row, col_widths) for row in all_rows
203 ]
204 return "\n".join(lines)
def _build_step(raw: dict[str, Any]) -> ParsedStep:
    """Convert one Gherkin AST step dict into a ParsedStep.

    The doc string and data table are only read when their keys are present
    in the step dict; otherwise the corresponding field is None.

    Args:
        raw: A step dict from the Gherkin AST.

    Returns:
        A ParsedStep.
    """
    doc_string = raw["docString"].get("content", "") if "docString" in raw else None
    data_table = (
        _render_data_table(raw["dataTable"].get("rows", []))
        if "dataTable" in raw
        else None
    )
    keyword = raw["keyword"].strip()
    text = raw.get("text", "")
    return ParsedStep(
        keyword=keyword,
        text=text,
        doc_string=doc_string,
        data_table=data_table,
    )
230def _build_steps(raw_steps: list[dict[str, Any]]) -> tuple[ParsedStep, ...]:
231 """Build a tuple of ParsedStep from AST step list.
233 Args:
234 raw_steps: List of step dicts.
236 Returns:
237 Tuple of ParsedStep.
238 """
239 return tuple(_build_step(s) for s in raw_steps)
242def _extract_background(
243 children: list[dict[str, Any]],
244) -> tuple[ParsedStep, ...] | None:
245 """Extract background steps from a list of AST children.
247 Args:
248 children: Child dicts from the Gherkin AST.
250 Returns:
251 Tuple of ParsedStep, or None if no Background.
252 """
253 for child in children:
254 background = child.get("background")
255 if background is not None:
256 return _build_steps(background.get("steps", []))
257 return None
260def _extract_id_from_tags(tags: list[dict[str, Any]]) -> str | None:
261 """Find the @id:<hex> value from Gherkin AST tags.
263 Args:
264 tags: List of tag dicts.
266 Returns:
267 8-char hex ID or None.
268 """
269 for tag in tags:
270 match = _ID_TAG_RE.search(tag.get("name", ""))
271 if match:
272 return match.group(1)
273 return None
276def _has_deprecated_tag(tags: list[dict[str, Any]]) -> bool:
277 """Check if @deprecated tag is present.
279 Args:
280 tags: List of tag dicts.
282 Returns:
283 True if @deprecated is found.
284 """
285 return any(t["name"] == "@deprecated" for t in tags)
288def _collect_background_sections(
289 feature_bg: tuple[ParsedStep, ...] | None,
290 rule_bg: tuple[ParsedStep, ...] | None,
291) -> tuple[tuple[ParsedStep, ...], ...]:
292 """Collect non-None background step tuples in order.
294 Args:
295 feature_bg: Feature-level background steps.
296 rule_bg: Rule-level background steps.
298 Returns:
299 Tuple of background step tuples.
300 """
301 sections = [bg for bg in (feature_bg, rule_bg) if bg is not None]
302 return tuple(sections)
def _build_example(
    scenario: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    rule_bg: tuple[ParsedStep, ...] | None,
    parent_deprecated: bool = False,
) -> ParsedExample | None:
    """Convert a Gherkin AST scenario dict into a ParsedExample.

    Args:
        scenario: Gherkin AST scenario dict.
        feature_bg: Feature-level background steps.
        rule_bg: Rule-level background steps.
        parent_deprecated: True if a parent (rule or feature) is deprecated.

    Returns:
        ParsedExample or None if no @id tag.
    """
    raw_tags = scenario.get("tags", [])
    found_id = _extract_id_from_tags(raw_tags)
    if found_id is None:
        return None

    outline = scenario.get("examples", [])
    example_id = ExampleId(found_id)
    steps = _build_steps(scenario.get("steps", []))
    backgrounds = _collect_background_sections(feature_bg, rule_bg)
    # An empty examples list means a plain scenario: no table is rendered.
    rendered_outline = _render_examples_table(outline) if outline else None
    deprecated = parent_deprecated or _has_deprecated_tag(raw_tags)
    return ParsedExample(
        example_id=example_id,
        steps=steps,
        background_sections=backgrounds,
        outline_examples=rendered_outline,
        is_deprecated=deprecated,
    )
338def _example_from_child(
339 child: dict[str, Any],
340 feature_bg: tuple[ParsedStep, ...] | None,
341 rule_bg: tuple[ParsedStep, ...] | None,
342 rule_deprecated: bool,
343) -> ParsedExample | None:
344 """Return a ParsedExample from a rule child dict, or None if not a scenario.
346 Args:
347 child: A child dict from the rule's Gherkin AST.
348 feature_bg: Feature-level background steps.
349 rule_bg: Rule-level background steps.
350 rule_deprecated: True if the rule is deprecated.
352 Returns:
353 ParsedExample or None.
354 """
355 scenario = child.get("scenario")
356 if scenario is None:
357 return None
358 return _build_example(scenario, feature_bg, rule_bg, rule_deprecated)
361def _parse_rule_examples(
362 rule_children: list[dict[str, Any]],
363 feature_bg: tuple[ParsedStep, ...] | None,
364 rule_bg: tuple[ParsedStep, ...] | None,
365 rule_deprecated: bool,
366) -> tuple[ParsedExample, ...]:
367 """Parse all examples from rule children.
369 Args:
370 rule_children: Child dicts from the rule's Gherkin AST.
371 feature_bg: Feature-level background steps.
372 rule_bg: Rule-level background steps.
373 rule_deprecated: True if the rule is deprecated.
375 Returns:
376 Tuple of ParsedExample.
377 """
378 candidates = (
379 _example_from_child(child, feature_bg, rule_bg, rule_deprecated)
380 for child in rule_children
381 )
382 return tuple(ex for ex in candidates if ex is not None)
def _parse_rule(
    rule: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    feature_deprecated: bool = False,
) -> ParsedRule:
    """Convert a Rule dict from the Gherkin AST into a ParsedRule.

    The rule counts as deprecated when the parent feature is deprecated or
    the rule carries the @deprecated tag itself.

    Args:
        rule: Rule dict from the Gherkin AST.
        feature_bg: Feature-level background steps.
        feature_deprecated: True if the parent feature is deprecated.

    Returns:
        A ParsedRule.
    """
    name = rule.get("name", "")
    children = rule.get("children", [])
    deprecated = feature_deprecated or _has_deprecated_tag(rule.get("tags", []))
    background = _extract_background(children)
    parsed_examples = _parse_rule_examples(
        children, feature_bg, background, deprecated
    )
    slug = RuleSlug.from_rule_title(name)
    return ParsedRule(
        title=name,
        rule_slug=slug,
        examples=parsed_examples,
        is_deprecated=deprecated,
    )
def _empty_feature(path: Path, feature_slug: FeatureSlug) -> ParsedFeature:
    """Build the placeholder ParsedFeature used when a file has no feature block.

    Args:
        path: Path to the .feature file.
        feature_slug: The feature slug.

    Returns:
        ParsedFeature with no rules or examples, not deprecated.
    """
    no_rules: tuple[ParsedRule, ...] = ()
    no_examples: tuple[ParsedExample, ...] = ()
    return ParsedFeature(
        path=path,
        feature_slug=feature_slug,
        rules=no_rules,
        top_level_examples=no_examples,
        is_deprecated=False,
    )
432def _parse_child(
433 child: dict[str, Any],
434 feature_bg: tuple[ParsedStep, ...] | None,
435 feature_deprecated: bool,
436 rules: list[ParsedRule],
437 top_level: list[ParsedExample],
438) -> None:
439 """Parse one feature child into rules or top-level examples.
441 Args:
442 child: A child dict from the Gherkin AST.
443 feature_bg: Feature-level background steps.
444 feature_deprecated: True if the feature is deprecated.
445 rules: List to append ParsedRule to.
446 top_level: List to append ParsedExample to.
447 """
448 rule_node = child.get("rule")
449 if rule_node is not None:
450 rules.append(_parse_rule(rule_node, feature_bg, feature_deprecated))
451 return
452 scenario = child.get("scenario")
453 if scenario is None:
454 return
455 ex = _build_example(scenario, feature_bg, None, feature_deprecated)
456 if ex is not None:
457 top_level.append(ex)
460def _parse_children(
461 children: list[dict[str, Any]],
462 feature_bg: tuple[ParsedStep, ...] | None,
463 feature_deprecated: bool,
464) -> tuple[tuple[ParsedRule, ...], tuple[ParsedExample, ...]]:
465 """Parse the children of a feature block into rules and top-level examples.
467 Args:
468 children: Child dicts from the Gherkin AST.
469 feature_bg: Feature-level background steps.
470 feature_deprecated: True if the feature is deprecated.
472 Returns:
473 Tuple of (rules, top_level_examples).
474 """
475 rules: list[ParsedRule] = []
476 top_level: list[ParsedExample] = []
477 for child in children:
478 _parse_child(child, feature_bg, feature_deprecated, rules, top_level)
479 return tuple(rules), tuple(top_level)
def parse_feature(
    path: Path,
    folder_name: str | None = None,
    parser: GherkinParserProtocol | None = None,
) -> ParsedFeature:
    """Read a .feature file as UTF-8 and parse it into a ParsedFeature.

    Args:
        path: Path to the .feature file.
        folder_name: The feature folder name. Defaults to path.parent.name.
        parser: Optional Gherkin parser instance. Defaults to GherkinParser().

    Returns:
        A ParsedFeature with all examples, or an empty one when the parsed
        document has no feature block.
    """
    resolved_folder = path.parent.name if folder_name is None else folder_name
    active_parser = GherkinParser() if parser is None else parser
    document = active_parser.parse(path.read_text(encoding="utf-8"))
    feature_node = cast(dict[str, Any] | None, document.get("feature"))
    slug = FeatureSlug.from_folder_name(resolved_folder)
    if not feature_node:
        return _empty_feature(path, slug)

    child_nodes = feature_node.get("children", [])
    deprecated = _has_deprecated_tag(feature_node.get("tags", []))
    background = _extract_background(child_nodes)
    parsed_rules, parsed_examples = _parse_children(
        child_nodes, background, deprecated
    )
    return ParsedFeature(
        path=path,
        feature_slug=slug,
        rules=parsed_rules,
        top_level_examples=parsed_examples,
        is_deprecated=deprecated,
    )
def collect_all_example_ids(feature: ParsedFeature) -> set[ExampleId]:
    """Return every example ID held by a parsed feature.

    Delegates to the feature's own ``all_example_ids`` method.

    Args:
        feature: A ParsedFeature.

    Returns:
        Set of ExampleId objects.
    """
    example_ids = feature.all_example_ids()
    return example_ids