Coverage for pytest_beehave/feature_parser.py: 100%

158 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2026-04-21 04:49 +0000

1"""Gherkin feature file parser for pytest-beehave.""" 

2 

3from __future__ import annotations 

4 

5import re 

6from dataclasses import dataclass 

7from pathlib import Path 

8from typing import Any, Protocol, cast 

9 

10from gherkin import Parser as _GherkinParserImpl 

11 

12from pytest_beehave.models import ExampleId, FeatureSlug, RuleSlug 

13 

14_ID_TAG_RE: re.Pattern[str] = re.compile(r"@id:([a-f0-9]{8})") 

15 

16 

class GherkinParserProtocol(Protocol):
    """Structural interface for objects that can parse Gherkin text.

    Any object exposing a compatible ``parse`` method satisfies this protocol.
    """

    def parse(self, text: str) -> dict[str, Any]:  # pragma: no cover
        """Convert Gherkin source text into an AST dictionary.

        Args:
            text: The Gherkin source to parse.

        Returns:
            The AST as a dict.
        """
        ...

23 

24 

class GherkinParser:
    """Thin adapter exposing the gherkin library parser via ``parse``.

    The adapter exists so the library parser can be used wherever a
    GherkinParserProtocol is expected.
    """

    def __init__(self) -> None:
        """Create the wrapped gherkin library parser."""
        self._impl = _GherkinParserImpl()

    def parse(self, text: str) -> dict[str, Any]:
        """Delegate parsing to the wrapped parser and return its AST.

        Args:
            text: Content of a Gherkin feature file.

        Returns:
            The AST produced by the wrapped parser, typed as a dict.
        """
        ast = self._impl.parse(text)
        # cast() is a typing-only hint; the value is returned unchanged.
        return cast(dict[str, Any], ast)

42 

43 

44@dataclass(frozen=True, slots=True) 

45class ParsedStep: 

46 """A single step line parsed from a Gherkin Example. 

47 

48 Attributes: 

49 keyword: The step keyword (Given, When, Then, And, But, *). 

50 text: The step text. 

51 doc_string: Optional attached doc string content. 

52 data_table: Optional rendered data table string. 

53 """ 

54 

55 keyword: str 

56 text: str 

57 doc_string: str | None 

58 data_table: str | None 

59 

60 

61@dataclass(frozen=True, slots=True) 

62class ParsedExample: 

63 """A single Example parsed from a .feature file. 

64 

65 Attributes: 

66 example_id: The @id hex identifier. 

67 steps: Tuple of parsed steps. 

68 background_sections: Background step tuples (feature-level then rule-level). 

69 outline_examples: Rendered Examples table string, if Scenario Outline. 

70 is_deprecated: True if tagged @deprecated. 

71 """ 

72 

73 example_id: ExampleId 

74 steps: tuple[ParsedStep, ...] 

75 background_sections: tuple[tuple[ParsedStep, ...], ...] 

76 outline_examples: str | None 

77 is_deprecated: bool 

78 

79 

80@dataclass(frozen=True, slots=True) 

81class ParsedRule: 

82 """A Rule block parsed from a feature file. 

83 

84 Attributes: 

85 title: The Rule: title text. 

86 rule_slug: Slugified rule title (underscore-separated). 

87 examples: Tuple of parsed examples in this rule. 

88 is_deprecated: True if the rule itself is deprecated. 

89 """ 

90 

91 title: str 

92 rule_slug: RuleSlug 

93 examples: tuple[ParsedExample, ...] 

94 is_deprecated: bool 

95 

96 

97@dataclass(frozen=True, slots=True) 

98class ParsedFeature: 

99 """A fully parsed .feature file. 

100 

101 Attributes: 

102 path: Path to the .feature file. 

103 feature_slug: Slugified feature folder name (underscore-separated). 

104 rules: Tuple of parsed rules (may be empty if no Rule blocks). 

105 top_level_examples: Examples not inside any Rule block. 

106 is_deprecated: True if the feature is deprecated. 

107 """ 

108 

109 path: Path 

110 feature_slug: FeatureSlug 

111 rules: tuple[ParsedRule, ...] 

112 top_level_examples: tuple[ParsedExample, ...] 

113 is_deprecated: bool 

114 

115 def all_example_ids(self) -> set[ExampleId]: 

116 """Collect all example IDs from rules and top-level examples. 

117 

118 Returns: 

119 Set of ExampleId objects. 

120 """ 

121 ids: set[ExampleId] = {ex.example_id for ex in self.top_level_examples} 

122 for rule in self.rules: 

123 ids.update(ex.example_id for ex in rule.examples) 

124 return ids 

125 

126 

def _compute_col_widths(all_cells: list[list[str]]) -> list[int]:
    """Compute the widest cell length for every column of a table.

    Rows may be ragged; a row that is too short for a column simply does not
    contribute to that column's width.

    Args:
        all_cells: List of rows, each row being a list of cell value strings.

    Returns:
        One width per column, or an empty list when there are no rows or
        no cells.
    """
    # default=0 keeps an empty table from raising ValueError in max().
    col_count = max((len(row) for row in all_cells), default=0)
    # Every col < col_count exists in at least one row, so the inner max()
    # never sees an empty sequence.
    return [
        max(len(row[col]) for row in all_cells if col < len(row))
        for col in range(col_count)
    ]

141 

142 

def _render_padded_row(row_cells: list[str], col_widths: list[int]) -> str:
    """Format one table row as a pipe-delimited, column-aligned string.

    Args:
        row_cells: Cell values for this row.
        col_widths: Width to pad each column to.

    Returns:
        The row rendered as ``| cell | cell |``.
    """
    cell_count = len(row_cells)
    rendered: list[str] = []
    for index, width in enumerate(col_widths):
        # A row shorter than the table gets blank filler for missing columns.
        value = row_cells[index] if index < cell_count else ""
        rendered.append(value.ljust(width))
    return "| " + " | ".join(rendered) + " |"

160 

161 

def _render_data_table(rows: list[dict[str, Any]]) -> str:
    """Turn Gherkin data table rows into an aligned multi-line string.

    Args:
        rows: Row dicts, each carrying a ``cells`` list of cell dicts.

    Returns:
        One rendered line per row joined by newlines, or an empty string
        when there are no rows.
    """
    if not rows:
        return ""
    table: list[list[str]] = []
    for row in rows:
        # Missing 'cells' or 'value' keys fall back to empty defaults.
        table.append([cell.get("value", "") for cell in row.get("cells", [])])
    widths = _compute_col_widths(table)
    rendered_lines = [_render_padded_row(cells, widths) for cells in table]
    return "\n".join(rendered_lines)

178 

179 

def _render_examples_table(examples: list[dict[str, Any]]) -> str:
    """Render a Scenario Outline's Examples table as text.

    Only the first entry of ``examples`` is rendered; later entries are
    ignored.

    Args:
        examples: Examples dicts from the Gherkin AST.

    Returns:
        The rendered Examples table, ``"Examples:"`` alone when the first
        entry has no rows, or an empty string when ``examples`` is empty.
    """
    if not examples:
        return ""
    first_block = examples[0]
    header_row = first_block.get("tableHeader")
    source_rows = [header_row] if header_row else []
    source_rows.extend(first_block.get("tableBody", []))
    table: list[list[str]] = [
        [cell.get("value", "") for cell in row.get("cells", [])]
        for row in source_rows
    ]
    if not table:
        return "Examples:"
    widths = _compute_col_widths(table)
    output = ["Examples:"]
    # NOTE(review): the row indent is copied from the visible source, where
    # whitespace may have been collapsed — confirm against the original file.
    output.extend(" " + _render_padded_row(cells, widths) for cells in table)
    return "\n".join(output)

205 

206 

def _build_step(raw: dict[str, Any]) -> ParsedStep:
    """Convert one Gherkin AST step dict into a ParsedStep.

    Args:
        raw: A step dict from the Gherkin AST. Must contain ``keyword``.

    Returns:
        A ParsedStep whose ``doc_string`` / ``data_table`` are None unless
        the step dict has a ``docString`` / ``dataTable`` entry.
    """
    attached_doc: str | None = (
        raw["docString"].get("content", "") if "docString" in raw else None
    )
    attached_table: str | None = (
        _render_data_table(raw["dataTable"].get("rows", []))
        if "dataTable" in raw
        else None
    )
    # The keyword is stripped of surrounding whitespace before storing.
    step_keyword = raw["keyword"].strip()
    return ParsedStep(
        keyword=step_keyword,
        text=raw.get("text", ""),
        doc_string=attached_doc,
        data_table=attached_table,
    )

228 

229 

230def _build_steps(raw_steps: list[dict[str, Any]]) -> tuple[ParsedStep, ...]: 

231 """Build a tuple of ParsedStep from AST step list. 

232 

233 Args: 

234 raw_steps: List of step dicts. 

235 

236 Returns: 

237 Tuple of ParsedStep. 

238 """ 

239 return tuple(_build_step(s) for s in raw_steps) 

240 

241 

def _extract_background(
    children: list[dict[str, Any]],
) -> tuple[ParsedStep, ...] | None:
    """Find the first Background among AST children and build its steps.

    Args:
        children: Child dicts from the Gherkin AST.

    Returns:
        The Background's steps as a tuple, or None when no child carries a
        ``background`` entry.
    """
    candidates = (child.get("background") for child in children)
    first_background = next((bg for bg in candidates if bg is not None), None)
    if first_background is None:
        return None
    return _build_steps(first_background.get("steps", []))

258 

259 

def _extract_id_from_tags(tags: list[dict[str, Any]]) -> str | None:
    """Return the hex ID from the first tag matching the ``@id:`` pattern.

    Args:
        tags: Tag dicts from the Gherkin AST.

    Returns:
        The captured ID string from the first matching tag, or None when
        no tag matches.
    """
    # Tags without a 'name' key are searched as an empty string.
    searches = (_ID_TAG_RE.search(tag.get("name", "")) for tag in tags)
    first_hit = next((found for found in searches if found), None)
    return first_hit.group(1) if first_hit else None

274 

275 

def _has_deprecated_tag(tags: list[dict[str, Any]]) -> bool:
    """Report whether any tag is exactly ``@deprecated``.

    Args:
        tags: Tag dicts from the Gherkin AST.

    Returns:
        True if a tag named ``@deprecated`` is present, otherwise False.
    """
    # .get() tolerates tag dicts without a 'name' key instead of raising
    # KeyError, matching how _extract_id_from_tags reads tag names.
    return any(tag.get("name") == "@deprecated" for tag in tags)

286 

287 

def _collect_background_sections(
    feature_bg: tuple[ParsedStep, ...] | None,
    rule_bg: tuple[ParsedStep, ...] | None,
) -> tuple[tuple[ParsedStep, ...], ...]:
    """Gather the backgrounds that exist, feature-level first.

    Args:
        feature_bg: Feature-level background steps, or None.
        rule_bg: Rule-level background steps, or None.

    Returns:
        Tuple holding each non-None background, in (feature, rule) order.
    """
    present: list[tuple[ParsedStep, ...]] = []
    for section in (feature_bg, rule_bg):
        # Only None is skipped; an empty step tuple is still kept.
        if section is not None:
            present.append(section)
    return tuple(present)

303 

304 

def _build_example(
    scenario: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    rule_bg: tuple[ParsedStep, ...] | None,
    parent_deprecated: bool = False,
) -> ParsedExample | None:
    """Convert a scenario dict into a ParsedExample, if it carries an ID.

    Args:
        scenario: Gherkin AST scenario dict.
        feature_bg: Feature-level background steps.
        rule_bg: Rule-level background steps.
        parent_deprecated: True if a parent (rule or feature) is deprecated.

    Returns:
        The ParsedExample, or None when the scenario has no ``@id`` tag.
    """
    scenario_tags = scenario.get("tags", [])
    hex_id = _extract_id_from_tags(scenario_tags)
    if hex_id is None:
        # Scenarios without an @id tag are skipped.
        return None
    raw_examples = scenario.get("examples", [])
    identifier = ExampleId(hex_id)
    parsed_steps = _build_steps(scenario.get("steps", []))
    backgrounds = _collect_background_sections(feature_bg, rule_bg)
    rendered_table = _render_examples_table(raw_examples) if raw_examples else None
    # Deprecation is inherited from the parent or set by the scenario's tags.
    deprecated = parent_deprecated or _has_deprecated_tag(scenario_tags)
    return ParsedExample(
        example_id=identifier,
        steps=parsed_steps,
        background_sections=backgrounds,
        outline_examples=rendered_table,
        is_deprecated=deprecated,
    )

336 

337 

def _example_from_child(
    child: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    rule_bg: tuple[ParsedStep, ...] | None,
    rule_deprecated: bool,
) -> ParsedExample | None:
    """Build a ParsedExample from a rule child that holds a scenario.

    Args:
        child: A child dict from the rule's Gherkin AST.
        feature_bg: Feature-level background steps.
        rule_bg: Rule-level background steps.
        rule_deprecated: True if the rule is deprecated.

    Returns:
        The ParsedExample, or None when the child has no ``scenario`` entry
        or the scenario yields no example.
    """
    scenario_node = child.get("scenario")
    return (
        None
        if scenario_node is None
        else _build_example(scenario_node, feature_bg, rule_bg, rule_deprecated)
    )

359 

360 

def _parse_rule_examples(
    rule_children: list[dict[str, Any]],
    feature_bg: tuple[ParsedStep, ...] | None,
    rule_bg: tuple[ParsedStep, ...] | None,
    rule_deprecated: bool,
) -> tuple[ParsedExample, ...]:
    """Build every example found among a rule's children.

    Args:
        rule_children: Child dicts from the rule's Gherkin AST.
        feature_bg: Feature-level background steps.
        rule_bg: Rule-level background steps.
        rule_deprecated: True if the rule is deprecated.

    Returns:
        The parsed examples, in child order; children that yield no example
        are left out.
    """
    parsed: list[ParsedExample] = []
    for node in rule_children:
        example = _example_from_child(node, feature_bg, rule_bg, rule_deprecated)
        if example is not None:
            parsed.append(example)
    return tuple(parsed)

383 

384 

def _parse_rule(
    rule: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    feature_deprecated: bool = False,
) -> ParsedRule:
    """Convert a Rule dict from the AST into a ParsedRule.

    Args:
        rule: Rule dict from the Gherkin AST.
        feature_bg: Feature-level background steps.
        feature_deprecated: True if the parent feature is deprecated.

    Returns:
        A ParsedRule holding the rule's title, slug, examples and
        deprecation state.
    """
    rule_title = rule.get("name", "")
    child_nodes = rule.get("children", [])
    # A rule is deprecated if its feature is, or if it is tagged itself.
    deprecated = feature_deprecated or _has_deprecated_tag(rule.get("tags", []))
    own_background = _extract_background(child_nodes)
    parsed_examples = _parse_rule_examples(
        child_nodes, feature_bg, own_background, deprecated
    )
    slug = RuleSlug.from_rule_title(rule_title)
    return ParsedRule(
        title=rule_title,
        rule_slug=slug,
        examples=parsed_examples,
        is_deprecated=deprecated,
    )

411 

412 

def _empty_feature(path: Path, feature_slug: FeatureSlug) -> ParsedFeature:
    """Build the placeholder result for a file without a feature block.

    Args:
        path: Path to the .feature file.
        feature_slug: The feature slug.

    Returns:
        A non-deprecated ParsedFeature with no rules and no examples.
    """
    no_rules: tuple[ParsedRule, ...] = ()
    no_examples: tuple[ParsedExample, ...] = ()
    # Positional order follows ParsedFeature's field order.
    return ParsedFeature(path, feature_slug, no_rules, no_examples, False)

430 

431 

def _parse_child(
    child: dict[str, Any],
    feature_bg: tuple[ParsedStep, ...] | None,
    feature_deprecated: bool,
    rules: list[ParsedRule],
    top_level: list[ParsedExample],
) -> None:
    """Route one feature child into the rule or top-level example list.

    A child with a ``rule`` entry is parsed as a rule; otherwise a child
    with a ``scenario`` entry is parsed as a top-level example. Any other
    child is ignored.

    Args:
        child: A child dict from the Gherkin AST.
        feature_bg: Feature-level background steps.
        feature_deprecated: True if the feature is deprecated.
        rules: List that receives parsed rules (mutated in place).
        top_level: List that receives parsed examples (mutated in place).
    """
    if (rule_node := child.get("rule")) is not None:
        rules.append(_parse_rule(rule_node, feature_bg, feature_deprecated))
    elif (scenario_node := child.get("scenario")) is not None:
        # Top-level scenarios have no rule-level background.
        example = _build_example(scenario_node, feature_bg, None, feature_deprecated)
        if example is not None:
            top_level.append(example)

458 

459 

def _parse_children(
    children: list[dict[str, Any]],
    feature_bg: tuple[ParsedStep, ...] | None,
    feature_deprecated: bool,
) -> tuple[tuple[ParsedRule, ...], tuple[ParsedExample, ...]]:
    """Split a feature's children into rules and top-level examples.

    Args:
        children: Child dicts from the Gherkin AST.
        feature_bg: Feature-level background steps.
        feature_deprecated: True if the feature is deprecated.

    Returns:
        A ``(rules, top_level_examples)`` pair of tuples.
    """
    found_rules: list[ParsedRule] = []
    found_examples: list[ParsedExample] = []
    for node in children:
        # _parse_child appends to whichever list the node belongs in.
        _parse_child(
            node,
            feature_bg,
            feature_deprecated,
            rules=found_rules,
            top_level=found_examples,
        )
    return (tuple(found_rules), tuple(found_examples))

480 

481 

def parse_feature(
    path: Path,
    folder_name: str | None = None,
    parser: GherkinParserProtocol | None = None,
) -> ParsedFeature:
    """Read and parse a .feature file into a ParsedFeature.

    Args:
        path: Path to the .feature file; it is read as UTF-8.
        folder_name: The feature folder name. Defaults to path.parent.name.
        parser: Optional Gherkin parser instance. Defaults to GherkinParser().

    Returns:
        A ParsedFeature with all rules and examples, or an empty one when
        the parsed document has no feature block.
    """
    source_folder = path.parent.name if folder_name is None else folder_name
    active_parser = GherkinParser() if parser is None else parser
    document = active_parser.parse(path.read_text(encoding="utf-8"))
    feature_node = cast(dict[str, Any] | None, document.get("feature"))
    slug = FeatureSlug.from_folder_name(source_folder)
    if not feature_node:
        return _empty_feature(path, slug)
    child_nodes = feature_node.get("children", [])
    deprecated = _has_deprecated_tag(feature_node.get("tags", []))
    # The feature-level background is passed down to every rule and example.
    background = _extract_background(child_nodes)
    parsed_rules, parsed_examples = _parse_children(
        child_nodes, background, deprecated
    )
    return ParsedFeature(
        path=path,
        feature_slug=slug,
        rules=parsed_rules,
        top_level_examples=parsed_examples,
        is_deprecated=deprecated,
    )

518 

519 

def collect_all_example_ids(feature: ParsedFeature) -> set[ExampleId]:
    """Return every example ID contained in a parsed feature.

    This is a function-style wrapper around ``feature.all_example_ids()``.

    Args:
        feature: A ParsedFeature.

    Returns:
        Set of ExampleId objects.
    """
    example_ids = feature.all_example_ids()
    return example_ids