Coverage for app/backend/src/couchers/markup.py: 100%
24 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-28 16:00 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-28 16:00 +0000
1from html.parser import HTMLParser
2from typing import Any
4from markdown_it import MarkdownIt
5from markupsafe import Markup
7# Markdown config should match frontend's MarkdownNoSSR component.
# Shared renderer: start from the "zero" preset and switch on only the rules listed below.
_markdown = MarkdownIt(
    "zero",  # Base configuration disables all features
    options_update={
        # Enable some language-neutral replacement + quotes beautification
        "typographer": True,
        # Convert '\n' in paragraphs into <br>
        "breaks": True,
    },
)
# enable() modifies the parser in place, so the instance bound above is the configured one.
_markdown.enable(
    [
        "emphasis",  # Process *this* and _that_
        "heading",  # Headings (#, ##, ...)
        "hr",  # Horizontal rule
        "link",  # Process [link](<to> "stuff")
        "list",  # Lists
        "newline",  # Process '\n'
        "smartquotes",  # Convert straight quotation marks to typographic ones
    ]
)
def markdown_to_html(text: str) -> Markup:
    """
    Renders Markdown source to HTML using the module-level ``_markdown`` renderer.

    The rendered string is wrapped in ``Markup`` before being returned.
    """
    rendered = _markdown.render(text)
    return Markup(rendered)
def markdown_to_plaintext(text: str) -> str:
    """
    Renders Markdown source to plaintext.

    The text is first rendered to HTML with ``markdown_to_html`` and that HTML is then
    reduced to plaintext with ``html_to_plaintext``.
    """
    rendered = markdown_to_html(text)
    return html_to_plaintext(rendered)
def html_to_plaintext(html: str | Markup) -> str:
    """
    Renders a plaintext version of HTML by extracting inner HTML and converting entities+newlines.
    Do not use for sanitization. The resulting string may not be markup-safe.

    :param html: The HTML to convert, either a plain ``str`` or a ``Markup`` instance.
    :return: The text gathered by ``_HTMLToPlaintext`` after parsing all of ``html``.
    """

    # Hand the parser a plain str instead of a Markup instance
    # (presumably so Markup's escaping string operations cannot interfere with parsing).
    if isinstance(html, Markup):
        html = str(html)

    converter = _HTMLToPlaintext()
    converter.feed(html)
    # feed() may keep trailing input buffered while it waits for more data, e.g. text that
    # ends in what could be a partial character reference ("AT&T"). close() forces the
    # buffered remainder to be processed so it is not silently dropped from the result.
    converter.close()
    return converter.plaintext
class _HTMLToPlaintext(HTMLParser):
    """
    HTML parser that collects the text content of the parsed input as plaintext.

    Text nodes are kept with their literal newlines removed, every ``br`` start tag
    contributes one newline, and all other tags produce no output. Read the result
    from ``plaintext`` once the input has been parsed.
    """

    def __init__(self) -> None:
        super().__init__()
        # Fragments are gathered in a list and joined on demand; growing a str attribute
        # with ``+=`` would copy the whole accumulated text on every parser callback.
        self._fragments: list[str] = []

    @property
    def plaintext(self) -> str:
        """The plaintext gathered from everything the parser has processed so far."""
        return "".join(self._fragments)

    def handle_starttag(self, tag: str, attrs: Any) -> None:
        # Only <br> produces output: it becomes a line break.
        if tag == "br":
            self._fragments.append("\n")

    def handle_data(self, data: str) -> None:
        # Escapes have already been unescaped
        self._fragments.append(data.replace("\n", ""))  # Newlines in html are meaningless