Coverage for app/backend/src/couchers/markup.py: 100%

24 statements  

« prev     ^ index     » next       coverage.py v7.14.3, created at 2026-06-28 16:00 +0000

1from html.parser import HTMLParser 

2from typing import Any 

3 

4from markdown_it import MarkdownIt 

5from markupsafe import Markup 

6 

7# Markdown config should match frontend's MarkdownNoSSR component. 

# Shared renderer: start from the "zero" preset, which disables all features,
# then switch on only the options and rules listed here.
_markdown = MarkdownIt(
    "zero",
    options_update=dict(
        # Enable some language-neutral replacement + quotes beautification
        typographer=True,
        # Convert '\n' in paragraphs into <br>
        breaks=True,
    ),
).enable(
    [
        # Process *this* and _that_
        "emphasis",
        # Headings (#, ##, ...)
        "heading",
        # Horizontal rule
        "hr",
        # Process [link](<to> "stuff")
        "link",
        # Lists
        "list",
        # Process '\n'
        "newline",
        # Convert straight quotation marks to typographic ones
        "smartquotes",
    ]
)

25 

26 

def markdown_to_html(text: str) -> Markup:
    """Render markdown ``text`` with the module-level renderer and wrap the HTML in ``Markup``."""
    rendered = _markdown.render(text)
    return Markup(rendered)

29 

30 

def markdown_to_plaintext(text: str) -> str:
    """Render markdown ``text`` to HTML, then reduce that HTML to plaintext."""
    rendered = markdown_to_html(text)
    return html_to_plaintext(rendered)

33 

34 

def html_to_plaintext(html: str | Markup) -> str:
    """
    Renders a plaintext version of HTML by extracting inner HTML and converting entities+newlines.
    Do not use for sanitization. The resulting string may not be markup-safe.

    Accepts either a plain ``str`` or a ``Markup`` instance; ``Markup`` is converted to ``str``
    before parsing. Returns the text accumulated by the parser.
    """

    if isinstance(html, Markup):
        html = str(html)

    converter = _HTMLToPlaintext()
    converter.feed(html)
    # feed() alone may keep trailing text buffered while it waits for more input
    # (for example text ending in an unterminated "&..." reference or a dangling "<").
    # close() forces the parser to process whatever is still buffered so it is not lost.
    converter.close()
    return converter.plaintext

47 

48 

49class _HTMLToPlaintext(HTMLParser): 

50 plaintext: str 

51 

52 def __init__(self) -> None: 

53 super().__init__() 

54 self.plaintext = "" 

55 

56 def handle_starttag(self, tag: str, attrs: Any) -> None: 

57 if tag == "br": 

58 self.plaintext += "\n" 

59 

60 def handle_data(self, data: str) -> None: 

61 # Escapes have already been unescaped 

62 self.plaintext += data.replace("\n", "") # Newlines in html are meaningless