forked from hkc/mastoposter
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
|
from html import escape
|
||
|
from bs4.element import Tag, PageElement
|
||
|
|
||
|
|
||
|
def md_escape(text: str) -> str:
    r"""Return *text* with Markdown control characters backslash-escaped.

    Each occurrence of one of the characters ``\ * [ ] _ ~ | ``` is
    prefixed with a single backslash; every other character is left
    unchanged.
    """
    # One translation pass maps every special character to its escaped
    # form, so backslashes inserted for one character are never re-escaped.
    specials = "\\*[]_~|`"
    table = str.maketrans({char: "\\" + char for char in specials})
    return text.translate(table)
|
||
|
|
||
|
|
||
|
def node_to_html(el: PageElement) -> str:
    """Render a parsed node as a reduced HTML string.

    Non-``Tag`` nodes are converted with ``str`` and HTML-escaped. For
    ``Tag`` nodes the children are rendered recursively and concatenated:

    * ``<a>`` is re-emitted as an anchor carrying only its escaped ``href``;
    * ``<p>`` yields its rendered children followed by a blank line;
    * ``<br>`` yields a single newline (its children are ignored);
    * any other tag is dropped, keeping only its rendered children.

    An ``<a>`` without an ``href`` entry in ``el.attrs`` propagates the
    lookup error raised by ``el.attrs["href"]``.
    """
    if not isinstance(el, Tag):
        return escape(str(el))

    tag_name = el.name
    if tag_name == "br":
        return "\n"

    if tag_name == "a":
        # Resolve the target before descending, as a missing href should
        # fail before any child is rendered.
        target = escape(el.attrs["href"])
        label = "".join(node_to_html(child) for child in el.children)
        return '<a href="{}">{}</a>'.format(target, label)

    rendered = "".join(node_to_html(child) for child in el.children)
    if tag_name == "p":
        return rendered + "\n\n"
    return rendered
|
||
|
|
||
|
|
||
|
def node_to_markdown(el: PageElement) -> str:
    """Render a parsed node as Markdown text.

    Non-``Tag`` nodes are converted with ``str`` and passed through
    ``md_escape``. For ``Tag`` nodes the children are rendered recursively
    and concatenated:

    * ``<a>`` becomes ``[label](href)``;
    * ``<p>`` yields its rendered children followed by a blank line;
    * ``<br>`` yields a single newline (its children are ignored);
    * any other tag is dropped, keeping only its rendered children.

    An ``<a>`` without an ``href`` entry in ``el.attrs`` propagates the
    lookup error raised by ``el.attrs["href"]``.
    """
    if not isinstance(el, Tag):
        # Text is escaped exactly once, at the leaf.
        return md_escape(str(el))

    tag_name = el.name
    if tag_name == "br":
        return "\n"

    rendered = "".join(map(node_to_markdown, el.children))
    if tag_name == "a":
        # The label is built from children that were already escaped at the
        # leaves. Escaping it a second time would turn "\_" into "\\\_" and
        # show stray backslashes in the rendered link text.
        # NOTE(review): the href is inserted verbatim; a ")" in the URL may
        # terminate the link early in some Markdown dialects - confirm
        # against the target renderer.
        return "[%s](%s)" % (rendered, el.attrs["href"])
    if tag_name == "p":
        return rendered + "\n\n"
    return rendered
|
||
|
|
||
|
|
||
|
def node_to_plaintext(el: PageElement) -> str:
    """Render a parsed node as plain text.

    Non-``Tag`` nodes are returned as ``str(el)`` without any escaping.
    For ``Tag`` nodes the children are rendered recursively and
    concatenated:

    * ``<a>`` becomes ``label (href)``;
    * ``<p>`` yields its rendered children followed by a blank line;
    * ``<br>`` yields a single newline (its children are ignored);
    * any other tag is dropped, keeping only its rendered children.

    An ``<a>`` without an ``href`` entry in ``el.attrs`` propagates the
    lookup error raised by ``el.attrs["href"]``.
    """
    if not isinstance(el, Tag):
        return str(el)

    tag_name = el.name
    if tag_name == "br":
        return "\n"

    rendered = "".join(node_to_plaintext(child) for child in el.children)
    if tag_name == "a":
        return "%s (%s)" % (rendered, el.attrs["href"])
    if tag_name == "p":
        return rendered + "\n\n"
    return rendered
|