From dd60de5067a17f4461e7855113c92f44aaa87241 Mon Sep 17 00:00:00 2001 From: hkc Date: Tue, 11 Oct 2022 09:00:37 +0300 Subject: [PATCH] Added more HTML tags --- mastoposter/utils.py | 125 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/mastoposter/utils.py b/mastoposter/utils.py index 7e234a2..30e0352 100644 --- a/mastoposter/utils.py +++ b/mastoposter/utils.py @@ -1,4 +1,5 @@ from html import escape +from typing import Callable, Dict from bs4.element import Tag, PageElement @@ -16,31 +17,119 @@ def md_escape(text: str) -> str: def node_to_html(el: PageElement) -> str: + TAG_TRANSFORMS: Dict[str, Callable[[Tag,], str]] = { + "a": lambda tag: '{}'.format( + escape(tag.attrs["href"]), + str.join("", map(node_to_html, tag.children)), + ), + "p": lambda tag: ( + str.join("", map(node_to_html, tag.children)) + "\n\n" + ), + "i": lambda tag: ( + "%s" % str.join("", map(node_to_html, tag.children)) + ), + "b": lambda tag: ( + "%s" % str.join("", map(node_to_html, tag.children)) + ), + "s": lambda tag: ( + "%s" % str.join("", map(node_to_html, tag.children)) + ), + "u": lambda tag: ( + "%s" % str.join("", map(node_to_html, tag.children)) + ), + "pre": lambda tag: ( + "\n
%s
\n" % str.join("", map(node_to_html, tag.children)) + ), + "code": lambda tag: ( + "%s" % str.join("", map(node_to_html, tag.children)) + ), + "blockquote": lambda tag: "\n%s" + % str.join( + "\n", + ( + "| %s" % part + for part in str.join( + "", map(node_to_html, tag.children) + ).split("\n") + ), + ), + "br": lambda _: "\n", + } + + TAG_SUBSTITUTIONS: Dict[str, str] = { + "strong": "b", + "em": "i", + "del": "s", + "ins": "u", + } + if isinstance(el, Tag): - if el.name == "a": - return '{}'.format( - escape(el.attrs["href"]), - str.join("", map(node_to_html, el.children)), - ) - elif el.name == "p": - return str.join("", map(node_to_html, el.children)) + "\n\n" - elif el.name == "br": - return "\n" + if el.name in TAG_TRANSFORMS: + return TAG_TRANSFORMS[el.name](el) + if el.name in TAG_SUBSTITUTIONS: + sub = TAG_SUBSTITUTIONS[el.name] + if sub in TAG_TRANSFORMS: + return TAG_TRANSFORMS[sub](el) return str.join("", map(node_to_html, el.children)) return escape(str(el)) def node_to_markdown(el: PageElement) -> str: - if isinstance(el, Tag): - if el.name == "a": - return "[%s](%s)" % ( - md_escape(str.join("", map(node_to_markdown, el.children))), - el.attrs["href"], + TAG_TRANSFORMS: Dict[str, Callable[[Tag,], str]] = { + "a": lambda tag: "[{}]({})".format( + md_escape(str.join("", map(node_to_markdown, tag.children))), + tag.attrs["href"], + ), + "p": lambda tag: ( + str.join("", map(node_to_markdown, tag.children)) + "\n\n" + ), + "i": lambda tag: ( + "_%s_" % str.join("", map(node_to_markdown, tag.children)) + ), + "b": lambda tag: ( + "*%s*" % str.join("", map(node_to_markdown, tag.children)) + ), + "s": lambda tag: ( + "~%s~" % str.join("", map(node_to_markdown, tag.children)) + ), + "u": lambda tag: ( + "__%s__" % str.join("", map(node_to_markdown, tag.children)) + ), + "pre": lambda tag: ( + "\n``%s``\n" % str.join("", map(node_to_markdown, tag.children)) + ), + "code": lambda tag: ( + "`%s`" % str.join("", map(node_to_markdown, tag.children)) + ), + "blockquote": lambda tag: ( + "\n%s" + % str.join( + "\n", + ( + "▍%s" % part + for part in str.join( + "", map(node_to_markdown, tag.children) + ).split("\n") + ), ) - elif el.name == "p": - return str.join("", map(node_to_markdown, el.children)) + "\n\n" - elif el.name == "br": - return "\n" + ), + "br": lambda _: "\n", + } + + TAG_SUBSTITUTIONS: Dict[str, str] = { + "strong": "b", + "em": "i", + "del": "s", + "ins": "u", + } + + if isinstance(el, Tag): + if el.name in TAG_TRANSFORMS: + return TAG_TRANSFORMS[el.name](el) + if el.name in TAG_SUBSTITUTIONS: + sub = TAG_SUBSTITUTIONS[el.name] + if sub in TAG_TRANSFORMS: + return TAG_TRANSFORMS[sub](el) return str.join("", map(node_to_markdown, el.children)) return md_escape(str(el))