mastoposter-oss_images/mastoposter/text/__init__.py

106 lines
3.0 KiB
Python

"""
mastoposter - configurable reposter from Mastodon-compatible Fediverse servers
Copyright (C) 2022-2023 hatkidchan <hatkidchan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
"""
from typing import Callable, Iterable, Literal, Optional
from bs4.element import Tag, PageElement
VALID_OUTPUT_TYPES = Literal["plain", "html", "markdown"]
BULLET = "\u2022"
STRIPE = "\u258d"
def md_escape(text: str) -> str:
return (
text.replace("\\", "\\\\")
.replace("*", "\\*")
.replace("[", "\\[")
.replace("]", "\\]")
.replace("_", "\\_")
.replace("~", "\\~")
.replace("|", "\\|")
.replace("`", "\\`")
)
node_processors: dict[
tuple[VALID_OUTPUT_TYPES, str],
list[
Callable[
[
PageElement,
],
Optional[str],
]
],
] = {}
def register_converter(tag: str, output_type: VALID_OUTPUT_TYPES = "plain"):
def decorate(function):
node_processors.setdefault((output_type, tag), [])
node_processors[output_type, tag].append(function)
return function
return decorate
def register_text_node_converter(output_type: VALID_OUTPUT_TYPES = "plain"):
def decorate(function):
node_processors[output_type, ":text:"] = [function]
return function
return decorate
def register_fmt_converter(
format: str,
tag: str,
output_type: VALID_OUTPUT_TYPES = "plain",
separator: str = "",
):
def fmt_tag(el: Tag) -> str:
if "%s" in format:
return format % nodes_process(el.children, output_type, separator)
return format
register_converter(tag, output_type)(fmt_tag)
def node_process(el: PageElement, type_: VALID_OUTPUT_TYPES) -> str:
if isinstance(el, Tag):
if (type_, el.name) in node_processors:
for func in node_processors[type_, el.name]:
result = func(el)
if result:
return result
return nodes_process(el.children, type_)
if (type_, ":text:") in node_processors:
return node_processors[type_, ":text:"][0](el) or str(el)
return str(el)
def nodes_process(
els: Iterable[PageElement],
type_: VALID_OUTPUT_TYPES = "plain",
separator: str = "",
) -> str:
return str.join(separator, (node_process(el, type_) for el in els))
__all__ = ["node_process", "nodes_process", "md_escape", "BULLET", "STRIPE"]
import mastoposter.text.html # noqa F401
import mastoposter.text.markdown # noqa F401
import mastoposter.text.plain # noqa F401