1
0
Fork 0

Fixed regex mode of the text filter, which was just NOT WORKING

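Apparently, `re.match` only matches at the very beginning of the string, so
in practice it checked the start of the first line ONLY. We have to use
`.search` instead, which scans the whole text.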

Also nuked that plaintext conversion; we already have it on the status
as a property (`content_plaintext`).
This commit is contained in:
Casey 2023-02-08 19:38:31 +03:00
parent 8b03ccf44a
commit 6d3e00ba4a
Signed by: hkc
GPG Key ID: F0F6CFE11CDB0960
1 changed file with 1 addition and 22 deletions

View File

@@ -2,7 +2,6 @@ from configparser import SectionProxy
from re import Pattern, compile as regexp
from typing import Optional, Set
from bs4 import BeautifulSoup, PageElement, Tag
from mastoposter.filters.base import BaseFilter
from mastoposter.types import Status
@@ -27,30 +26,10 @@ class TextFilter(BaseFilter, filter_name="content"):
return cls(tags=set(section["tags"].split()))
raise AssertionError("neither regexp or tags were set")
@classmethod
def node_to_text(cls, el: PageElement) -> str:
if isinstance(el, Tag):
if el.name == "br":
return "\n"
elif el.name == "p":
return (
str.join("", map(cls.node_to_text, el.children)) + "\n\n"
)
return str.join("", map(cls.node_to_text, el.children))
return str(el)
@classmethod
def html_to_plain(cls, html: str) -> str:
soup = BeautifulSoup(html, "lxml")
return cls.node_to_text(soup).rstrip()
def __call__(self, status: Status) -> bool:
source = status.reblog or status
if self.regexp is not None:
return (
self.regexp.match(self.html_to_plain(source.content))
is not None
)
return self.regexp.search(source.content_plaintext) is not None
elif self.tags:
return len(self.tags & {t.name.lower() for t in source.tags}) > 0
else: