Fixed regex mode of text filter, which was just NOT WORKING

Apparently, `re.match` only matches at the very BEGINNING of the string. We
have to use `.search` instead to find the pattern anywhere in the text.

Also nuked that plaintext conversion; we already have it in status
as a property
This commit is contained in:
Casey 2023-02-08 19:38:31 +03:00
parent 8b03ccf44a
commit 6d3e00ba4a
Signed by: hkc
GPG Key ID: F0F6CFE11CDB0960
1 changed files with 1 additions and 22 deletions

View File

@ -2,7 +2,6 @@ from configparser import SectionProxy
from re import Pattern, compile as regexp from re import Pattern, compile as regexp
from typing import Optional, Set from typing import Optional, Set
from bs4 import BeautifulSoup, PageElement, Tag
from mastoposter.filters.base import BaseFilter from mastoposter.filters.base import BaseFilter
from mastoposter.types import Status from mastoposter.types import Status
@ -27,30 +26,10 @@ class TextFilter(BaseFilter, filter_name="content"):
return cls(tags=set(section["tags"].split())) return cls(tags=set(section["tags"].split()))
raise AssertionError("neither regexp or tags were set") raise AssertionError("neither regexp or tags were set")
@classmethod
def node_to_text(cls, el: PageElement) -> str:
    """Recursively flatten a parsed HTML node into plain text.

    Non-``Tag`` nodes are returned via ``str()``. A ``<br>`` tag becomes a
    single newline, a ``<p>`` tag yields its children's text followed by
    a blank line, and any other tag yields just its children's text.
    """
    # Anything that is not a Tag is stringified as-is.
    if not isinstance(el, Tag):
        return str(el)

    # Line breaks carry no children worth visiting.
    if el.name == "br":
        return "\n"

    inner = "".join(cls.node_to_text(child) for child in el.children)
    if el.name == "p":
        # Paragraphs are separated by an empty line.
        return inner + "\n\n"
    return inner
@classmethod
def html_to_plain(cls, html: str) -> str:
    """Convert an HTML string to plain text.

    The markup is parsed with BeautifulSoup using the ``lxml`` parser,
    flattened with ``node_to_text``, and trailing whitespace is stripped.
    """
    parsed = BeautifulSoup(html, "lxml")
    text = cls.node_to_text(parsed)
    return text.rstrip()
def __call__(self, status: Status) -> bool: def __call__(self, status: Status) -> bool:
source = status.reblog or status source = status.reblog or status
if self.regexp is not None: if self.regexp is not None:
return ( return self.regexp.search(source.content_plaintext) is not None
self.regexp.match(self.html_to_plain(source.content))
is not None
)
elif self.tags: elif self.tags:
return len(self.tags & {t.name.lower() for t in source.tags}) > 0 return len(self.tags & {t.name.lower() for t in source.tags}) > 0
else: else: