From 6d3e00ba4ab4389a59ff0a9ac594ea3aba2d2d18 Mon Sep 17 00:00:00 2001 From: hkc Date: Wed, 8 Feb 2023 19:38:31 +0300 Subject: [PATCH] Fixed regex mode of text filter just NOT WORKING Apparently, `re.match` only matches at the very beginning of the text, not anywhere inside it. We have to use `.search` instead. Also removed the hand-rolled plaintext conversion, since the status already exposes it as the `content_plaintext` property --- mastoposter/filters/text.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/mastoposter/filters/text.py b/mastoposter/filters/text.py index 56bc28d..2d04dd8 100644 --- a/mastoposter/filters/text.py +++ b/mastoposter/filters/text.py @@ -2,7 +2,6 @@ from configparser import SectionProxy from re import Pattern, compile as regexp from typing import Optional, Set -from bs4 import BeautifulSoup, PageElement, Tag from mastoposter.filters.base import BaseFilter from mastoposter.types import Status @@ -27,30 +26,10 @@ class TextFilter(BaseFilter, filter_name="content"): return cls(tags=set(section["tags"].split())) raise AssertionError("neither regexp or tags were set") - @classmethod - def node_to_text(cls, el: PageElement) -> str: - if isinstance(el, Tag): - if el.name == "br": - return "\n" - elif el.name == "p": - return ( - str.join("", map(cls.node_to_text, el.children)) + "\n\n" - ) - return str.join("", map(cls.node_to_text, el.children)) - return str(el) - - @classmethod - def html_to_plain(cls, html: str) -> str: - soup = BeautifulSoup(html, "lxml") - return cls.node_to_text(soup).rstrip() - def __call__(self, status: Status) -> bool: source = status.reblog or status if self.regexp is not None: - return ( - self.regexp.match(self.html_to_plain(source.content)) - is not None - ) + return self.regexp.search(source.content_plaintext) is not None elif self.tags: return len(self.tags & {t.name.lower() for t in source.tags}) > 0 else: