# Review commentary for this patch. Everything above the first "diff --git"
# header is free text and is not part of any hunk.
#
# What the patch does, file by file:
#   Dockerfile (new)       - builds on python:3.10-alpine, installs
#                            requirements.txt with pip, copies the tree into
#                            /app and starts the bot with `python3 -m nfuck`.
#   logging.json (new)     - logging dictConfig data: a "simple" formatter, one
#                            StreamHandler on sys.stdout, and a logger entry
#                            named "root" at level DEBUG using that handler.
#   nfuck/__init__.py      - /check: prepends "https://" when the URL does not
#                            start with "http", prints the link through
#                            sanitize_link, disables the web page preview and
#                            replies ":shrug:" when `results` is empty.
#                            on_message: the confidence threshold moves from
#                            0.75 to 0.9; matching links are collected in
#                            `detected_links` and reported in one reply that
#                            also names the sender; message.delete() follows.
#                            A commented-out /dd handler is added as well.
#   nfuck/__main__.py      - loads logging.json at module import and passes it
#                            to logging.config.dictConfig.
#   nfuck/link_verifier.py - adds the "nfuck.link_verifier" logger (level set
#                            to DEBUG in code), logs the URL, every pattern
#                            match and the final score, and prepends
#                            "https://" the same way /check does.
#   nfuck/utils.py (new)   - sanitize_link: replaces "://" with "[://]" and
#                            "." with "[dot]".
#
# NOTE(review): whitespace in this view is collapsed, so indentation and blank
#   context lines are not recoverable; in particular, whether message.delete()
#   sits under `if detected_links:` or under `if message.from_user:` cannot be
#   confirmed from here - TODO confirm against the real file.
# NOTE(review): both replies are sent with parse_mode="html", but
#   from_user.full_name, from_user.username and the pattern explanation are
#   interpolated as-is, and sanitize_link does not escape "<", ">" or "&".
#   Presumably a sender name containing markup can make the reply fail before
#   delete() is reached - confirm, and consider html.escape on those values.
# NOTE(review): from_user.username is formatted without a None check; for
#   users without a username this presumably renders as "(@None)" - confirm.
# NOTE(review): startswith("http") is also true for hosts such as
#   "httpbin.org", which would then be requested without a scheme; checking
#   for "http://" / "https://" looks like the intent - confirm.
# NOTE(review): no exception handling is visible around client.get(...) in
#   on_check or verify_link; confirm how fetch failures should be handled so
#   that one bad link does not abort the whole handler.
# NOTE(review): verify_link divides by MAX_SCORE, the sum of the pattern
#   scores; that is zero if REGEX_PATTERNS is ever empty - TODO confirm the
#   list cannot be empty.
# NOTE(review): logging.json configures "root" as an entry under "loggers";
#   the dictConfig schema has a dedicated top-level "root" key. Confirm that
#   the entry reaches the real root logger on the Python version in use.
# NOTE(review): logging.json is opened by relative path, so it is resolved
#   against the working directory (the Dockerfile sets WORKDIR /app and copies
#   the tree there).
# NOTE(review): the Dockerfile removes /app/requirements.txt and then runs
#   `COPY . /app`; presumably the file is copied back unless a .dockerignore
#   excludes it, which would make the `rm` a no-op - confirm.
# NOTE(review): link_verifier forces its logger to DEBUG in code, so the level
#   chosen in logging.json does not control that logger's own threshold.
diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e2e2fed --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.10-alpine + +WORKDIR /app + +COPY requirements.txt /app/requirements.txt + +RUN pip install -r /app/requirements.txt && rm /app/requirements.txt + +COPY . /app + +CMD ["python3", "-m", "nfuck"] diff --git a/logging.json b/logging.json new file mode 100644 index 0000000..8d2f159 --- /dev/null +++ b/logging.json @@ -0,0 +1,24 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "simple": { + "format": "%(asctime)s %(levelname)s %(module)s L%(lineno)d: %(message)s" + } + }, + "handlers": { + "stdout": { + "class": "logging.StreamHandler", + "formatter": "simple", + "stream": "ext://sys.stdout" + } + }, + "loggers": { + "root": { + "level": "DEBUG", + "handlers": [ + "stdout" + ] + } + } +} diff --git a/nfuck/__init__.py b/nfuck/__init__.py index e9ef936..cbb183d 100644 --- a/nfuck/__init__.py +++ b/nfuck/__init__.py @@ -3,44 +3,89 @@ from aiogram.types import Message from aiogram.filters import Command from httpx import AsyncClient -from nfuck.link_verifier import explain_verification, get_random_useragent, verify_link +from nfuck.link_verifier import ( + explain_verification, + get_random_useragent, + verify_link, +) +from nfuck.utils import sanitize_link + dp = Dispatcher() +# @dp.message(Command("dd")) +# async def on_dd(message: Message): +# if message.reply_to_message: +# await message.reply_to_message.delete() + + @dp.message(Command("check")) async def on_check(message: Message): results = [] for entity in message.entities or []: if entity.type in ("text_link", "url") and message.text: if entity.type == "url": - entity.url = message.text[entity.offset : entity.offset + entity.length] + entity.url = message.text[ + entity.offset : entity.offset + entity.length + ] if not entity.url: continue + if not entity.url.startswith("http"): + entity.url = "https://" + entity.url async with AsyncClient( 
headers={"User-Agent": get_random_useragent()} ) as client: data = (await client.get(entity.url)).text total_score = 0 - results.append(f"{entity.url}") + results.append(f"{sanitize_link(entity.url)}") for score, explanation, match in explain_verification(data): results.append(f"{match.span()}: {explanation}") total_score += score results.append(f"Total score: {total_score}") results.append("") - await message.reply(str.join("\n", results), parse_mode="html") + if results: + await message.reply( + str.join("\n", results), + parse_mode="html", + disable_web_page_preview=True, + ) + else: + await message.reply(":shrug:") @dp.message() async def on_message(message: Message): + detected_links: list[tuple[str, float]] = [] for entity in message.entities or []: if entity.type in ("text_link", "url") and message.text: if entity.type == "url": - entity.url = message.text[entity.offset : entity.offset + entity.length] + entity.url = message.text[ + entity.offset : entity.offset + entity.length + ] if not entity.url: continue confidence = await verify_link(entity.url) - if confidence > 0.75: - await message.reply(f"Holy smokes, another one (~{confidence*100:.0f}% sure)") - await message.delete() - - + if confidence > 0.9: + detected_links.append((entity.url, confidence)) + if detected_links: + if message.from_user: + await message.reply( + str.join( + "\n", + [ + f"Found {len(detected_links)} links:", + str.join( + "\n", + [ + f"{i}. 
{sanitize_link(url)} with confidence {confidence:.2f}" + for i, (url, confidence) in enumerate( + detected_links, 1 + ) + ], + ), + f"Sender: {message.from_user.full_name} #{message.from_user.id} (@{message.from_user.username})", + ], + ), + parse_mode="html", + ) + await message.delete() diff --git a/nfuck/__main__.py b/nfuck/__main__.py index 8e8ff5e..8afd27c 100644 --- a/nfuck/__main__.py +++ b/nfuck/__main__.py @@ -2,6 +2,12 @@ from aiogram import Bot from aiosqlite import connect as asqlite from os import environ from nfuck import dp +import logging.config +from json import load as load_json + +with open("logging.json", "r") as f_in: + logging.config.dictConfig(load_json(f_in)) + async def main(): bot = Bot(environ["TG_BOT_TOKEN"]) @@ -9,7 +15,8 @@ async def main(): await dp.start_polling(bot, db=db) await db.close() + if __name__ == "__main__": from asyncio import run - run(main()) + run(main()) diff --git a/nfuck/link_verifier.py b/nfuck/link_verifier.py index 52eb915..aa5db90 100644 --- a/nfuck/link_verifier.py +++ b/nfuck/link_verifier.py @@ -1,6 +1,10 @@ from httpx import AsyncClient from re import Match, Pattern, compile as regexp, IGNORECASE from random import choice +from logging import DEBUG, getLogger + +logger = getLogger("nfuck.link_verifier") +logger.setLevel(DEBUG) USER_AGENT = [ "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0" @@ -19,6 +23,7 @@ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [ MAX_SCORE = sum(t[0] for t in REGEX_PATTERNS) + def explain_verification(content: str) -> list[tuple[float, str, Match]]: result: list[tuple[float, str, Match]] = [] for score, regex, explanation in REGEX_PATTERNS: @@ -26,15 +31,22 @@ def explain_verification(content: str) -> list[tuple[float, str, Match]]: result.append((score, explanation, match)) return result + def get_random_useragent() -> str: return choice(USER_AGENT) + async def verify_link(url: str) -> float: total_score = 0 + logger.info("Verifying link %s", url) + if 
not url.startswith("http"): + url = "https://" + url async with AsyncClient( headers={"User-Agent": get_random_useragent()} ) as client: data = await client.get(url) - for score, _, _ in explain_verification(data.text): + for score, explanation, match in explain_verification(data.text): + logger.debug("%s: %s at %d", url, explanation, match.start()) total_score += score + logger.info("Score for %r: %f", url, total_score) return total_score / MAX_SCORE diff --git a/nfuck/utils.py b/nfuck/utils.py new file mode 100644 index 0000000..5a6d704 --- /dev/null +++ b/nfuck/utils.py @@ -0,0 +1,2 @@ +def sanitize_link(url: str) -> str: + return url.replace("://", "[://]").replace(".", "[dot]")