Docka and better explanation of removal

2024-02-02 22:06:42 +03:00 · 2024-02-02 22:06:42 +03:00 · 4a207ad544
parent e137933115
commit 4a207ad544
6 changed files with 113 additions and 12 deletions
--- a/11
+++ b/11
@@ -0,0 +1,11 @@
 # Image for the bot: installs the Python dependencies, copies the project
 # into /app and starts the nfuck package as a module.
 FROM python:3.10-alpine
 WORKDIR /app
 # Copy only the requirements first so the dependency layer can be reused
 # from the build cache when just the source code changes.
 COPY requirements.txt /app/requirements.txt
 # --no-cache-dir keeps pip's download cache out of the image layer.
 RUN pip install --no-cache-dir -r /app/requirements.txt && rm /app/requirements.txt
 COPY . /app
 CMD ["python3", "-m", "nfuck"]
--- a/logging.json
+++ b/logging.json
@@ -0,0 +1,24 @@
 {
  "version": 1,
  "disable_existing_loggers": false,
  "formatters": {
    "simple": {
      "format": "%(asctime)s %(levelname)s %(module)s L%(lineno)d: %(message)s"
    }
  },
  "handlers": {
    "stdout": {
      "class": "logging.StreamHandler",
      "formatter": "simple",
      "stream": "ext://sys.stdout"
    }
  },
  "loggers": {
    "root": {
      "level": "DEBUG",
      "handlers": [
        "stdout"
      ]
    }
  }
 }
--- a/nfuck/__init__.py
+++ b/nfuck/__init__.py
@@ -3,44 +3,89 @@ from aiogram.types import Message
 from aiogram.filters import Command
 from httpx import AsyncClient
-from nfuck.link_verifier import explain_verification, get_random_useragent, verify_link
+from nfuck.link_verifier import (
    explain_verification,
    get_random_useragent,
    verify_link,
 )
 from nfuck.utils import sanitize_link
 dp = Dispatcher()
 # @dp.message(Command("dd"))
 # async def on_dd(message: Message):
 #     if message.reply_to_message:
 #         await message.reply_to_message.delete()
@dp.message(Command("check"))
 async def on_check(message: Message):
    results = []
    for entity in message.entities or []:
        if entity.type in ("text_link", "url") and message.text:
            if entity.type == "url":
-                entity.url = message.text[entity.offset : entity.offset + entity.length]
+                entity.url = message.text[
                    entity.offset : entity.offset + entity.length
                ]
            if not entity.url:
                continue
            if not entity.url.startswith("http"):
                entity.url = "https://" + entity.url
            async with AsyncClient(
                headers={"User-Agent": get_random_useragent()}
            ) as client:
                data = (await client.get(entity.url)).text
                total_score = 0
-                results.append(f"<b>{entity.url}</b>")
+                results.append(f"<b>{sanitize_link(entity.url)}</b>")
                for score, explanation, match in explain_verification(data):
                    results.append(f"{match.span()}: {explanation}")
                    total_score += score
                results.append(f"<b>Total score: {total_score}</b>")
                results.append("")
-    await message.reply(str.join("\n", results), parse_mode="html")
+    if results:
        await message.reply(
            str.join("\n", results),
            parse_mode="html",
            disable_web_page_preview=True,
        )
    else:
        await message.reply(":shrug:")
@dp.message()
 async def on_message(message: Message):
    detected_links: list[tuple[str, float]] = []
    for entity in message.entities or []:
        if entity.type in ("text_link", "url") and message.text:
            if entity.type == "url":
-                entity.url = message.text[entity.offset : entity.offset + entity.length]
+                entity.url = message.text[
                    entity.offset : entity.offset + entity.length
                ]
            if not entity.url:
                continue
            confidence = await verify_link(entity.url)
-            if confidence > 0.75:
+            if confidence > 0.9:
-                await message.reply(f"Holy smokes, another one (~{confidence*100:.0f}% sure)")
+                detected_links.append((entity.url, confidence))
-                await message.delete()
+    if detected_links:
-
+        if message.from_user:
-
+            await message.reply(
                str.join(
                    "\n",
                    [
                        f"Found {len(detected_links)} links:",
                        str.join(
                            "\n",
                            [
                                f"{i}. {sanitize_link(url)} with confidence {confidence:.2f}"
                                for i, (url, confidence) in enumerate(
                                    detected_links, 1
                                )
                            ],
                        ),
                        f"Sender: {message.from_user.full_name} #{message.from_user.id} (@{message.from_user.username})",
                    ],
                ),
                parse_mode="html",
            )
        await message.delete()
--- a/nfuck/__main__.py
+++ b/nfuck/__main__.py
@@ -2,6 +2,12 @@ from aiogram import Bot
 from aiosqlite import connect as asqlite
 from os import environ
 from nfuck import dp
 import logging.config
 from json import load as load_json
 # Configure logging from logging.json (path is relative to the current
 # working directory) when this module is loaded.
 with open("logging.json") as config_file:
     logging.config.dictConfig(load_json(config_file))
 async def main():
    bot = Bot(environ["TG_BOT_TOKEN"])
@@ -9,7 +15,8 @@ async def main():
    await dp.start_polling(bot, db=db)
    await db.close()
 if __name__ == "__main__":
     from asyncio import run
     # Run the bot exactly once: a second run(main()) would create a new
     # Bot and start polling again after the first run has returned.
     run(main())
--- a/nfuck/link_verifier.py
+++ b/nfuck/link_verifier.py
@@ -1,6 +1,10 @@
 from httpx import AsyncClient
 from re import Match, Pattern, compile as regexp, IGNORECASE
 from random import choice
 from logging import DEBUG, getLogger
 logger = getLogger("nfuck.link_verifier")
 logger.setLevel(DEBUG)
 USER_AGENT = [
    "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
@@ -19,6 +23,7 @@ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
 MAX_SCORE = sum(t[0] for t in REGEX_PATTERNS)
 def explain_verification(content: str) -> list[tuple[float, str, Match]]:
    result: list[tuple[float, str, Match]] = []
    for score, regex, explanation in REGEX_PATTERNS:
@@ -26,15 +31,22 @@ def explain_verification(content: str) -> list[tuple[float, str, Match]]:
            result.append((score, explanation, match))
    return result
 def get_random_useragent() -> str:
     """Return one entry of USER_AGENT, picked at random."""
     picked = choice(USER_AGENT)
     return picked
 async def verify_link(url: str) -> float:
    total_score = 0
    logger.info("Verifying link %s", url)
    if not url.startswith("http"):
        url = "https://" + url
    async with AsyncClient(
        headers={"User-Agent": get_random_useragent()}
    ) as client:
        data = await client.get(url)
-        for score, _, _ in explain_verification(data.text):
+        for score, explanation, match in explain_verification(data.text):
            logger.debug("%s: %s at %d", url, explanation, match.start())
            total_score += score
    logger.info("Score for %r: %f", url, total_score)
    return total_score / MAX_SCORE
--- a/nfuck/utils.py
+++ b/nfuck/utils.py
@@ -0,0 +1,2 @@
 def sanitize_link(url: str) -> str:
     """Return *url* with "://" replaced by "[://]" and every "." by "[dot]"."""
     # Bracket the scheme separator first; the inserted "[://]" contains no
     # dot, so the second pass only touches dots from the original URL.
     bracketed_scheme = "[://]".join(url.split("://"))
     return "[dot]".join(bracketed_scheme.split("."))
		`@ -0,0 +1,2 @@`
							`def sanitize_link(url: str) -> str:`
							`return url.replace("://", "[://]").replace(".", "[dot]")`