"""Keyword-based link scoring system.

Provenance: ``nfuck/link_verifier.py`` as created by commit
4cfc74887c9dd1b10c279f06b1fcefd7ccc11d55 ("Added link scoring system",
author hkc, Sat, 27 Jan 2024 12:19:15 +0300).

A page is fetched over HTTP and its text is matched against a table of
weighted, case-insensitive regular expressions (play-to-earn, web3, NFT,
crypto, ...). Every match contributes its pattern's weight to the score.
"""

from random import choice
from re import IGNORECASE, Match, Pattern, compile as regexp

# Pool of User-Agent header values; one is picked at random per request.
USER_AGENT = [
    "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0",
]

# Scoring table: (weight, compiled pattern, human-readable explanation).
REGEX_PATTERNS: list[tuple[float, Pattern[str], str]] = [
    (1.0, regexp(r"\bp2e\b", IGNORECASE), "Play-to-earn keyword"),
    (5.0, regexp(r"play\-to\-earn", IGNORECASE), "Play-to-earn directly"),
    (3.0, regexp(r"encryption\.js", IGNORECASE), "Metamask"),
    (2.0, regexp(r"\bweb3\b", IGNORECASE), "Web3 mention"),
    (1.0, regexp(r"\bnft\b", IGNORECASE), "NFT mention"),
    (0.7, regexp(r"\belon\b", IGNORECASE), "Cryptobro Elon"),
    (0.5, regexp(r"\bbiden\b", IGNORECASE), "Sleepy Joe"),
    (1.0, regexp(r"\bcrypto\b", IGNORECASE), "Crypto mention"),
]

# Sum of all weights, each counted once; divisor used by verify_link().
MAX_SCORE = sum(weight for weight, _, _ in REGEX_PATTERNS)


def explain_verification(content: str) -> list[tuple[float, str, Match[str]]]:
    """Return every pattern hit found in *content*.

    Each entry is ``(weight, explanation, match)``. Entries are grouped in
    the order the patterns appear in ``REGEX_PATTERNS`` and, within one
    pattern, in the order the matches occur in *content*. A pattern that
    matches several times yields one entry per occurrence.
    """
    return [
        (weight, explanation, match)
        for weight, pattern, explanation in REGEX_PATTERNS
        for match in pattern.finditer(content)
    ]


async def verify_link(url: str) -> float:
    """Fetch *url* and return its keyword score divided by ``MAX_SCORE``.

    The score is the sum of the weights of all hits reported by
    ``explain_verification`` for the response text. Because every
    occurrence of a keyword counts, the result is not capped at 1.0.

    No exception handling is done here: errors raised by httpx while
    connecting or reading the response propagate to the caller.
    """
    # Imported lazily so the pure regex helpers in this module stay
    # importable even where the HTTP client is not available.
    from httpx import AsyncClient

    async with AsyncClient(
        headers={"User-Agent": choice(USER_AGENT)},
        # httpx does not follow redirects unless asked to; without this a
        # shortened or redirecting link would be scored on the redirect
        # response's body instead of the page it points to.
        follow_redirects=True,
    ) as client:
        page_text = (await client.get(url)).text

    total_score = sum(
        weight for weight, _, _ in explain_verification(page_text)
    )
    return total_score / MAX_SCORE