This commit is contained in:
Casey 2024-03-17 13:47:55 +03:00
parent 92a0689eb6
commit 5bb995a885
Signed by: hkc
GPG Key ID: F0F6CFE11CDB0960
1 changed file with 4 additions and 6 deletions

View File

@@ -1,3 +1,4 @@
from typing import NamedTuple, Optional
from httpx import AsyncClient from httpx import AsyncClient
from re import Match, Pattern, compile as regexp, IGNORECASE from re import Match, Pattern, compile as regexp, IGNORECASE
from random import choice from random import choice
@@ -17,7 +18,7 @@ USER_AGENT = [
] ]
URL_PATTERNS: list[tuple[float, Pattern, str]] = [ URL_PATTERNS: list[tuple[float, Pattern, str]] = [
(10.0, regexp(r"https://t.me/\w+[bB]ot/claim"), "Telegram Bot claim link") (30.0, regexp(r"https://t.me/\w+[bB]ot/claim"), "Telegram Bot claim link")
] ]
REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
@@ -36,8 +37,7 @@ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
(3.0, regexp(r"A collection of \w+ NFTs", IGNORECASE), "Collection of [some] NFTs"), (3.0, regexp(r"A collection of \w+ NFTs", IGNORECASE), "Collection of [some] NFTs"),
] ]
MAX_REGEX_SCORE = 30 # sum(t[0] for t in REGEX_PATTERNS) MAX_SCORE = 30 # sum(t[0] for t in REGEX_PATTERNS)
MAX_URL_SCORE = 10
def explain_verification(content: str) -> list[tuple[float, str, Match]]: def explain_verification(content: str) -> list[tuple[float, str, Match]]:
@@ -64,8 +64,6 @@ async def verify_link(url: str) -> float:
for score, regex, explanation in REGEX_PATTERNS: for score, regex, explanation in REGEX_PATTERNS:
for match in regex.finditer(url): for match in regex.finditer(url):
total_score += score total_score += score
if total_score >= MAX_REGEX_SCORE:
return total_score / MAX_REGEX_SCORE
async with AsyncClient( async with AsyncClient(
headers={"User-Agent": get_random_useragent()} headers={"User-Agent": get_random_useragent()}
) as client: ) as client:
@@ -74,4 +72,4 @@ async def verify_link(url: str) -> float:
logger.debug("%s: %s at %d", url, explanation, match.start()) logger.debug("%s: %s at %d", url, explanation, match.start())
total_score += score total_score += score
logger.info("Score for %r: %f", url, total_score) logger.info("Score for %r: %f", url, total_score)
return total_score / MAX_REGEX_SCORE return total_score / MAX_SCORE