from __future__ import annotations
from typing import Union, Optional

import asyncio
from telethon.tl.types import DocumentAttributeVideo, DocumentAttributeAnimated
from telethon.errors.rpcerrorlist import SlowModeWaitError, FloodWaitError
from asyncio import BoundedSemaphore

from src import log, env, locks
from src.parsing.medium import Medium

logger = log.getLogger('RSStT.message')


class Message:
    """
    A message described by its text, optional media and parse mode.

    Double-underscore class attributes are name-mangled by Python
    (``_Message__...``), so they are private to this class.
    """
    # class-level default flag
    # NOTE(review): presumably disables retrying when set to True -- confirm against the sending code
    no_retry = False
    # size of the class-wide semaphore created on the next line
    __overall_concurrency = 30
    # a single bounded semaphore shared by the whole class
    # NOTE(review): presumably caps how many messages are sent concurrently -- confirm
    __overall_semaphore = BoundedSemaphore(__overall_concurrency)

    # NOTE(review): looks like a selector for a read-type lock from `locks` -- confirm
    __lock_type = 'r'

    def __init__(self,
                 text: Optional[str] = None,
                 media: Optional[Union[list[Medium], Medium]] = None,
                 parse_mode: Optional[str] = 'HTML'):
        """
        Store the message content and reset the retry counter.

        :param text: message text, or ``None``
        :param media: a single ``Medium``, a list of ``Medium``, or ``None``
        :param parse_mode: parse mode of ``text``, ``'HTML'`` by default
        """
        self.text = text
        self.media = media
        self.parse_mode = parse_mode
        self.retries = 0  # a fresh message starts with a retry count of zero
from tortoise import Tortoise
from aerich import Command

from . import config, models
from src import env, log
from . import effective_utils

logger = log.getLogger('RSStT.db')

# re-export the models and helpers so that callers can reach them through this package
User = models.User
Feed = models.Feed
Sub = models.Sub
Option = models.Option
EffectiveOptions = effective_utils.EffectiveOptions
EffectiveTasks = effective_utils.EffectiveTasks


async def init():
    """
    Apply the aerich migrations for the configured database, then initialise ``effective_utils``.

    The migrations directory is chosen by the prefix of ``env.DATABASE_URL``
    (``sqlite`` or ``postgres``).

    :raises SystemExit: with exit code 1 (after a critical log record) if the
        database URL starts with neither supported scheme
    """
    if env.DATABASE_URL.startswith('sqlite'):
        migrations_location = 'src/db/migrations_sqlite'
    elif env.DATABASE_URL.startswith('postgres'):
        migrations_location = 'src/db/migrations_pgsql'
    else:
        logger.critical('INVALID DB SCHEME! ONLY "sqlite" AND "postgres" ARE SUPPORTED!')
        # `exit()` is a helper injected by the `site` module for interactive use and may be absent;
        # raising SystemExit directly terminates with the same exit code
        raise SystemExit(1)

    aerich_command = Command(tortoise_config=config.TORTOISE_ORM, location=migrations_location)
    await aerich_command.init()
    await aerich_command.upgrade()
    # NOTE(review): there is no explicit Tortoise.init() call here -- presumably aerich's Command.init()
    # already sets up the ORM; confirm before adding one
    await effective_utils.init()
    logger.info('Successfully connected to the DB')
import asyncio
import re
from functools import partial, wraps
from telethon import events
from telethon.tl import types
from telethon.tl.patched import Message, MessageService
from telethon.tl.functions.bots import SetBotCommandsRequest
from telethon.tl.functions.channels import GetParticipantRequest
from telethon.errors import (
    FloodError, MessageNotModifiedError, UserNotParticipantError, QueryIdInvalidError,
    UserIsBlockedError, ChatWriteForbiddenError, UserIdInvalidError, ChannelPrivateError,
)

from src import env, log, db, locks
from src.i18n import i18n

logger = log.getLogger('RSStT.command')


# ANONYMOUS_ADMIN = 1087968824  # no need for MTProto, user_id will be `None` for anonymous admins


def parse_command(command: str) -> list[str]:
    """
    Split a command string into its whitespace-separated parts.

    Leading and trailing whitespace is stripped before splitting, and any run
    of whitespace counts as one separator. An empty or whitespace-only command
    yields ``['']`` (one empty part), not an empty list.

    :param command: raw command text
    :return: the parts of the command, in order
    """
    return re.split(r'\s+', command.strip())


def parse_callback_data_with_page(callback_data: bytes) -> tuple[int, int]:
    """
    callback data = command_{id}[|{page}]

    :param callback_data: callback data
    :return: id, page
    """
from __future__ import annotations

import asyncio
import re
from telethon.tl.types import InputMediaPhotoExternal, InputMediaDocumentExternal

from src import env, log, web
from src.parsing import post

logger = log.getLogger('RSStT.medium')

# size keywords that may appear as a path segment of a sinaimg URL (see `sizeParser`)
sizes = ['large', 'mw2048', 'mw1024', 'mw720', 'middle']
# splits a sinaimg image URL into <domain> / <size> / <filename>
sizeParser = re.compile(r'(?P<domain>^https?://\w+\.sinaimg\.\S+/)'
                        r'(?P<size>large|mw2048|mw1024|mw720|middle)'
                        r'(?P<filename>/\w+\.\w+$)')
# splits a sinaimg URL around the single digit that ends its first host label:
# <url_prefix><server_id><url_suffix>
serverParser = re.compile(r'(?P<url_prefix>^https?://[a-zA-Z_-]+)'
                          r'(?P<server_id>\d)'
                          r'(?P<url_suffix>\.sinaimg\.\S+$)')

# module-wide bounded semaphore with 5 slots
# NOTE(review): presumably limits concurrent web requests made for media -- confirm
_web_semaphore = asyncio.BoundedSemaphore(5)


class Medium:
    """Base description of a medium that is identified by its URL."""
    type = 'medium_base_class'
    max_size = 20 * 1024 * 1024  # 20971520, i.e. 20 MiB

    def __init__(self, url: str):
        """
        Remember the URL and reset the per-medium state.

        :param url: URL of the medium; also kept unchanged as ``original_url``
        """
        self.url = self.original_url = url
        # NOTE(review): `None` presumably means "not validated yet" -- confirm
        self.valid = None
        self._server_change_count = 0
import aiohttp import feedparser from concurrent.futures import ThreadPoolExecutor from aiohttp_socks import ProxyConnector from aiohttp_retry import RetryClient, ExponentialRetry from ssl import SSLError from ipaddress import ip_network, ip_address from urllib.parse import urlparse from socket import AF_INET, AF_INET6 from multidict import CIMultiDictProxy from attr import define from src import env, log, locks from src.i18n import i18n logger = log.getLogger('RSStT.web') _feedparser_thread_pool = ThreadPoolExecutor(1, 'feedparser_') _resolver = aiodns.DNSResolver(timeout=3, loop=env.loop) PROXY = env.R_PROXY.replace('socks5h', 'socks5').replace( 'sock4a', 'socks4') if env.R_PROXY else None PRIVATE_NETWORKS = tuple( ip_network(ip_block) for ip_block in ( '127.0.0.0/8', '::1/128', # loopback is not a private network, list in here for convenience '169.254.0.0/16', 'fe80::/10', # link-local address '10.0.0.0/8', # class A private network '172.16.0.0/12', # class B private networks '192.168.0.0/16', # class C private networks
from functools import partial
from time import sleep
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from telethon import TelegramClient, events
from telethon.errors import ApiIdPublishedFloodError
from telethon.tl import types
from random import sample
from pathlib import Path

from src import env, log, db, command
from src.i18n import i18n, ALL_LANGUAGES
from src.parsing import tgraph

# log
logger = log.getLogger('RSStT')

# initializing bot
loop = env.loop

# make sure the ./config directory exists before anything needs it
Path("config").mkdir(parents=True, exist_ok=True)
bot: Optional[TelegramClient] = None  # no client object yet
if not (env.API_ID and env.API_HASH):
    logger.info('API_ID and/or API_HASH not set, use sample APIs instead. API_ID_PUBLISHED_FLOOD_ERROR may occur.')
    # sampling as many pairs as there are yields every sample API exactly once, in random order
    API_KEYs = dict(sample(tuple(env.SAMPLE_APIS.items()), len(env.SAMPLE_APIS)))
VideoContentTypeInvalidError, VideoFileInvalidError, ExternalUrlInvalidError, # errors caused by server instability or network instability between img server and telegram server WebpageCurlFailedError, WebpageMediaEmptyError, MediaEmptyError, FileReferenceExpiredError, BadRequestError, # only FILE_REFERENCE_\d_EXPIRED # errors caused by lack of permission UserIsBlockedError, UserIdInvalidError, ChatWriteForbiddenError, ChannelPrivateError ) from src import env, message, log, web from src.parsing import tgraph from src.parsing.medium import Video, Image, Media, Animation from src.parsing.html_text import * logger = log.getLogger('RSStT.post') # python-Levenshtein cannot handle UTF-8 input properly, mute the annoying warning from fuzzywuzzy import warnings warnings.original_warn = warnings.warn warnings.warn = lambda *args, **kwargs: None from fuzzywuzzy import fuzz warnings.warn = warnings.original_warn stripNewline = re.compile(r'\n{3,}', ) stripLineEnd = re.compile(r'[ \t\xa0]+\n') isEmoticon = re.compile(r'(width|height): ?(([012]?\d|30)(\.\d)?px|[01](\.\d)?em)') fileReferenceNExpired = re.compile(r'FILE_REFERENCE_(?:\d_)?EXPIRED')
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from telethon.errors.rpcerrorlist import (
    UserIsBlockedError, ChatWriteForbiddenError, UserIdInvalidError, ChannelPrivateError,
)
from collections import defaultdict, Counter

from . import inner
from .utils import escape_html
from .inner.utils import get_hash, update_interval, deactivate_feed
from src import log, db, env
from src.exceptions import EntityNotFoundError
from src.i18n import i18n
from src.parsing.post import get_post_from_entry, Post
from src.web import feed_get

logger = log.getLogger('RSStT.monitor')

# status labels
# NOTE(review): presumably the possible outcomes of monitoring one feed -- confirm against their users
NOT_UPDATED: Final = 'not_updated'
CACHED: Final = 'cached'
EMPTY: Final = 'empty'
FAILED: Final = 'failed'
UPDATED: Final = 'updated'
SKIPPED: Final = 'skipped'

# These may leak memory, but each entry is so small that leaking thousands of them is still not a big deal!
# A lock is created on first access of a key (presumably a user id -- confirm).
__user_unsub_all_lock_bucket: dict[int, asyncio.Lock] = defaultdict(asyncio.Lock)
__user_entity_not_found_counter = Counter()


class MonitoringLogs:
    """Holder of class-level monitoring counters."""
    monitoring_counts = 0
from __future__ import annotations from typing import Union import asyncio import time import aiographfix as aiograph from bs4 import BeautifulSoup from aiohttp import ClientTimeout, ClientError from aiohttp_retry import RetryClient from aiohttp_socks import ProxyConnector from src import env, log logger = log.getLogger('RSStT.tgraph') class Telegraph(aiograph.Telegraph): def __init__(self, token=None): self.last_run = 0 self._fc_lock = asyncio.Lock() # lock: wait if exceed flood control self._request_lock = asyncio.Lock( ) # lock: only one request can be sent at the same time super().__init__(token) async def replace_session(self): await self.session.close() proxy_connector = ProxyConnector(**env.TELEGRAPH_PROXY_DICT, loop=self.loop) \ if env.TELEGRAPH_PROXY_DICT else None self.session = RetryClient(connector=proxy_connector, timeout=ClientTimeout(total=10), loop=self.loop,