示例#1
0
from __future__ import annotations
from typing import Union, Optional

import asyncio
from telethon.tl.types import DocumentAttributeVideo, DocumentAttributeAnimated
from telethon.errors.rpcerrorlist import SlowModeWaitError, FloodWaitError
from asyncio import BoundedSemaphore

from src import log, env, locks
from src.parsing.medium import Medium

logger = log.getLogger('RSStT.message')


class Message:
    """
    Container for an optional text, optional media and the parse mode to use for the text.

    Every instance starts with ``retries`` set to 0.
    """
    # class-level default; nothing visible in this part of the file changes it
    no_retry = False
    # number of permits of the class-level (i.e. shared by all instances) semaphore below
    __overall_concurrency = 30
    __overall_semaphore = BoundedSemaphore(__overall_concurrency)

    # NOTE(review): presumably selects the kind of lock to acquire ('r' = read?) — confirm against src.locks
    __lock_type = 'r'

    def __init__(self,
                 text: Optional[str] = None,
                 media: Optional[Union[list[Medium], Medium]] = None,
                 parse_mode: Optional[str] = 'HTML'):
        """
        :param text: message text, or ``None``
        :param media: a single ``Medium``, a list of ``Medium``, or ``None``
        :param parse_mode: parse mode of ``text`` (``'HTML'`` by default), or ``None``
        """
        self.text = text
        self.media = media
        self.parse_mode = parse_mode
        self.retries = 0
from tortoise import Tortoise
from aerich import Command

from . import config, models
from src import env, log
from . import effective_utils

logger = log.getLogger('RSStT.db')

# Short aliases for attributes of the sibling `models` and `effective_utils` modules,
# so they can be reached directly from this module.
User = models.User
Feed = models.Feed
Sub = models.Sub
Option = models.Option
EffectiveOptions = effective_utils.EffectiveOptions
EffectiveTasks = effective_utils.EffectiveTasks


async def init():
    """
    Run the aerich migrations for the configured database, then initialise ``effective_utils``.

    The migration directory is chosen from the prefix of ``env.DATABASE_URL``:
    ``sqlite`` -> ``src/db/migrations_sqlite``, ``postgres`` -> ``src/db/migrations_pgsql``.

    :raises SystemExit: with exit code 1 (after logging a critical message) if the URL starts with neither prefix
    """
    if env.DATABASE_URL.startswith('sqlite'):
        migrations_location = 'src/db/migrations_sqlite'
    elif env.DATABASE_URL.startswith('postgres'):
        migrations_location = 'src/db/migrations_pgsql'
    else:
        logger.critical('INVALID DB SCHEME! ONLY "sqlite" AND "postgres" ARE SUPPORTED!')
        # Raise SystemExit directly: the `exit` builtin is injected by the `site` module and is
        # therefore missing when the interpreter runs without it (e.g. `python -S`).
        raise SystemExit(1)

    aerich_command = Command(tortoise_config=config.TORTOISE_ORM, location=migrations_location)
    await aerich_command.init()
    await aerich_command.upgrade()
    # await Tortoise.init(config=config.TORTOISE_ORM)
    await effective_utils.init()
    logger.info('Successfully connected to the DB')
示例#3
0
import asyncio
import re
from functools import partial, wraps
from telethon import events
from telethon.tl import types
from telethon.tl.patched import Message, MessageService
from telethon.tl.functions.bots import SetBotCommandsRequest
from telethon.tl.functions.channels import GetParticipantRequest
from telethon.errors import FloodError, MessageNotModifiedError, UserNotParticipantError, QueryIdInvalidError, \
    UserIsBlockedError, ChatWriteForbiddenError, UserIdInvalidError, ChannelPrivateError

from src import env, log, db, locks
from src.i18n import i18n

logger = log.getLogger('RSStT.command')

# ANONYMOUS_ADMIN = 1087968824  # no need for MTProto, user_id will be `None` for anonymous admins


def parse_command(command: str) -> list[str]:
    """
    Split a command string into tokens separated by runs of whitespace.

    Leading and trailing whitespace is stripped before splitting. A string that is empty
    after stripping yields ``['']`` (not an empty list).

    :param command: raw command text
    :return: list of tokens
    """
    return re.split(r'\s+', command.strip())


def parse_callback_data_with_page(callback_data: bytes) -> tuple[int, int]:
    """
    Parse callback data of the form ``command_{id}[|{page}]`` (the ``|{page}`` part is optional).

    :param callback_data: raw callback data
    :return: a tuple of (id, page)
    """
示例#4
0
from __future__ import annotations

import asyncio
import re
from telethon.tl.types import InputMediaPhotoExternal, InputMediaDocumentExternal

from src import env, log, web
from src.parsing import post

logger = log.getLogger('RSStT.medium')

# size names; the same names form the alternation of the <size> group in `sizeParser`
sizes = ['large', 'mw2048', 'mw1024', 'mw720', 'middle']
# splits a sinaimg URL into the named groups <domain>, <size> and <filename>
sizeParser = re.compile(r'(?P<domain>^https?://\w+\.sinaimg\.\S+/)'
                        r'(?P<size>large|mw2048|mw1024|mw720|middle)'
                        r'(?P<filename>/\w+\.\w+$)')
# splits a sinaimg URL into the named groups <url_prefix>, a single-digit <server_id> and <url_suffix>
serverParser = re.compile(r'(?P<url_prefix>^https?://[a-zA-Z_-]+)'
                          r'(?P<server_id>\d)'
                          r'(?P<url_suffix>\.sinaimg\.\S+$)')

# NOTE(review): presumably caps concurrent web requests made for media at 5 — confirm against usage
_web_semaphore = asyncio.BoundedSemaphore(5)


class Medium:
    """Base class for a medium that is identified by its URL."""
    type = 'medium_base_class'
    max_size = 20 * 1024 * 1024  # == 20971520 (presumably bytes — confirm against usage)

    def __init__(self, url: str):
        """
        :param url: URL of the medium; a second copy is kept in ``original_url``
        """
        self.url = self.original_url = url
        self.valid = None  # no validity result recorded yet
        self._server_change_count = 0
示例#5
0
import aiohttp
import feedparser
from concurrent.futures import ThreadPoolExecutor
from aiohttp_socks import ProxyConnector
from aiohttp_retry import RetryClient, ExponentialRetry
from ssl import SSLError
from ipaddress import ip_network, ip_address
from urllib.parse import urlparse
from socket import AF_INET, AF_INET6
from multidict import CIMultiDictProxy
from attr import define

from src import env, log, locks
from src.i18n import i18n

logger = log.getLogger('RSStT.web')

_feedparser_thread_pool = ThreadPoolExecutor(1, 'feedparser_')
_resolver = aiodns.DNSResolver(timeout=3, loop=env.loop)

# Normalise the proxy scheme: `socks5h` -> `socks5` and `socks4a` -> `socks4`.
# (The needle must be spelled 'socks4a'; 'sock4a' never occurs in a `socks4a://` URL.)
# PROXY is None when env.R_PROXY is unset or empty.
PROXY = env.R_PROXY.replace('socks5h', 'socks5').replace(
    'socks4a', 'socks4') if env.R_PROXY else None
PRIVATE_NETWORKS = tuple(
    ip_network(ip_block) for ip_block in (
        '127.0.0.0/8',
        '::1/128',  # loopback is not a private network, list in here for convenience
        '169.254.0.0/16',
        'fe80::/10',  # link-local address
        '10.0.0.0/8',  # class A private network
        '172.16.0.0/12',  # class B private networks
        '192.168.0.0/16',  # class C private networks
示例#6
0
from functools import partial
from time import sleep
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from telethon import TelegramClient, events
from telethon.errors import ApiIdPublishedFloodError
from telethon.tl import types
from random import sample
from pathlib import Path

from src import env, log, db, command
from src.i18n import i18n, ALL_LANGUAGES
from src.parsing import tgraph

# log
logger = log.getLogger('RSStT')

# initializing bot
loop = env.loop

# make sure the "config" directory exists (no error if it is already there)
Path("config").mkdir(parents=True, exist_ok=True)
bot: Optional[TelegramClient] = None
if not env.API_ID or not env.API_HASH:
    logger.info(
        'API_ID and/or API_HASH not set, use sample APIs instead. API_ID_PUBLISHED_FLOOD_ERROR may occur.'
    )
    # sample() with k == len(population) returns every key exactly once in random order,
    # so this dict holds all entries of env.SAMPLE_APIS in shuffled insertion order
    API_KEYs = {
        api_id: env.SAMPLE_APIS[api_id]
        for api_id in sample(tuple(env.SAMPLE_APIS.keys()), len(
            env.SAMPLE_APIS))
    }
示例#7
0
    VideoContentTypeInvalidError, VideoFileInvalidError, ExternalUrlInvalidError,

    # errors caused by server instability or network instability between img server and telegram server
    WebpageCurlFailedError, WebpageMediaEmptyError, MediaEmptyError, FileReferenceExpiredError,
    BadRequestError,  # only FILE_REFERENCE_\d_EXPIRED

    # errors caused by lack of permission
    UserIsBlockedError, UserIdInvalidError, ChatWriteForbiddenError, ChannelPrivateError
)

from src import env, message, log, web
from src.parsing import tgraph
from src.parsing.medium import Video, Image, Media, Animation
from src.parsing.html_text import *

logger = log.getLogger('RSStT.post')

# python-Levenshtein cannot handle UTF-8 input properly, mute the annoying warning from fuzzywuzzy
import warnings

# Ignore warnings only while fuzzywuzzy is being imported. The context manager restores the
# previous warning filters on exit (even if the import raises) and does not monkey-patch
# `warnings.warn` or leave extra attributes on the `warnings` module.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    from fuzzywuzzy import fuzz

# matches a run of three or more consecutive newlines
stripNewline = re.compile(r'\n{3,}')
# matches spaces, tabs and no-break spaces (\xa0) sitting directly before a newline
stripLineEnd = re.compile(r'[ \t\xa0]+\n')
# matches a `width`/`height` declaration whose value is 0-30(.d)px or 0-1(.d)em
# NOTE(review): presumably used to detect emoticon-sized images — confirm against usage
isEmoticon = re.compile(r'(width|height): ?(([012]?\d|30)(\.\d)?px|[01](\.\d)?em)')
# matches FILE_REFERENCE_EXPIRED as well as FILE_REFERENCE_<single digit>_EXPIRED
fileReferenceNExpired = re.compile(r'FILE_REFERENCE_(?:\d_)?EXPIRED')
示例#8
0
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from telethon.errors.rpcerrorlist import UserIsBlockedError, ChatWriteForbiddenError, UserIdInvalidError, \
    ChannelPrivateError
from collections import defaultdict, Counter

from . import inner
from .utils import escape_html
from .inner.utils import get_hash, update_interval, deactivate_feed
from src import log, db, env
from src.exceptions import EntityNotFoundError
from src.i18n import i18n
from src.parsing.post import get_post_from_entry, Post
from src.web import feed_get

logger = log.getLogger('RSStT.monitor')

# String status labels.
# NOTE(review): presumably the possible outcomes of monitoring a single feed — confirm against usage
NOT_UPDATED: Final = 'not_updated'
CACHED: Final = 'cached'
EMPTY: Final = 'empty'
FAILED: Final = 'failed'
UPDATED: Final = 'updated'
SKIPPED: Final = 'skipped'

# this may leak memory, but each entry is so small that leaking thousands of them is still not a big deal!
__user_unsub_all_lock_bucket: dict[int, asyncio.Lock] = defaultdict(asyncio.Lock)
__user_entity_not_found_counter = Counter()


class MonitoringLogs:
    # class-level counter, starts at 0
    monitoring_counts = 0
示例#9
0
from __future__ import annotations
from typing import Union

import asyncio
import time
import aiographfix as aiograph
from bs4 import BeautifulSoup
from aiohttp import ClientTimeout, ClientError
from aiohttp_retry import RetryClient
from aiohttp_socks import ProxyConnector

from src import env, log

logger = log.getLogger('RSStT.tgraph')


class Telegraph(aiograph.Telegraph):
    def __init__(self, token=None):
        self.last_run = 0
        self._fc_lock = asyncio.Lock()  # lock: wait if exceed flood control
        self._request_lock = asyncio.Lock(
        )  # lock: only one request can be sent at the same time
        super().__init__(token)

    async def replace_session(self):
        await self.session.close()
        proxy_connector = ProxyConnector(**env.TELEGRAPH_PROXY_DICT, loop=self.loop) \
            if env.TELEGRAPH_PROXY_DICT else None
        self.session = RetryClient(connector=proxy_connector,
                                   timeout=ClientTimeout(total=10),
                                   loop=self.loop,