# --- Example #1 (scrape-artifact header; original text: "Пример #1", rating "0") ---
import __hpx__ as hpx
import os
import arrow
import datetime
import html
import extractors
from extractors import common

# Module-level logger obtained from the host application's plugin API (__hpx__).
log = hpx.get_logger(__name__)

# Plugin options registry — empty by default; presumably populated elsewhere.
options = {}


def get_common_data(datatypes, fpath):
    """Collect metadata for *fpath* from every extractor whose DataType flag
    is set in *datatypes*.

    NOTE(review): this definition is truncated in the visible source — the
    body ends mid-branch at ``if fdata:``; the remainder is not shown here.

    Args:
        datatypes: a ``common.DataType`` flag value selecting which
            extractors to attempt (tested per-member via bitwise ``&``).
        fpath: a filesystem path; wrapped into ``hpx.command.CoreFS`` below.

    Returns:
        presumably the accumulated dict ``d`` — TODO confirm (truncated).
    """
    # NOTE(review): assert is stripped under ``python -O``; raising TypeError
    # would be the robust form of this validation.
    assert isinstance(datatypes, common.DataType)
    d = {}
    fpath = hpx.command.CoreFS(fpath)

    # Walk every known data type and try only those selected by the
    # *datatypes* bitmask (DataType behaves like an enum.Flag here).
    for datatype in common.DataType:
        if datatype & datatypes:
            log.info(f"Attempting with {datatype}")
            md = {}

            # Look up the extractor registered for this data type, if any.
            ex = common.extractors.get(datatype, None)
            if ex:
                try:
                    fdata = ex.file_to_dict(fpath)
                except ValueError:
                    # Extractor rejected this file; move on to the next type.
                    log.info(f"Skipping {datatype}")
                    continue
                if fdata:
# --- Example #2 (scrape-artifact header; original text: "Пример #2", rating "0") ---
# main.py
import __hpx__ as hpx
import pickle
import os

from bs4 import BeautifulSoup

# Module-level logger obtained from the host application's plugin API (__hpx__).
log = hpx.get_logger("main")

# Mutable module-level session state, filled in at runtime by other code.
current_user_name = ""  # logged-in user's name, if any
status_text = ""  # human-readable status message for display
response = None  # presumably the last HTTP response object — TODO confirm
user_dict = None  # presumably cached per-user data — TODO confirm

# Path of the persisted session/info file, stored next to the plugin.
save_file = os.path.join(hpx.constants.current_dir, '.info')

# Default delay value (units not shown here — presumably seconds; verify against callers).
default_delay = 8

# Static browser-like headers sent with outgoing requests.
HEADERS = {
    'user-agent':
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
}

# Regex fragment: optional scheme (http:// or https://) followed by optional "www.".
match_url_prefix = r"^(http\:\/\/|https\:\/\/)?(www\.)?"  # http:// or https:// + www.
# Regex fragment: optional trailing slash at the end of the URL.
match_url_end = r"\/?$"

# Full pattern matching exhentai.org, e-hentai.org and g.e-hentai.org URLs.
url_regex = match_url_prefix + r"((exhentai|(g\.)?e-hentai)\.org)" + match_url_end

# Base site URLs keyed by short site code ('eh' / 'ex').
MAIN_URLS = {'eh': "https://e-hentai.org", 'ex': "https://exhentai.org"}

# URLS currently aliases MAIN_URLS (same dict object, not a copy).
URLS = MAIN_URLS