import __hpx__ as hpx import os import arrow import datetime import html import extractors from extractors import common log = hpx.get_logger(__name__) options = {} def get_common_data(datatypes, fpath): assert isinstance(datatypes, common.DataType) d = {} fpath = hpx.command.CoreFS(fpath) for datatype in common.DataType: if datatype & datatypes: log.info(f"Attempting with {datatype}") md = {} ex = common.extractors.get(datatype, None) if ex: try: fdata = ex.file_to_dict(fpath) except ValueError: log.info(f"Skipping {datatype}") continue if fdata:
# main.py
import __hpx__ as hpx
import pickle
import os

from bs4 import BeautifulSoup

# Plugin-wide logger.
log = hpx.get_logger("main")

# Mutable module-level session state, filled in by the login/request hooks.
current_user_name = ""
status_text = ""
response = None
user_dict = None

# Credentials/cookies are persisted next to the plugin directory.
save_file = os.path.join(hpx.constants.current_dir, '.info')

# Default delay (seconds, presumably) between requests — TODO confirm unit.
default_delay = 8

# Browser-like UA so the sites accept our requests.
HEADERS = {
    'user-agent': "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
}

# Regex fragments for recognizing gallery URLs:
# optional scheme (http:// or https://) and optional www. prefix …
match_url_prefix = r"^(http\:\/\/|https\:\/\/)?(www\.)?"
# … followed by an optional trailing slash at the end of the URL.
match_url_end = r"\/?$"
# Full pattern: matches exhentai.org, e-hentai.org and g.e-hentai.org URLs.
url_regex = "".join((
    match_url_prefix,
    r"((exhentai|(g\.)?e-hentai)\.org)",
    match_url_end,
))

MAIN_URLS = {'eh': "https://e-hentai.org", 'ex': "https://exhentai.org"}
# NOTE(review): URLS aliases MAIN_URLS (same dict object, not a copy) —
# mutating URLS mutates MAIN_URLS too; confirm this is intended.
URLS = MAIN_URLS