Example No. 1
def deviantart_login(path: str) -> None:
    log.init(path, True)
    db.init(path)
    # share hydownloader's cookie jar and gallery-dl token cache, then run the
    # interactive OAuth flow for DeviantArt
    args = ['--cookies', db.get_rootpath() + '/cookies.txt']
    args += ['-o', 'cache.file=' + db.get_rootpath() + '/gallery-dl-cache.db']
    args += ['oauth:deviantart']
    gallery_dl_utils.run_gallery_dl_with_custom_args(args)
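Passing oauth:deviantart as the "URL" makes gallery-dl start its interactive OAuth authorization flow, while --cookies and -o cache.file=... point it at the cookie jar and token cache shared with regular hydownloader downloads. A minimal sketch of what run_gallery_dl_with_custom_args amounts to, assuming it essentially shells out to the gallery-dl CLI (the real helper adds more shared configuration):

import subprocess

def run_gallery_dl_sketch(args: list[str]) -> int:
    # Run gallery-dl interactively so the user can complete the OAuth redirect;
    # stdin/stdout are inherited from the parent process.
    return subprocess.run(['gallery-dl'] + args, check=False).returncode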
Example No. 2
def mass_add_urls(path: str, file_: str, additional_data: Optional[str], metadata_only: bool, overwrite_existing: bool, filter_: Optional[str], ignore_anchor: bool, max_files: Optional[int]) -> None:
    log.init(path, True)
    db.init(path)
    with open(file_, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            db.add_or_update_urls([{
                'url': line,
                'time_added': time.time(),
                'additional_data': additional_data,
                'metadata_only': metadata_only,
                'overwrite_existing': overwrite_existing,
                'filter': filter_,
                'ignore_anchor': ignore_anchor,
                'max_files': max_files
            }])
            log.info("hydownloader-tools", f"Added URL: {line}")
Example No. 3
def mass_add_subscriptions(path: str, file_: str, downloader: str, additional_data: Optional[str], paused: bool, filter_: Optional[str], abort_after: int, max_files_initial: Optional[int], max_files_regular: Optional[int]) -> None:
    log.init(path, True)
    db.init(path)
    with open(file_, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            db.add_or_update_subscriptions([{
                'keywords': line,
                'downloader': downloader,
                'time_created': time.time(),
                'additional_data': additional_data,
                'filter': filter_,
                'max_files_initial': max_files_initial,
                'max_files_regular': max_files_regular,
                'abort_after': abort_after,
                'paused': paused
            }])
            log.info("hydownloader-tools", f"Added subscription {line} with downloader {downloader}")
Example No. 4
def mass_add_subscriptions(path: str, file_: str, downloader: str,
                           additional_data: Optional[str], paused: bool,
                           filter_: Optional[str], abort_after: int,
                           max_files_initial: Optional[int],
                           max_files_regular: Optional[int],
                           check_interval: int, random_check_interval: int,
                           encode_keywords: bool) -> None:
    log.init(path, True)
    db.init(path)
    with open(file_, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if encode_keywords:
                # convert a plain tag search into the URL-encoded form
                line = line.replace(' ', '+')
                line = urllib.parse.quote(line, safe='/+').lower()
            new_sub = {
                'keywords': line,
                'downloader': downloader,
                'time_created': time.time(),
                'additional_data': additional_data,
                'filter': filter_,
                'paused': paused,
                # spread subscription checks out by randomizing the interval
                'check_interval': check_interval + random.randint(0, random_check_interval)
            }
            if max_files_initial is not None:
                new_sub['max_files_initial'] = max_files_initial
            if max_files_regular is not None:
                new_sub['max_files_regular'] = max_files_regular
            if abort_after is not None:
                new_sub['abort_after'] = abort_after
            db.add_or_update_subscriptions([new_sub])
            log.info("hydownloader-tools",
                     f"Added subscription {line} with downloader {downloader}")
Example No. 5
def start(path: str, debug: bool, no_sub_worker: bool, no_url_worker: bool) -> None:
    log.init(path, debug)
    db.init(path)

    output_postprocessors.process_additional_data()
    output_postprocessors.parse_log_files()

    if not no_sub_worker:
        subs_thread = threading.Thread(target=subscription_worker, name='Subscription worker', daemon=True)
        subs_thread.start()

    if not no_url_worker:
        url_thread = threading.Thread(target=url_queue_worker, name='Single URL queue worker', daemon=True)
        url_thread.start()

    api_thread = threading.Thread(target=api_worker, args=(path, debug))
    api_thread.start()

    while not _shutdown_started and not _shutdown_requested_by_api_thread:
        time.sleep(1)
    shutdown()
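The subscription and URL queue workers themselves are not shown in this example. As a rough, purely illustrative sketch (the helper names below are hypothetical, not hydownloader's actual API), each worker is a long-running loop that polls the database for due work and stops once shutdown is flagged:

def worker_loop_sketch() -> None:
    while not _shutdown_started:
        for item in db.get_due_work():  # hypothetical query helper
            if _shutdown_started:
                break
            process_item(item)          # hypothetical per-item download step
        time.sleep(1)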
Example No. 6
def start(path: str, debug: bool) -> None:
    log.init(path, debug)
    db.init(path)

    process_additional_data()

    subs_thread = threading.Thread(target=subscription_worker, name='Subscription worker', daemon=True)
    subs_thread.start()

    url_thread = threading.Thread(target=url_queue_worker, name='Single URL queue worker', daemon=True)
    url_thread.start()

    if db.get_conf('daemon.ssl') and os.path.isfile(path+"/server.pem"):
        log.info("hydownloader", "Starting daemon (with SSL)...")
        srv = SSLWSGIRefServer(path+"/server.pem", host=db.get_conf('daemon.host'), port=db.get_conf('daemon.port'))
        bottle.run(server=srv, debug=debug)
    else:
        if db.get_conf('daemon.ssl'):
            log.warning("hydownloader", "SSL enabled in config, but no server.pem file found in the db folder, continuing without SSL...")
        log.info("hydownloader", "Starting daemon...")
        srv = SSLWSGIRefServer("", host=db.get_conf('daemon.host'), port=db.get_conf('daemon.port'))
        bottle.run(server=srv, debug=debug)
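SSLWSGIRefServer is hydownloader's own bottle server adapter. A minimal sketch of such an adapter, built on bottle's ServerAdapter interface plus the standard library's wsgiref and ssl modules (the real class may differ in its details):

import ssl
from wsgiref.simple_server import make_server
import bottle

class SSLWSGIRefServerSketch(bottle.ServerAdapter):
    def __init__(self, certfile: str, **options):
        super().__init__(**options)
        # combined certificate + private key PEM; empty string disables TLS
        self.certfile = certfile

    def run(self, handler):
        srv = make_server(self.host, self.port, handler)
        if self.certfile:
            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ctx.load_cert_chain(self.certfile)
            srv.socket = ctx.wrap_socket(srv.socket, server_side=True)
        srv.serve_forever()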
Example No. 7
def update_anchor(path: str, hydrus_master_db: str, sites: str,
                  unrecognized_urls_file: Optional[str],
                  recognized_urls_file: Optional[str]) -> None:
    """
    This function goes through all URLs in a Hydrus database, and tries to match them to known site-specific URL patterns to
    generate anchor database entries that gallery-dl can recognize. For some sites, the anchor format differs
    from the gallery-dl default, these are set in gallery-dl-config.json.
    """
    log.init(path, True)
    db.init(path)
    if not os.path.isfile(hydrus_master_db):
        log.fatal("hydownloader-anchor-exporter",
                  "The given client.master.db file does not exist!")
    hydrus_db = sqlite3.connect(hydrus_master_db)
    hydrus_db.row_factory = sqlite3.Row
    anchor_init_needed = not os.path.isfile(path + "/anchor.db")
    anchor_db = sqlite3.connect(path + "/anchor.db")
    hc = hydrus_db.cursor()
    ac = anchor_db.cursor()
    if anchor_init_needed:
        ac.execute('CREATE TABLE archive (entry PRIMARY KEY) WITHOUT ROWID')
        anchor_db.commit()
    ac.execute('select * from archive')
    known_anchors = {row[0] for row in ac.fetchall()}
    log.info("hydownloader-anchor-exporter",
             "Querying Hydrus database for URLs...")
    hc.execute('select * from url_domains natural inner join urls')
    rows = hc.fetchall()
    all_rows = len(rows)
    processed = 0
    suspicious_urls = set()
    recognized_urls = set()

    sites_to_keywords: dict[str, Tuple[list[str], list[str]]] = {
        'pixiv': (["pixi"], []),
        'gelbooru': (["gelbooru"], []),
        'nijie': (["nijie"], []),
        'lolibooru': (['lolibooru'], []),
        'danbooru': (['danbooru'], []),
        '3dbooru': (['behoimi'], []),
        'sankaku': (['sankaku'], ["idol."]),
        'idolcomplex': (["idol.sankaku"], []),
        'artstation': (["artstation"], []),
        'twitter': (["twitter", "nitter"], []),
        'deviantart': (['deviantart'], []),
        'tumblr': (["tumblr"], [])
    }

    siteset = {x.strip() for x in sites.split(',') if x.strip()}
    if sites == "all":
        siteset = set(sites_to_keywords.keys())
    anchors: Counter[str] = collections.Counter()

    for site in siteset:
        if site not in sites_to_keywords:
            log.fatal('hydownloader-anchor-exporter',
                      f'Unsupported site: {site}')

    def process_url(url):
        patterns = urls.anchor_patterns_from_url(url)
        if patterns:
            recognized_urls.add(url)
            anchors[patterns[0]] += 1
        else:
            suspicious_urls.add(url)

    log.info("hydownloader-anchor-exporter", "Processing URLs...")
    for row in rows:
        processed += 1
        if processed % 1000 == 0:
            print(f"Processed {processed}/{all_rows} URLs")
        for site in siteset:
            accepts, rejects = sites_to_keywords[site]
            url_ok = False
            for accept in accepts:
                if accept in row['url']:
                    url_ok = True
                    break
            if url_ok:
                for reject in rejects:
                    if reject in row['url']: url_ok = False
            if url_ok:
                process_url(row['url'])
    log.info("hydownloader-anchor-exporter", "Done processing URLs")

    if unrecognized_urls_file:
        log.info("hydownloader-anchor-exporter",
                 "Writing unrecognized URLs...")
        with open(unrecognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(suspicious_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter",
                 "Done writing unrecognized URLs")
    if recognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing recognized URLs...")
        with open(recognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(recognized_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter",
                 "Done writing recognized URLs")

    log.info("hydownloader-anchor-exporter", "Inserting new anchors...")
    anchor_count = len(anchors.keys())
    processed = 0
    new_anchor_rows = 0
    for anchor in anchors:
        processed += 1
        if processed % 50 == 0:
            print(f"Inserting new anchors {processed}/{anchor_count}")
        final_anchors = [anchor]
        if anchor.startswith("nijie"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("twitter") or anchor.startswith("tumblr"):
            for i in range(anchors[anchor] + 1):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("pixiv"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_p{:02d}".format(i))
        for f_a in final_anchors:
            if f_a in known_anchors:
                continue
            ac.execute('insert into archive(entry) values (?)', (f_a, ))
            new_anchor_rows += 1
    log.info(
        "hydownloader-anchor-exporter",
        f"Done inserting new anchors, added {new_anchor_rows} entries in total"
    )

    anchor_db.commit()
    anchor_db.close()
    hydrus_db.close()
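The generated anchor.db uses the same single-column archive table created above, which gallery-dl's download archive recognizes, so the result can be inspected with plain sqlite3 (the path below is hypothetical):

import sqlite3

con = sqlite3.connect('/home/user/hydownloader-db/anchor.db')
total = con.execute('select count(*) from archive').fetchone()[0]
sample = [row[0] for row in con.execute('select entry from archive limit 5')]
print(total, sample)
con.close()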
Example No. 8
def reparse_all_logfiles(path: str) -> None:
    log.init(path, True)
    db.init(path)
    output_postprocessors.parse_log_files(True)
Example No. 9
def init_db(path: str) -> None:
    log.init(path, True)
    db.init(path)
Example No. 10
def report(path: str, verbose: bool, no_urls: bool) -> None:
    log.init(path, True)
    db.init(path)
    db.report(verbose, not no_urls)
Example No. 11
def test(path: str, sites: str) -> None:
    log.init(path, True)
    db.init(path)
    if not test_internal(sites):
        sys.exit(1)
Example No. 12
def update_anchor(path: str, hydrus_db_folder: str, sites: str,
                  unrecognized_urls_file: Optional[str],
                  recognized_urls_file: Optional[str], fill_known_urls: bool,
                  keep_old_hydrus_url_data: bool) -> None:
    """
    This function goes through all URLs in a Hydrus database, and tries to match them to known site-specific URL patterns to
    generate anchor database entries that gallery-dl can recognize. For some sites, the anchor format differs
    from the gallery-dl default, these are set in gallery-dl-config.json.
    If enabled, also fills up the known_urls table in the hydownloader DB with all URLs known by Hydrus.
    """
    log.init(path, True)
    db.init(path)
    if not os.path.isfile(hydrus_db_folder + "/client.master.db"):
        log.fatal(
            "hydownloader-anchor-exporter",
            "The client.master.db database was not found at the given location!"
        )
    hydrus_db = sqlite3.connect("file:" + hydrus_db_folder +
                                "/client.master.db?mode=ro",
                                uri=True)
    hydrus_db.row_factory = sqlite3.Row
    anchor_init_needed = not os.path.isfile(path + "/anchor.db")
    anchor_db = sqlite3.connect(path + "/anchor.db")
    hc = hydrus_db.cursor()
    ac = anchor_db.cursor()
    if anchor_init_needed:
        ac.execute('CREATE TABLE archive (entry PRIMARY KEY) WITHOUT ROWID')
        anchor_db.commit()
    ac.execute('select * from archive')
    known_anchors = {row[0] for row in ac.fetchall()}
    log.info("hydownloader-anchor-exporter",
             "Querying Hydrus database for URLs...")
    hc.execute('select * from url_domains natural inner join urls')
    rows = hc.fetchall()
    all_rows = len(rows)
    processed = 0
    suspicious_urls = set()
    recognized_urls = set()
    current_url_ids = set()
    deleted_url_ids = set()
    if fill_known_urls:
        if not os.path.isfile(hydrus_db_folder + "/client.db"):
            log.fatal(
                "hydownloader-anchor-exporter",
                "The client.db database was not found at the given location!")
        client_db = sqlite3.connect("file:" + hydrus_db_folder +
                                    "/client.db?mode=ro",
                                    uri=True)
        client_db.row_factory = sqlite3.Row
        cc = client_db.cursor()
        log.info("hydownloader-anchor-exporter",
                 "Querying Hydrus database for current URL IDs...")
        cc.execute('select * from current_files natural inner join url_map')
        for row in cc.fetchall():
            current_url_ids.add(row['url_id'])
        log.info("hydownloader-anchor-exporter",
                 "Querying Hydrus database for deleted URL IDs...")
        cc.execute('select * from deleted_files natural inner join url_map')
        for row in cc.fetchall():
            deleted_url_ids.add(row['url_id'])
        client_db.close()
        if keep_old_hydrus_url_data:
            log.info(
                "hydownloader-anchor-exporter",
                "Old Hydrus URL data will NOT be deleted from the shared hydownloader database"
            )
        else:
            log.info(
                "hydownloader-anchor-exporter",
                "Deleting old Hydrus URL data from shared hydownloader database..."
            )
            db.delete_all_hydrus_known_urls()

    sites_to_keywords: dict[str, Tuple[list[str], list[str]]] = {
        'pixiv': (["pixi"], []),
        'gelbooru': (["gelbooru"], []),
        'nijie': (["nijie"], []),
        'lolibooru': (['lolibooru'], []),
        'danbooru': (['danbooru'], []),
        '3dbooru': (['behoimi'], []),
        'sankaku': (['sankaku'], ["idol."]),
        'idolcomplex': (["idol.sankaku"], []),
        'artstation': (["artstation"], []),
        'twitter': (["twitter", "nitter"], []),
        'deviantart': (['deviantart'], []),
        'tumblr': (["tumblr"], []),
        'hentaifoundry': (["hentai-foundry"], []),
        'yandere': (["yande.re"], [])
    }

    siteset = {x.strip() for x in sites.split(',') if x.strip()}
    if sites == "all":
        siteset = set(sites_to_keywords.keys())
    anchors: Counter[str] = collections.Counter()

    for site in siteset:
        if site not in sites_to_keywords:
            log.fatal('hydownloader-anchor-exporter',
                      f'Unsupported site: {site}')

    def process_url(url):
        patterns = urls.anchor_patterns_from_url(url)
        if patterns:
            recognized_urls.add(url)
            anchors[patterns[0]] += 1
        else:
            suspicious_urls.add(url)

    log.info("hydownloader-anchor-exporter", "Processing URLs...")
    for row in rows:
        processed += 1
        if processed % 1000 == 0:
            print(f"Processed {processed}/{all_rows} URLs", file=sys.stderr)
        if fill_known_urls:
            known_url_status = 1
            is_current = row['url_id'] in current_url_ids
            is_deleted = row['url_id'] in deleted_url_ids
            if is_current and is_deleted:
                known_url_status = 4
            elif is_deleted:
                known_url_status = 3
            elif is_current:
                known_url_status = 2
            db.add_hydrus_known_url(row['url'], known_url_status)
        for site in siteset:
            accepts, rejects = sites_to_keywords[site]
            url_ok = False
            for accept in accepts:
                if accept in row['url']:
                    url_ok = True
                    break
            if url_ok:
                for reject in rejects:
                    if reject in row['url']: url_ok = False
            if url_ok:
                process_url(row['url'])
    log.info("hydownloader-anchor-exporter", "Done processing URLs")

    if unrecognized_urls_file:
        log.info("hydownloader-anchor-exporter",
                 "Writing unrecognized URLs...")
        with open(unrecognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(suspicious_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter",
                 "Done writing unrecognized URLs")
    if recognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing recognized URLs...")
        with open(recognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(recognized_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter",
                 "Done writing recognized URLs")

    log.info("hydownloader-anchor-exporter", "Inserting new anchors...")
    anchor_count = len(anchors.keys())
    processed = 0
    new_anchor_rows = 0
    for anchor in anchors:
        processed += 1
        if processed % 50 == 0:
            print(f"Inserting new anchors {processed}/{anchor_count}",
                  file=sys.stderr)
        final_anchors = [anchor]
        if anchor.startswith("nijie"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("twitter") or anchor.startswith("tumblr"):
            for i in range(anchors[anchor] + 1):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("pixiv"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_p{:02d}".format(i))
        for f_a in final_anchors:
            if f_a in known_anchors:
                continue
            ac.execute('insert into archive(entry) values (?)', (f_a, ))
            new_anchor_rows += 1
    log.info(
        "hydownloader-anchor-exporter",
        f"Done inserting new anchors, added {new_anchor_rows} entries in total"
    )

    anchor_db.commit()
    anchor_db.close()
    hydrus_db.close()
    db.shutdown()
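For reference, the known_url_status values written above encode each URL's file state in Hydrus; the same mapping as a standalone helper:

def hydrus_url_status(is_current: bool, is_deleted: bool) -> int:
    # 4: the URL maps to both current and deleted files
    # 3: deleted only, 2: current only, 1: known to Hydrus but neither
    if is_current and is_deleted:
        return 4
    if is_deleted:
        return 3
    if is_current:
        return 2
    return 1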
Example No. 13
def run_job(path: str, job: str, config: Optional[str], verbose: bool,
            do_it: bool, no_stop_on_missing_metadata: bool) -> None:
    log.init(path, True)
    db.init(path)

    config_path = db.get_rootpath() + '/hydownloader-import-jobs.json'
    data_path = db.get_datapath()
    if config:
        config_path = config
    if not os.path.isfile(config_path):
        log.fatal("hydownloader-importer",
                  f"Configuration file not found: {config_path}")

    with open(config_path, 'r', encoding='utf-8-sig') as config_file:
        jobs = json.load(config_file)
    if job not in jobs:
        log.fatal("hydownloader-importer",
                  f"Job not found in configuration file: {job}")
    jd = jobs[job]

    force_add_metadata = jd.get('forceAddMetadata', True)
    force_add_files = jd.get('forceAddFiles', False)

    client = hydrus.Client(jd['apiKey'], jd['apiURL'])

    log.info("hydownloader-importer", f"Starting import job: {job}")

    # iterate over all files in the data directory
    for root, dirs, files in os.walk(data_path):
        for fname in files:
            # json files hold metadata, don't import them to Hydrus
            if fname.endswith('.json'):
                continue

            # set up some variables
            # some will be used later in the code, some are meant to be used in user-defined expressions
            abspath = root + "/" + fname
            path = os.path.relpath(abspath, start=data_path)
            split_path = os.path.split(path)
            fname_noext, fname_ext = os.path.splitext(fname)
            if fname_ext.startswith('.'): fname_ext = fname_ext[1:]

            # find the path of the associated json metadata file, check if it exists
            # for pixiv ugoira, the same metadata file belongs both to the .webm and the .zip,
            # so this needs special handling
            json_path = abspath + '.json'
            if not os.path.isfile(json_path) and abspath.endswith('.webm'):
                json_path = abspath[:-4] + "zip.json"
            json_exists = True
            if not os.path.isfile(json_path):
                json_exists = False
                printerr(f"Warning: no metadata file found for {path}")
                if not no_stop_on_missing_metadata:
                    sys.exit(1)

            generated_urls = set()
            generated_tags: set[tuple[str, str]] = set()
            matched = False  # will be true if at least 1 filter group matched the file
            json_data = None  # this will hold the associated json metadata (if available)

            if verbose: printerr(f"Processing file: {path}...")

            # iterate over all filter groups, do they match this file?
            for group in jd['groups']:
                # evaluate filter, load json metadata if the filter matches and we haven't loaded it yet
                should_process = False
                try:
                    should_process = eval(group['filter'])
                except Exception:
                    printerr(f"Failed to evaluate filter: {group['filter']}")
                    sys.exit(1)
                if not json_data and json_exists:
                    try:
                        with open(json_path, encoding='utf-8-sig') as json_file:
                            json_data = json.load(json_file)
                    except json.decoder.JSONDecodeError:
                        printerr(f"Failed to parse JSON: {json_path}")
                        sys.exit(1)
                if not should_process:
                    continue
                matched = True

                # get the data for this file from the additional_data db table and process it
                # set up some variables that user-defined expressions will be able to use
                additional_data_dicts = db.get_additional_data_for_file(path)
                if not additional_data_dicts and path.endswith('.webm'):
                    additional_data_dicts = db.get_additional_data_for_file(
                        path[:-4] + "zip")
                extra_tags: defaultdict[str, list[str]] = defaultdict(list)
                min_time_added = -1
                max_time_added = -1
                for d in additional_data_dicts:
                    parse_additional_data(extra_tags, d['data'])
                    if min_time_added == -1 or min_time_added > d['time_added']:
                        min_time_added = d['time_added']
                    if max_time_added == -1 or max_time_added < d['time_added']:
                        max_time_added = d['time_added']
                sub_ids = []
                url_ids = []
                for d in additional_data_dicts:
                    if d['subscription_id']:
                        sub_ids.append(str(d['subscription_id']))
                    if d['url_id']:
                        url_ids.append(str(d['url_id']))

                # execute user-defined tag and url generator expressions
                has_error = False
                for dtype, d in [('tag', x) for x in group.get('tags', [])] + [
                    ('url', x) for x in group.get('urls', [])
                ]:
                    skip_on_error = d.get("skipOnError", False)
                    allow_empty = d.get("allowEmpty", False)
                    rule_name = d.get("name")
                    generated_results = []
                    # if the expression is a single string
                    if isinstance(d["values"], str):
                        try:
                            eval_res = eval(d["values"])
                            # check result type: must be string or iterable of strings
                            if isinstance(eval_res, str):
                                generated_results = [eval_res]
                            else:
                                for eval_res_str in eval_res:
                                    if not isinstance(eval_res_str, str):
                                        printerr(
                                            f"Invalid result type ({str(type(eval_res_str))}) while evaluating expression: {d['values']}"
                                        )
                                        sys.exit(1)
                                    else:
                                        generated_results.append(eval_res_str)
                        except Exception as e:
                            if verbose:
                                printerr(
                                    f"Failed to evaluate expression: {d['values']}"
                                )
                                printerr(e)
                            has_error = True
                    else:  # multiple expressions (array of strings)
                        for eval_expr in d["values"]:
                            try:
                                eval_res = eval(eval_expr)
                                # check result type: must be string or iterable of strings
                                if isinstance(eval_res, str):
                                    generated_results.append(eval_res)  # collect results from every expression
                                else:
                                    for eval_res_str in eval_res:
                                        if not isinstance(eval_res_str, str):
                                            printerr(
                                                f"Invalid result type ({str(type(eval_res_str))}) while evaluating expression: {eval_expr}"
                                            )
                                            sys.exit(1)
                                        else:
                                            generated_results.append(
                                                eval_res_str)
                            except Exception as e:
                                if verbose:
                                    printerr(
                                        f"Failed to evaluate expression: {eval_expr}"
                                    )
                                    printerr(e)
                                has_error = True

                    # check for empty results or failed evaluation, as necessary
                    if not generated_results and not allow_empty:
                        printerr(
                            f"Error: the rule named {rule_name} yielded no results but this is not allowed"
                        )
                        sys.exit(1)
                    if has_error:
                        printerr(
                            f"Warning: an expression failed to evaluate in the rule named {rule_name}"
                        )
                        if not skip_on_error:
                            sys.exit(1)

                    # save results of the currently evaluated expressions
                    if dtype == 'url':
                        generated_urls.update(generated_results)
                    else:
                        for repo in d["tagRepos"]:
                            generated_tags.update(
                                (repo, tag) for tag in generated_results)
            if matched:
                printerr(f"File matched: {path}...")

                if not os.path.getsize(abspath):
                    print(f"Found truncated file: {abspath}")
                    sys.exit(1)

                if verbose:
                    printerr("Generated URLs:")
                    for url in generated_urls:
                        printerr(url)
                    printerr("Generated tags:")
                    for repo, tag in sorted(list(generated_tags),
                                            key=lambda x: x[0]):
                        printerr(f"{repo} <- {tag}")
                if verbose: printerr('Hashing...')

                # calculate hash, check if Hydrus already knows the file
                already_added = False
                if do_it:
                    hasher = hashlib.sha256()
                    with open(abspath, 'rb') as hashedfile:
                        buf = hashedfile.read(65536 * 16)
                        while len(buf) > 0:
                            hasher.update(buf)
                            buf = hashedfile.read(65536 * 16)
                    hexdigest = hasher.hexdigest()
                    if client.file_metadata(hashes=[hexdigest],
                                            only_identifiers=True):
                        printerr("File is already in Hydrus")
                        already_added = True

                # send file, tags, metadata to Hydrus as needed
                if not already_added or force_add_files:
                    if verbose: printerr("Sending file to Hydrus...")
                    if do_it: client.add_file(abspath)
                if not already_added or force_add_metadata:
                    if verbose: printerr("Associating URLs...")
                    if do_it:
                        client.associate_url(hashes=[hexdigest],
                                             add=generated_urls)
                    if verbose: printerr("Adding tags...")
                    tag_dict = defaultdict(list)
                    for repo, tag in generated_tags:
                        tag_dict[repo].append(tag)
                    if do_it:
                        client.add_tags(hashes=[hexdigest],
                                        service_to_tags=tag_dict)
            else:
                if verbose:
                    printerr(f"Skipping due to no matching filter: {path}")

    log.info("hydownloader-importer", f"Finished import job: {job}")
    db.shutdown()
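The job configuration consumed above lives in hydownloader-import-jobs.json. Below is a hypothetical job definition, shown as the equivalent Python dict and using only the keys the code above actually reads; the concrete filter and value expressions are made up, and are evaluated per file with variables such as path, fname, json_data and extra_tags in scope:

example_jobs = {
    "my-import-job": {
        "apiURL": "http://127.0.0.1:45869",  # hypothetical Hydrus client API endpoint
        "apiKey": "0123456789abcdef",         # hypothetical access key
        "forceAddFiles": False,
        "forceAddMetadata": True,
        "groups": [
            {
                # which files this group applies to
                "filter": "path.startswith('gallery-dl/pixiv/')",
                "tags": [
                    {
                        "name": "pixiv title",
                        "tagRepos": ["my tags"],
                        "allowEmpty": True,
                        "values": "['title:' + json_data['title']] if json_data else []"
                    }
                ],
                "urls": [
                    {
                        "name": "pixiv source url",
                        "allowEmpty": True,
                        "values": "'https://www.pixiv.net/en/artworks/' + str(json_data['id']) if json_data else []"
                    }
                ]
            }
        ]
    }
}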
Example No. 14
def report(path: str, verbose: bool) -> None:
    log.init(path, True)
    db.init(path)
    db.report(verbose)