def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
    """Collect the media files under ``source_folder`` that should be processed.

    The folder is walked recursively. A file is kept when its suffix is one of
    the configured media types and none of the skip rules below applies:

    * it lives under an escape folder (all modes except main mode 3);
    * its path is listed in ``failed_list.txt`` (only consulted in main mode 3
      or when link mode is on, and only if the failed list is not ignored);
    * it is a symlink, or a hard link while hard-link scanning is disabled
      (all modes except main mode 3);
    * it is non-empty but smaller than 120 MiB;
    * ``regexstr`` is a valid pattern and does not match the full path;
    * its file name contains ``-trailer.``;
    * main mode 3 only: its sibling ``.nfo`` was modified within
      ``nfo_skip_days`` days.

    When link mode is on (and main mode is not 3) with ``nfo_skip_days > 0``,
    files whose number matches a recently modified ``.nfo`` in the success
    folder are dropped as well.

    Args:
        source_folder: Directory to scan.
        regexstr: Optional case-insensitive regular expression applied to the
            full path; an empty or non-string value disables the filter. An
            invalid pattern is reported and then ignored.

    Returns:
        The selected file paths as strings, in scan order. An empty list when
        ``source_folder`` is not a directory.
    """
    conf = config.getInstance()
    main_mode = conf.main_mode()
    debug = conf.debug()
    nfo_skip_days = conf.nfo_skip_days()
    link_mode = conf.link_mode()
    file_type = set(conf.media_type().lower().split(","))
    trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
    cliRE = None
    if isinstance(regexstr, str) and len(regexstr):
        try:
            cliRE = re.compile(regexstr, re.IGNORECASE)
        except (re.error, OverflowError, RecursionError) as e:
            # Keep the best-effort behaviour (scan without the filter) but
            # tell the user that the pattern was not applied.
            print(f"[!]Ignore invalid regex '{regexstr}': {e}")

    failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
    failed_set = set()
    if (main_mode == 3 or link_mode) and not conf.ignore_failed_list():
        try:
            flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
            failed_set = set(flist)
            if len(flist) != len(failed_set):
                # De-duplicate and write back without changing the relative
                # order of the entries; of repeated entries only the last
                # occurrence is kept.
                seen = set()
                deduped = []
                for entry in reversed(flist):
                    if entry not in seen:
                        seen.add(entry)
                        deduped.append(entry)
                deduped.reverse()
                failed_list_txt_path.write_text('\n'.join(deduped) + '\n',
                                                encoding='utf-8')
        except (OSError, UnicodeError):
            # A missing or unreadable failed list is not fatal: continue with
            # whatever entries were loaded (possibly none).
            pass

    if not Path(source_folder).is_dir():
        print('[-]Source folder not found!')
        return []

    total = []
    source = Path(source_folder).resolve()
    skip_failed_cnt, skip_nfo_days_cnt = 0, 0
    escape_folder_set = set(re.split("[,,]", conf.escape_folder()))
    for full_name in source.glob(r'**/*'):
        if main_mode != 3 and not escape_folder_set.isdisjoint(full_name.parent.parts):
            continue
        if full_name.suffix.lower() not in file_type:
            continue
        absf = str(full_name)
        if absf in failed_set:
            skip_failed_cnt += 1
            if debug:
                print('[!]Skip failed movie:', absf)
            continue
        if full_name.is_symlink():
            # Never stat() a symlink: its target may not exist.
            if main_mode != 3:
                continue  # outside mode 3, symlinks are not scraped
            # Size 0 bypasses the small-file check below.
            movie_size = 0
        else:
            st = full_name.stat()
            if main_mode != 3 and st.st_nlink > 1 and not conf.scan_hardlink():
                continue  # outside mode 3, hard links are skipped unless enabled
            movie_size = st.st_size
        # Zero-byte samples are let through for debugging; anything else
        # below 120 MiB is treated as an advertisement clip and dropped
        # (observed ad clips ranged from about 15 MB to 102 MB).
        if 0 < movie_size < 125829120:  # 1024*1024*120=125829120
            continue
        if (cliRE and not cliRE.search(absf)) or trailerRE.search(full_name.name):
            continue
        if main_mode == 3:
            nfo = full_name.with_suffix('.nfo')
            if not nfo.is_file():
                if debug:
                    print(f"[!]Metadata {nfo.name} not found for '{absf}'")
            elif nfo_skip_days > 0 and file_modification_days(nfo) <= nfo_skip_days:
                skip_nfo_days_cnt += 1
                if debug:
                    print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
                continue
        total.append(absf)

    if skip_failed_cnt:
        print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
    if skip_nfo_days_cnt:
        print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
    if nfo_skip_days <= 0 or not link_mode or main_mode == 3:
        return total

    # Link mode: files that were already scraped successfully must also be
    # checked against the .nfo files in the success folder, skipping those
    # whose .nfo was updated within the last nfo_skip_days days.
    skip_numbers = set()
    success_folder = Path(conf.success_folder()).resolve()
    for f in success_folder.glob(r'**/*'):
        if not re.match(r'\.nfo$', f.suffix, re.IGNORECASE):
            continue
        if file_modification_days(f) > nfo_skip_days:
            continue
        number = get_number(False, f.stem)
        if not number:
            continue
        skip_numbers.add(number.lower())

    # Partition in one pass instead of calling list.remove() per skipped file.
    kept, rm_list = [], []
    for f in total:
        n_number = get_number(False, os.path.basename(f))
        if n_number and n_number.lower() in skip_numbers:
            rm_list.append(f)
        else:
            kept.append(f)
    if debug:
        for f in rm_list:
            print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
    if rm_list:
        print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
    return kept
def main(args: tuple) -> Path:
    """Run one capture session: banner, setup, processing, cleanup.

    Steps, in order: validate the main mode, install signal handlers, start
    log duplication, print the banner and active options, optionally check
    for updates, refresh the mapping tables, build the OpenCC converter, then
    process either a single file or every file returned by ``movie_lists``.
    Empty folders are removed afterwards when configured.

    The process is terminated with ``os._exit`` when the main mode is not
    1, 2 or 3 (exit code 4) or when the mapping tables cannot be downloaded
    (exit code -1, after a 30 second pause).

    Args:
        args: A 6-tuple ``(single_file_path, custom_number, logdir, regexstr,
            zero_op, no_net_op)``. A non-empty ``single_file_path`` selects
            single-file processing, with ``custom_number`` overriding the
            number parsed from the file name. ``regexstr`` is forwarded to
            ``movie_lists``; ``zero_op`` and ``no_net_op`` are forwarded to
            ``create_data_and_move``, and a truthy ``zero_op`` also disables
            the empty-folder cleanup.

    Returns:
        The value returned by ``close_logfile(logdir)``.
    """
    (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args
    conf = config.getInstance()
    main_mode = conf.main_mode()
    folder_path = ""
    if main_mode not in (1, 2, 3):
        print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
        os._exit(4)

    signal.signal(signal.SIGINT, signal_handler)
    if sys.platform == 'win32':
        signal.signal(signal.SIGBREAK, sigdebug_handler)
    else:
        signal.signal(signal.SIGWINCH, sigdebug_handler)
    dupe_stdout_to_logfile(logdir)

    platform_total = (' - ' + platform.platform() + ' \n[*] - ' + platform.machine()
                      + ' - Python-' + platform.python_version())

    print('[*]================= Movie Data Capture =================')
    print('[*]' + version.center(54))
    print('[*]======================================================')
    print('[*]' + platform_total)
    print('[*]======================================================')
    print('[*] - 严禁在墙内宣传本项目 - ')
    print('[*]======================================================')

    start_time = time.time()
    print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))

    print(f"[+]Load Config file '{conf.ini_path}'.")
    if conf.debug():
        print('[+]Enable debug')
    if conf.link_mode() in (1, 2):
        print('[!]Enable {} link'.format(('soft', 'hard')[conf.link_mode() - 1]))
    if len(sys.argv) > 1:
        print('[!]CmdLine:', " ".join(sys.argv[1:]))
    if not single_file_path:
        mode_fields = (
            main_mode,
            ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1],
            "" if not conf.multi_threading() else ", multi_threading on",
            "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
            "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}",
        )
    else:
        mode_fields = ('-', 'Single File', '', '', '')
    print('[+]Main Working mode ## {}: {} ## {}{}{}'.format(*mode_fields))

    if conf.update_check():
        try:
            check_update(version)
        except Exception as e:
            # A failed update check must not stop the run.
            print('[-]Update check failed!', e)

    create_failed_folder(conf.failed_folder())

    # Download Mapping Table, parallel version
    def fmd(f) -> typing.Tuple[str, Path]:
        """Return the (download URL, local cache path) pair for table file ``f``."""
        return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
                Path.home() / '.local' / 'share' / 'mdc' / f)

    map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
    # Delete cached tables that have reached the configured validity age so
    # that they are fetched again below.
    for k, v in map_tab:
        if v.exists():
            if file_modification_days(str(v)) >= conf.mapping_table_validity():
                print("[+]Mapping Table Out of date! Remove", str(v))
                os.remove(str(v))
    try:
        # Only the tables that are missing locally are requested.
        res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
        for i, fp in enumerate(res, start=1):
            if fp and len(fp):
                print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
            else:
                print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
    except Exception as e:
        print("[!] ==================== ERROR ====================")
        print("[!] " + "Mapping Table Download FAILED".center(47))
        print("[!] " + "无法连接github".center(47))
        print("[!] " + "请过几小时再试试".center(47))
        # Report the underlying cause instead of discarding it.
        print("[!]", e)
        print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47))
        time.sleep(30)
        os._exit(-1)

    # create OpenCC converter
    ccm = conf.cc_convert_mode()
    try:
        oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json')
    except Exception:
        # some OS no OpenCC cpython, try opencc-python-reimplemented.
        # pip uninstall opencc && pip install opencc-python-reimplemented
        oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')

    if not single_file_path == '':  # Single File
        print('[+]==================== Single File =====================')
        if custom_number == '':
            create_data_and_move_with_custom_number(
                single_file_path,
                get_number(conf.debug(), os.path.basename(single_file_path)),
                oCC)
        else:
            create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
    else:
        folder_path = conf.source_folder()
        if not isinstance(folder_path, str) or folder_path == '':
            # No source folder configured: fall back to the current directory.
            folder_path = os.path.abspath(".")

        movie_list = movie_lists(folder_path, regexstr)

        count = 0
        count_all = str(len(movie_list))
        print('[+]Find', count_all, 'movies.')
        print('[*]======================================================')
        stop_count = conf.stop_counter()
        if stop_count < 1:
            # A stop counter below 1 means "no limit".
            stop_count = 999999
        else:
            count_all = str(min(len(movie_list), stop_count))

        for movie_path in movie_list:  # walk the movie list and hand each file to core
            count = count + 1
            percentage = str(count / int(count_all) * 100)[:4] + '%'
            print('[!] {:>30}{:>21}'.format(
                '- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
                time.strftime("%H:%M:%S")))
            create_data_and_move(movie_path, zero_op, no_net_op, oCC)
            if count >= stop_count:
                print("[!]Stop counter triggered!")
                break

    if conf.del_empty_folder() and not zero_op:
        rm_empty_folder(conf.success_folder())
        rm_empty_folder(conf.failed_folder())
        # folder_path stays "" in single-file mode, so nothing extra is removed.
        if len(folder_path):
            rm_empty_folder(folder_path)

    end_time = time.time()
    total_time = str(timedelta(seconds=end_time - start_time))
    # Trim the fractional seconds to three digits when a fraction is present.
    print("[+]Running time", total_time[:len(total_time) if total_time.rfind('.') < 0 else -3],
          " End at", time.strftime("%Y-%m-%d %H:%M:%S"))

    print("[+]All finished!!!")

    return close_logfile(logdir)
def main(args: tuple) -> Path: (logdir, regexstr, zero_op, no_net_op, verrel, cpuinfo) = args conf = config.getInstance() main_mode = conf.main_mode() folder_path = "" if main_mode not in (1, 2, 3): print( f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help." ) os._exit(4) signal.signal(signal.SIGINT, signal_handler) if sys.platform == 'win32': signal.signal(signal.SIGBREAK, sigdebug_handler) else: signal.signal(signal.SIGWINCH, sigdebug_handler) dupe_stdout_to_logfile(logdir) x86_64_cpu = cpuinfo['arch'] == 'X86_64' avx_cpu = x86_64_cpu and 'avx' in cpuinfo['flags'] if x86_64_cpu and not avx_cpu: conf.set_override('face:locations_model=') running_env_info = f""" [*] OS: {platform.platform()} [*] CPU: {cpuinfo['brand_raw']}{' (avx)' if avx_cpu else ''} [*] Python: {cpuinfo['python_version']}""".lstrip() print('[*]================== AV Data Capture ===================') print('[*]' + verrel.center(54)) print('[*]======================================================') print(running_env_info) print('[*]======================================================') print('[*] - 严禁在墙内宣传本项目 - ') print('[*]======================================================') start_time = time.time() print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S")) print(f"[+]Load Config file '{conf.ini_path}'.") if conf.debug(): print('[+]Enable debug') if conf.link_mode() in (1, 2): print('[!]Enable {} link'.format( ('soft', 'hard')[conf.link_mode() - 1])) if len(sys.argv) > 1: print('[!]CmdLine:', " ".join(sys.argv[1:])) print('[+]Main Working mode ## {}: {} ## {}{}{}'.format( *(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1], "" if not conf.multi_threading() else ", multi_threading on", "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}", "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"))) if conf.update_check(): try: check_update(version) except Exception 
as e: print('[-]Update check failed!', e) create_failed_folder(conf.failed_folder()) # Download Mapping Table, parallel version def fmd(f) -> typing.Tuple[str, Path]: """ """ return ( 'https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f, Path.home() / '.local' / 'share' / 'avdc' / f) map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json')) for k, v in map_tab: if v.exists(): if file_modification_days(str(v)) >= conf.mapping_table_validity(): print("[+]Mapping Table Out of date! Remove", str(v)) os.remove(str(v)) try: res = parallel_download_files( ((k, v) for k, v in map_tab if not v.exists())) for i, fp in enumerate(res, start=1): if fp and len(fp): print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}") else: print(f"[-] [{i}/{len(res)}] Mapping Table Download failed") except Exception as e: print("[!] ==================== ERROR ====================") print("[!] " + "Mapping Table Download FAILED".center(47)) print("[!] " + "无法连接github".center(47)) print("[!] " + "请过几小时再试试".center(47)) print("[!]", e) print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47)) time.sleep(30) os._exit(-1) # create OpenCC converter ccm = conf.cc_convert_mode() try: oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json') except: # some OS no OpenCC cpython, try opencc-python-reimplemented. 
# pip uninstall opencc && pip install opencc-python-reimplemented oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t') folder_path = conf.source_folder() if not isinstance(folder_path, str) or folder_path == '': folder_path = os.path.abspath(".") movie_list = movie_lists(folder_path, regexstr) count = 0 count_all = str(len(movie_list)) print('[+]Find', count_all, 'movies.') print('[*]======================================================') stop_count = conf.stop_counter() if stop_count < 1: stop_count = 999999 else: count_all = str(min(len(movie_list), stop_count)) for movie_path in movie_list: # 遍历电影列表 交给core处理 count = count + 1 percentage = str(count / int(count_all) * 100)[:4] + '%' print('[!] {:>30}{:>21}'.format( '- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S"))) create_data_and_move(movie_path, zero_op, no_net_op, oCC) if count >= stop_count: print("[!]Stop counter triggered!") break if interval_delay := conf.interval_delay(): time.sleep(interval_delay + secrets.randbelow(30) / 10)