def extrafanart_download(data, path, conf: Config, filepath, failed_folder):
    """Download every extra fan-art image in ``data`` into a sub-folder of ``path``.

    Does nothing when ``conf.is_extrafanart()`` is false. Images are stored as
    ``extrafanart-1.jpg``, ``extrafanart-2.jpg`` ... inside
    ``path + '/' + conf.get_extrafanart()``.

    Args:
        data: Iterable of image URLs.
        path: Folder of the movie being processed.
        conf: Configuration object; supplies the sub-folder name and the
            retry count (fourth element of ``conf.proxy()``).
        filepath: Path of the movie file, forwarded to the failure handling.
        failed_folder: Name of the failed-output folder.

    Returns:
        None. Processing stops at the first image that cannot be downloaded
        or is still empty after all retries.
    """
    if not conf.is_extrafanart():
        return
    path = path + '/' + conf.get_extrafanart()
    # The retry count does not change between images, so read it only once.
    _switch, _proxy, _timeout, retry, _proxytype = conf.proxy()
    for j, url in enumerate(data, start=1):
        # The leading slash is kept on purpose: the downloader concatenates
        # ``path + "/" + filename`` and the size checks below use ``path + filename``.
        filename = '/extrafanart-' + str(j) + '.jpg'
        target = path + filename
        if download_file_with_filename(url, filename, path, conf, filepath, failed_folder) == 'failed':
            moveFailedFolder(filepath, failed_folder)
            return
        for i in range(retry):
            if os.path.getsize(target) != 0:
                break
            # Fixed: the placeholders are now actually formatted, and the total
            # reflects the configured retry count instead of a hard-coded 3.
            print('[!]Image Download Failed! Trying again. [{}/{}]'.format(i + 1, retry))
            download_file_with_filename(url, filename, path, conf, filepath, failed_folder)
        if os.path.getsize(target) == 0:
            return
        logger.debug(f'Image Downloaded! {path}/extrafanart-{j}.jpg')
def rm_empty_success_failed_folder(conf: config.Config) -> None:
    """Delete the empty directories found under the failed and success folders.

    Both folders are resolved relative to ``conf.folder_path`` and walked
    bottom-up, so a directory that only contained empty sub-directories is
    itself removed later in the same pass. Removal errors are logged at debug
    level and otherwise ignored.
    """
    folder_names = [conf.failed_folder(), conf.success_folder()]
    for folder_name in folder_names:
        base_dir = os.path.join(conf.folder_path, folder_name)
        for current_dir, _subdirs, _files in os.walk(base_dir, topdown=False):
            # Re-list the directory: children may just have been removed.
            if os.listdir(current_dir):
                continue
            try:
                logger.info(f"试图删除空文件夹{current_dir}。")
                os.rmdir(current_dir)
            except OSError:
                logger.debug("删除失败")
def create_success_failed_folder(conf: config.Config) -> bool:
    """Create the failed and success folders under ``conf.folder_path``.

    Returns:
        True when both folders exist afterwards, False as soon as one of them
        cannot be created.
    """
    for folder_name in (conf.failed_folder(), conf.success_folder()):
        path = os.path.join(conf.folder_path, folder_name)
        if os.path.isdir(path):
            continue
        try:
            logger.debug(f"试图创建{path}。")
            os.makedirs(path)
        except OSError:
            # Only file-system errors are expected here; a bare ``except``
            # would also swallow KeyboardInterrupt / SystemExit.
            logger.debug("创建失败")
            return False
    return True
def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
    """POST ``query`` as form data to ``url``, retrying on proxy errors.

    Args:
        url: Target URL.
        query: Form fields sent as the request body.
        headers: Optional extra request headers. The dict is no longer
            modified; the fixed User-Agent below overrides any caller-supplied
            ``User-Agent`` in a private copy.

    Returns:
        The ``requests.Response`` of the first attempt that does not raise
        ``ProxyError``, or ``None`` when every attempt failed that way.
        Any other exception raised by ``requests.post`` propagates.
    """
    switch, proxy, timeout, retry_count, proxytype = Config.get_instance().proxy()
    proxies = get_proxy(proxy, proxytype)
    headers_ua = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"
    }
    # Merge into a fresh dict so the caller's ``headers`` object is not mutated.
    merged_headers = {**(headers or {}), **headers_ua}
    request_kwargs = {"data": query, "headers": merged_headers, "timeout": timeout}
    if switch == 1 or switch == '1':
        request_kwargs["proxies"] = proxies

    for i in range(retry_count):
        try:
            return requests.post(url, **request_kwargs)
        except requests.exceptions.ProxyError:
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    return None
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
    """GET ``url`` and return the response in the form selected by ``return_type``.

    Args:
        url: Address to fetch; converted with ``str()`` before the request.
        cookies: Optional cookies sent with the request.
        ua: Optional User-Agent; a fixed Chrome string is used when omitted.
        return_type: ``"object"`` returns the response object, ``"content"``
            the raw bytes, anything else the text decoded as UTF-8.

    Returns:
        The selected representation, or ``None`` after a proxy error (no
        retry) or once all retries failed with another exception.
    """
    verify = Config.get_instance().cacert_file()
    switch, proxy, timeout, retry_count, proxytype = Config.get_instance().proxy()
    proxies = get_proxy(proxy, proxytype)

    user_agent = ua
    if user_agent is None:
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"
    headers = {"User-Agent": user_agent}

    request_kwargs = {"headers": headers, "timeout": timeout, "cookies": cookies}
    # Proxy settings and the CA bundle are only passed when the proxy is on.
    if switch == '1' or switch == 1:
        request_kwargs["proxies"] = proxies
        request_kwargs["verify"] = verify

    for attempt in range(1, retry_count + 1):
        try:
            result = requests.get(str(url), **request_kwargs)
            result.encoding = "utf-8"
            if return_type == "object":
                return result
            if return_type == "content":
                return result.content
            return result.text
        except requests.exceptions.ProxyError:
            print("[-]Proxy error! Please check your Proxy")
            return
        except Exception as e:
            print("[-]Connect retry {}/{}".format(attempt, retry_count))
            print("[-]" + str(e))
    print('[-]Connect Failed! Please check your Proxy or Network!')
def download_file_with_filename(url, filename, path, conf: Config, filepath, failed_folder):
    """Download ``url`` and store it as ``path + "/" + filename``.

    The request is retried up to the retry count from ``conf.proxy()`` when
    ``requests`` raises a ``RequestException`` (this already covers
    ``ConnectionError``, ``ProxyError`` and ``ConnectTimeout``). The HTTP
    status is not checked: whatever body is received is written to disk.

    Args:
        url: Address of the file to download.
        filename: Target file name, appended to ``path`` by plain string
            concatenation (callers may pass a name with a leading slash).
        path: Target folder; created when missing.
        conf: Configuration object supplying proxy, timeout and retry count.
        filepath: Path of the movie file, handed to ``moveFailedFolder`` when
            all attempts failed.
        failed_folder: Name of the failed-output folder.

    Returns:
        Always ``None``. NOTE(review): callers in this file compare the result
        with ``'failed'``, a value this function never returns.
    """
    switch, proxy, timeout, retry_count, proxytype = conf.proxy()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    request_kwargs = {'headers': headers, 'timeout': timeout}
    if switch == 1 or switch == '1':
        request_kwargs['proxies'] = get_proxy(proxy, proxytype)

    for attempt in range(1, retry_count + 1):
        try:
            os.makedirs(path, exist_ok=True)
            r = requests.get(url, **request_kwargs)
            # The former ``r == ''`` test compared a Response object with a
            # string and could never be true, so it was dropped.
            with open(str(path) + "/" + filename, "wb") as code:
                code.write(r.content)
            return
        except requests.exceptions.RequestException:
            print('[-]Image Download : Connect retry ' + str(attempt) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
    moveFailedFolder(filepath, failed_folder)
    return
def create_folder(movie: Movie, conf: config.Config) -> str:
    """Create the output folder for ``movie`` and return its path.

    The preferred location is
    ``<conf.folder_path>/<success folder>/<movie.storage_dir>``. When that
    folder cannot be created, a fallback named
    ``<movie_id>-<first 10 characters of the title>`` is used instead.

    Raises:
        OSError: If the fallback folder cannot be created either.
    """
    target_path = os.path.join(conf.folder_path, conf.success_folder(), movie.storage_dir)
    logger.debug(f"试图创建{target_path}。")
    if not os.path.exists(target_path):
        try:
            os.makedirs(target_path)
        except OSError:
            logger.debug("创建文件夹失败。尝试默认文件夹规则。")
            location_rule = '-'.join([movie.movie_id, movie.title[:10]])
            target_path = os.path.join(conf.folder_path, conf.success_folder(), location_rule)
            logger.debug(f"试图创建{target_path}。")
            # The fallback folder may already exist from an earlier run.
            os.makedirs(target_path, exist_ok=True)
    return target_path
def is_uncensored(number):
    """Return True when ``number`` looks like an uncensored movie id.

    An id qualifies when it starts with at least four digits, starts with
    ``n`` followed by four digits, contains ``HEYZO`` (any case), or contains
    one of the comma-separated prefixes returned by
    ``Config.get_instance().get_uncensored()`` (compared case-insensitively).
    """
    upper_number = number.upper()
    if (re.match(r'^\d{4,}', number)
            or re.match(r'n\d{4}', number)
            or 'HEYZO' in upper_number):
        return True
    configs = Config.get_instance().get_uncensored()
    prefixes = (prefix.strip().upper() for prefix in str(configs).split(','))
    # Empty entries must be skipped: ``'' in text`` is always true, so an
    # empty setting or a trailing comma would flag every id as uncensored.
    return any(prefix and prefix in upper_number for prefix in prefixes)
def movie_lists(conf:Config, root, escape_folder, total = None) -> dict[str, list[str]]:
    """Recursively collect media files below ``root``, grouped by movie id.

    Args:
        conf: Configuration object; ``conf.media_type()`` is a comma-separated
            list of file extensions and ``conf.debug()`` is forwarded to
            ``get_number``.
        root: Directory to scan.
        escape_folder: Container of directory base names to skip.
        total: Accumulator used by the recursion; callers normally omit it.

    Returns:
        Mapping of movie id to the absolute paths of its files, or an empty
        dict when ``root`` itself is an escaped folder.
    """
    if os.path.basename(root) in escape_folder:
        return {}
    # ``is None`` rather than truthiness: an empty accumulator handed down by
    # the recursion must be reused, otherwise the files found in that
    # sub-directory land in a throw-away dict and are lost.
    if total is None:
        total = defaultdict(list)
    file_type = conf.media_type().upper().split(",")
    for entry in os.listdir(root):
        f = os.path.join(root, entry)
        if os.path.isdir(f):
            movie_lists(conf, f, escape_folder, total)
        elif os.path.splitext(f)[1].upper() in file_type:
            fname = os.path.basename(f)
            movie_id = get_number(conf.debug(), fname)
            if not movie_id:
                logger.warning(f'文件ID查找失败: {f}')
            else:
                # setdefault also works when a caller passed a plain dict.
                total.setdefault(movie_id, []).append(os.path.abspath(f))
    return total
def paste_file_to_folder(movie: Movie, filepath, path, conf: Config):
    """Move or symlink the movie file into ``path`` and move matching subtitles.

    The target name is ``movie.storage_fname`` plus the original extension
    (with commas removed).

    Args:
        movie: Movie whose ``storage_fname`` names the target file.
        filepath: Current location of the movie file.
        path: Destination folder.
        conf: Configuration object; ``conf.soft_link()`` selects symlink mode
            and ``conf.sub_rule()`` lists subtitle suffixes.

    Returns:
        True when a subtitle file was moved; None otherwise, including when
        the target already exists or permission is denied.
    """
    houzhui = os.path.splitext(filepath)[1].replace(",", "")
    try:
        targetpath = os.path.join(path, movie.storage_fname + houzhui)
        if conf.soft_link():
            # Fixed: the link used to be created twice (absolute, then
            # relative) on the same target, so the second call always raised
            # FileExistsError. Create it once, preferring a relative link so
            # it still resolves when the folder is reached through a network
            # share.
            try:
                link_source = os.path.relpath(filepath, path)
            except ValueError:
                # No relative path exists (e.g. different drives on Windows).
                link_source = filepath
            os.symlink(link_source, targetpath)
        else:
            os.rename(filepath, targetpath)
            # A trace symlink at the original location was considered here
            # but stays disabled because it breaks on Windows.
        sub_res = conf.sub_rule()
        for subname in sub_res:
            sub_file = movie.storage_fname + subname
            if os.path.exists(sub_file):
                # Move the subtitle next to the movie.
                os.rename(sub_file, path + '/' + sub_file)
                print('[+]Sub moved!')
                return True
    except FileExistsError:
        print('[-]File Exists! Please check your movie!')
        print('[-]move to the root folder of the program.')
        return
    except PermissionError:
        logger.error('Please run as administrator!', exc_info=True)
        return
def process_tags(tags: list[str]) -> list[str]:
    """Strip, drop empty, optionally translate, and de-duplicate ``tags``.

    Translation through ``translate_tag_to_sc`` happens only when the
    configuration's ``translate_to_sc()`` is truthy; tags that translate to
    an empty value are dropped. The order of the result is not defined
    because duplicates are removed through a ``set``.
    """
    should_translate = Config.get_instance().translate_to_sc()
    cleaned = [text for text in (raw.strip() for raw in tags) if text]
    if should_translate:
        translated = [translate_tag_to_sc(tag) for tag in cleaned]
        cleaned = [tag for tag in translated if tag]
    return list(set(cleaned))
def moveFailedFolder(filepath, failed_folder):
    """Move (or symlink) ``filepath`` into the failed folder beside it.

    Nothing happens unless the configuration's ``failed_move()`` is truthy.
    The destination is ``<parent of filepath>/<failed_folder>/``; with
    ``soft_link()`` enabled a symlink is created there, otherwise the file is
    moved.
    """
    settings = Config.get_instance()
    if not settings.failed_move():
        return
    source = pathlib.Path(filepath)
    parent_dir = str(source.parent)
    base_name = source.name
    # Trailing separator kept so the strings handed to os/shutil are unchanged.
    destination_path = '/'.join([parent_dir, failed_folder, ''])
    if settings.soft_link():
        print('[-]Create symlink to Failed output folder')
        os.symlink(filepath, destination_path + '/' + base_name)
    else:
        print('[-]Move to Failed output folder')
        shutil.move(filepath, destination_path)
    return
def __init__(self):
    """Initialise every metadata field to an empty default."""
    # Underscore-prefixed fields are presumably exposed through properties
    # defined elsewhere in the class (other code assigns e.g. ``title``,
    # ``actors``, ``tags``) -- TODO confirm against the class body.
    self._title: str = ''
    self._actors: list[str] = []
    self._release: str = ''
    self.year: str = ''
    self._cover_small: str = ''
    self._tags: list[str] = []
    self._studio: str = ''
    self.director: str = ''
    self.movie_id: str = ''
    self.cover: str = ''
    self.outline: str = ''
    self.runtime: str = ''
    self.series: str = ''
    self.scraper_source: str = ''
    self.label: str = ''
    self.trailer: str = ''
    self.website: str = ''
    self.imagecut: int = 0
    self.extra_fanart: list[str] = []
    # Path of the source file this metadata was scraped for.
    self.original_path = ''
    self._ratings: list[Rating] = []
    # Suffix appended to generated file names.
    self.fname_postfix = ''
    # Process-wide configuration instance.
    self._conf: Config = Config.get_instance()
def create_data_and_move(movies: tuple[str, list[str]], c: Config):
    """Run ``core_main`` for one movie id and all of its files.

    Args:
        movies: ``(movie_id, files)`` pair; ``files`` is sorted in place so
            that multi-part files are processed in name order.
        c: Configuration object. When ``c.debug()`` equals True, exceptions
            from ``core_main`` propagate; otherwise they are logged and
            swallowed so the next movie can be processed.
    """
    # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
    movie_id, files = movies
    files.sort()
    # The explicit comparison with True is kept on purpose so a non-boolean
    # debug value is treated exactly as before.
    if c.debug() == True:  # noqa: E712
        logger.attn(f'[!]Making Data for [{files[0]}], the number is [{movie_id}]')
        core_main(files, movie_id, c)
        logger.info('======================================================')
    else:
        try:
            logger.attn(f'[!]Making Data for [{files[0]}], the number is [{movie_id}]')
            core_main(files, movie_id, c)
            logger.info('======================================================')
        except Exception as err:
            # Fixed: the f-prefix was missing, so the placeholder text was
            # logged literally instead of the file name.
            logger.error(f'[-] [{files[0]}] ERROR:')
            logger.error(err)
def trailer_download(movie: Movie, path, filepath, conf: Config, failed_folder):
    """Download the movie trailer to ``<path>/<storage_fname>-trailer.mp4``.

    When the downloaded file is empty the download is repeated up to the
    retry count from ``conf.proxy()``.

    Args:
        movie: Movie providing ``trailer`` (URL) and ``storage_fname``.
        path: Destination folder.
        filepath: Path of the movie file, forwarded to the downloader.
        conf: Configuration object.
        failed_folder: Name of the failed-output folder.

    Returns:
        None.
    """
    fname = movie.storage_fname + '-trailer.mp4'
    target = path + '/' + fname
    if download_file_with_filename(movie.trailer, fname, path, conf, filepath, failed_folder) == 'failed':
        return
    _switch, _proxy, _timeout, retry, _proxytype = conf.proxy()
    for i in range(retry):
        if os.path.getsize(target) != 0:
            break
        # Fixed: the placeholders are now actually formatted, and the total
        # reflects the configured retry count instead of a hard-coded 3.
        print('[!]Video Download Failed! Trying again. [{}/{}]'.format(i + 1, retry))
        download_file_with_filename(movie.trailer, fname, path, conf, filepath, failed_folder)
    if os.path.getsize(target) == 0:
        return
    print('[+]Video Downloaded!', target)
def image_download(movie: Movie, path, conf: Config, filepath, failed_folder):
    """Download the cover as ``-fanart.jpg`` and copy it to ``-thumb.jpg``.

    When the downloaded file is empty the download is repeated up to the
    retry count from ``conf.proxy()``. The thumb copy is only made when the
    fan-art file ends up non-empty.

    Args:
        movie: Movie providing ``cover`` (URL) and ``storage_fname``.
        path: Destination folder.
        conf: Configuration object.
        filepath: Path of the movie file, forwarded to the failure handling.
        failed_folder: Name of the failed-output folder.

    Returns:
        None.
    """
    fanart_name = movie.storage_fname + '-fanart.jpg'
    thumb_name = movie.storage_fname + '-thumb.jpg'
    fanart_path = path + '/' + fanart_name
    if download_file_with_filename(movie.cover, fanart_name, path, conf, filepath, failed_folder) == 'failed':
        moveFailedFolder(filepath, failed_folder)
        return

    _switch, _proxy, _timeout, retry, _proxytype = conf.proxy()
    for i in range(retry):
        if os.path.getsize(fanart_path) != 0:
            break
        # Fixed: the placeholders are now actually formatted, and the total
        # reflects the configured retry count instead of a hard-coded 3.
        print('[!]Image Download Failed! Trying again. [{}/{}]'.format(i + 1, retry))
        download_file_with_filename(movie.cover, fanart_name, path, conf, filepath, failed_folder)
    if os.path.getsize(fanart_path) == 0:
        return
    logger.debug(f'Image Downloaded! {path}/{fanart_name}')
    shutil.copyfile(fanart_path, path + '/' + thumb_name)
def create_data_and_move_with_custom_number(file_path: str, c: Config, custom_number=None):
    """Process a single file with a user-supplied movie number.

    On any error from ``core_main`` the file is linked (soft-link mode) or
    moved into ``c.failed_folder()``; problems during that step are printed,
    not raised.

    Args:
        file_path: Path of the movie file.
        c: Configuration object.
        custom_number: Movie number to use instead of one parsed from the name.
    """
    try:
        print("[!]Making Data for [{}], the number is [{}]".format(
            file_path, custom_number))
        # ``core_main`` expects a list of files and reads ``files[0]``;
        # passing the bare string made it process the first character only.
        core_main([file_path], custom_number, c)
        print("[*]======================================================")
    except Exception as err:
        print("[-] [{}] ERROR:".format(file_path))
        print('[-]', err)

        # Use the ``c`` argument consistently rather than a module-level
        # ``conf`` that only exists when the file runs as a script.
        use_link = c.soft_link()
        try:
            failed_dir = c.failed_folder() + "/"
            if use_link:
                print("[-]Link {} to failed folder".format(file_path))
                # A symlink needs a file name as destination, not the folder.
                os.symlink(file_path, failed_dir + os.path.basename(file_path))
            else:
                print("[-]Move [{}] to failed folder".format(file_path))
                shutil.move(file_path, failed_dir)
        except Exception as move_err:
            print('[!]', move_err)
def get_javlib_cookie() -> tuple[dict, str]:
    """Fetch the cookie and User-Agent for javlibrary via cloudscraper.

    The request is retried up to the retry count of the global configuration
    on ``ProxyError`` or ``CloudflareIUAMError``.

    Returns:
        ``(raw_cookie, user_agent)`` as produced by
        ``cloudscraper.get_cookie_string``, or ``({}, "")`` when every
        attempt failed.
    """
    import cloudscraper
    switch, proxy, timeout, retry_count, proxytype = Config.get_instance().proxy()
    proxies = get_proxy(proxy, proxytype)

    raw_cookie = {}
    user_agent = ""

    # Get __cfduid/cf_clearance and user-agent
    for i in range(retry_count):
        try:
            if switch == 1 or switch == '1':
                raw_cookie, user_agent = cloudscraper.get_cookie_string(
                    "http://www.javlibrary.com/", proxies=proxies)
            else:
                raw_cookie, user_agent = cloudscraper.get_cookie_string(
                    "http://www.javlibrary.com/")
            # Fixed: stop after the first success instead of repeating the
            # request ``retry_count`` times.
            break
        except requests.exceptions.ProxyError:
            print("[-] ProxyError, retry {}/{}".format(i + 1, retry_count))
        except cloudscraper.exceptions.CloudflareIUAMError:
            print("[-] IUAMError, retry {}/{}".format(i + 1, retry_count))

    return raw_cookie, user_agent
# Script entry point: set up logging, parse the command line, load the
# configuration and determine the root folder to work on.
if __name__ == '__main__':
    version = '4.5.1'
    config_logging()
    # Parse command line args
    single_file_path, folder_path, config_file, custom_number, auto_exit = argparse_function(
        version)
    # Banner: program name and version centred in a 54-column frame.
    logger.info(' AV Data Capture '.center(54, '='))
    logger.info(version.center(54))
    logger.info(''.center(54, '='))
    # Read config.ini
    conf = Config.get_instance(path=config_file)
    if conf.update_check():
        check_update(version)
    if conf.soft_link():
        print('[!]Enable soft link')
    if single_file_path:
        # NOTE(review): basename() yields the file name, not its folder;
        # os.path.dirname() may have been intended -- confirm.
        conf.folder_path = os.path.basename(single_file_path)
    else:
        # No folder on the command line: ask through dir_picker().
        if not folder_path:
            folder_path = dir_picker()
        conf.folder_path = folder_path
    if not conf.folder_path:
        # NOTE(review): exc_info=True is passed although no exception is
        # being handled at this point.
        logger.critical('无法定位根文件夹。', exc_info=True)
def get_data_from_json(file_number: str, filepath: str) -> Movie:
    """Query the configured scrapers for ``file_number`` and build a ``Movie``.

    Scrapers are tried in the order given by ``conf.sources()``; one scraper
    may be moved to the front depending on the shape of ``file_number``. A
    scraper may return either a ``Movie`` or a JSON string. The loop stops at
    the first filled ``Movie``, at the first JSON result accepted by
    ``get_data_state``, or at the first scraper that raises.

    Args:
        file_number: Movie number to look up.
        filepath: Path of the movie file; stored on the result and used to
            move the file to the failed folder when nothing is found.

    Returns:
        The populated ``Movie``, or an empty ``Movie()`` when no usable data
        was found.
    """
    conf = Config.get_instance()
    func_mapping = {
        "airav": airav.main,
        "avsox": avsox.main,
        "fc2": fc2.main,
        "fanza": fanza.main,
        "javdb": javdb.main,
        "javbus": javbus.main,
        "mgstage": mgstage.main,
        "jav321": jav321.main,
        "xcity": xcity.main,
        "javlib": javlib.main,
        "dlsite": dlsite.main,
        "metajavlib": metajavlib.main,
    }

    # default fetch order list, from the beginning to the end
    sources = conf.sources().split(',')

    # if the input file name matches certain rules,
    # move one web service to the beginning of the list
    if "avsox" in sources and (re.match(r"^\d{5,}", file_number)
                               or "HEYZO" in file_number
                               or "heyzo" in file_number
                               or "Heyzo" in file_number):
        preferred = "avsox"
    elif "mgstage" in sources and (re.match(r"\d+\D+", file_number)
                                   or "SIRO" in file_number.upper()):
        preferred = "mgstage"
    elif "fc2" in sources and "FC2" in file_number.upper():
        preferred = "fc2"
    elif "dlsite" in sources and ("RJ" in file_number or "rj" in file_number
                                  or "VJ" in file_number or "vj" in file_number):
        preferred = "dlsite"
    else:
        preferred = None
    if preferred is not None:
        sources.insert(0, sources.pop(sources.index(preferred)))

    json_data = {}
    movie = None
    for source in sources:
        try:
            if conf.debug():
                logger.attn(f'select {source}')
            returnval = func_mapping[source](file_number)
            if isinstance(returnval, Movie):
                if returnval.is_filled():
                    movie = returnval
                    break
            else:
                json_data = json.loads(returnval)
                # if any service return a valid return, break
                if get_data_state(json_data):
                    break
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit are no
            # longer swallowed.
            # NOTE(review): a failing scraper still aborts the whole search
            # instead of falling through to the next source -- confirm intent.
            traceback.print_exc()
            break

    # Return if data not found in all sources
    if not json_data and not movie:
        print('[-]Movie Data not found!')
        moveFailedFolder(filepath, conf.failed_folder())
        return Movie()

    if not movie:
        movie = Movie()
        # (Movie attribute, JSON key) pairs, copied in this order; a missing
        # key yields None, as with the individual assignments this replaces.
        field_map = (
            ('title', 'title'),
            ('actors', 'actor'),
            ('release', 'release'),
            ('cover_small', 'cover_small'),
            ('cover', 'cover'),
            ('tags', 'tag'),
            ('year', 'year'),
            ('series', 'series'),
            ('runtime', 'runtime'),
            ('outline', 'outline'),
            ('scraper_source', 'source'),
            ('label', 'label'),
            ('studio', 'studio'),
            ('director', 'director'),
            ('movie_id', 'number'),
            ('trailer', 'trailer'),
            ('website', 'website'),
            ('imagecut', 'imagecut'),
            ('extra_fanart', 'extrafanart'),
        )
        for attr_name, json_key in field_map:
            setattr(movie, attr_name, json_data.get(json_key))
    # Scrapers only receive the number, so the source path is recorded here.
    movie.original_path = filepath

    if not movie.is_filled():
        print('[-]Movie Data not found!')
        moveFailedFolder(filepath, conf.failed_folder())
        return Movie()

    # TODO: field translation is not implemented yet. The earlier sketch was
    # gated on conf.is_transalte(), iterated conf.transalte_values().split(","),
    # skipped empty values and called translate(...) per field (with
    # target_language="zh-Hans" and the configured key for the "azure" engine).

    logger.debug(movie)
    return movie
def core_main(files: list[str], movie_id: str, conf: Config):
    """Scrape metadata for one movie and organise all of its files.

    Args:
        files: Paths of the files that belong to the movie; ``files[0]`` is
            the main file, the rest are additional parts.
        movie_id: Movie number used for the metadata lookup.
        conf: Configuration object. Only ``conf.main_mode() == 1`` (scraping
            mode) is implemented; any other mode logs a critical message.

    Returns:
        None. Returns early when no metadata was found.
    """
    part = ''
    leak_word = ''
    c_word = ''
    cn_sub = ''

    filepath = files[0]  # path of the main movie file
    movie = get_data_from_json(movie_id, filepath)

    # Return if an unfilled movie came back (data not found)
    if not movie.is_filled():
        return

    imagecut = movie.imagecut

    # Detect the Chinese-subtitle marker and the multi-part (-CD) suffix.
    if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
        cn_sub = '1'
        c_word = '-C'  # file-name suffix for Chinese-subtitled movies
        movie.add_tag('中文字幕')
    if '-CD' in filepath or '-cd' in filepath:
        part = get_part(filepath, conf.failed_folder())

    # Uncensored?
    if '无码' in filepath or is_uncensored(movie.movie_id):
        uncensored = 1
        movie.add_tag("无码")
    else:
        uncensored = 0

    # Leaked?
    if '流出' in filepath or 'uncensored' in filepath:
        movie.add_tag("流出")
        leak = 1
        leak_word = '-流出'  # file-name suffix for leaked movies
    else:
        leak = 0

    movie.fname_postfix = leak_word + c_word + part

    # main_mode
    # 1: Scraping mode
    # 2: Organizing mode
    # 3: Scrape without changing the path
    if conf.main_mode() == 1:
        # Create the output folder; returns the folder for this movie.
        path = create_folder(movie, conf)

        # imagecut == 3 means the small cover has to be fetched as well.
        if imagecut == 3:
            small_cover_check(movie, path, movie.cover_small, conf, filepath, conf.failed_folder())

        image_download(movie, path, conf, filepath, conf.failed_folder())

        # Trailer and extra fan-art are best effort: a failure is logged
        # instead of silently discarded, and processing continues.
        try:
            if movie.trailer and conf.is_trailer():
                trailer_download(movie, path, filepath, conf, conf.failed_folder())
        except Exception:
            logger.warning('Trailer download failed', exc_info=True)
        try:
            if movie.extra_fanart:
                extrafanart_download(movie.extra_fanart, path, conf, filepath, conf.failed_folder())
        except Exception:
            logger.warning('Extra fanart download failed', exc_info=True)

        # Crop the poster
        cutImage(movie, path)

        # Write the NFO file
        write_movie_nfo(movie, path)

        # Move (or link) the main file
        paste_file_to_folder(movie, filepath, path, conf)

        # Computed before the loop below changes ``fname_postfix``.
        poster_path = path + '/' + movie.storage_fname + '-poster.jpg'
        thumb_path = path + '/' + movie.storage_fname + '-thumb.jpg'
        if conf.is_watermark():
            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)

        # Remaining parts reuse the metadata with their own part suffix.
        for f in files[1:]:
            logger.debug(f'Moving extra part {f}')
            new_part = get_part(f, conf.failed_folder())
            movie.fname_postfix = leak_word + c_word + new_part
            paste_file_to_folder(movie, f, path, conf)
    else:
        logger.critical('Unimplemented mode')