def encode_media(process_order_function=PROCESS_ORDER_FUNCS[DEFAULT_ORDER_FUNC], **kwargs):
    meta_manager = MetaManagerExtended(**kwargs)  # path_meta=kwargs['path_meta'], path_source=kwargs['path_source']
    meta_manager.load_all()
    encoder = Encoder(meta_manager, **kwargs)

    # In the full system, encoding will probably be driven from a rabbitmq endpoint.
    # For testing locally we monitor the 'pending_actions' list.
    for name in progress_bar(process_order_function(
        m.name
        for m in meta_manager.meta.values()
        if PENDING_ACTION['encode'] in m.pending_actions or not m.source_hashs
        # (
        # 'AKB0048 Next Stage - ED1 - Kono Namida wo Kimi ni Sasagu',
        # 'Cuticle Tantei Inaba - OP - Haruka Nichijou no Naka de',
        # 'Gosick - ED2 - Unity (full length)',
        # 'Ikimonogakari - Sakura',  # Takes 2 hours to encode
        # 'Frozen Japanise (find real name)',  # Took too long to process
        # 'Parasite Eve - Somnia Memorias',  # Non unicode characterset
        # 'Akira Yamaoka - Día de los Muertos',  # Non unicode characterset
        # 'Higurashi no Naku koro ni - ED - why or why not (full length)',  # When subs import from SSA they still have styling information attached
        # 'Gatekeepers - OP - For the Smiles of Tomorrow.avi',  # It's buggered. Looks like it's trying to containerize subs in a txt file?
        # 'Get Backers - ED2 - Namida no Hurricane',  # It's just f****d
        # 'Nana (anime) - OP - Rose',  # SSA's have malformed unicode characters
        # 'Lunar Silver Star Story - OP - Wings (Japanese Version)',
        # 'Evangleion ED - Fly Me To The Moon',  # Odd dimensions and needs to be normalised
        # 'Ranma Half OP1 - Jajauma ni Sasenaide',
        # 'Tamako Market - OP - Dramatic Market Ride',
        # 'Fullmetal Alchemist - OP1 - Melissa',  # Exhibits high bitrate pausing at end
        # 'Samurai Champloo - OP - Battlecry',  # Missing title sub with newline
        # 'KAT-TUN Your side [Instrumental]',
        # )
    )):
        encoder.encode(name)
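# Hedged sketch: encode_media() expects process_order_function to take the iterable of
# pending track names and return them in the order they should be encoded. The real
# PROCESS_ORDER_FUNCS / DEFAULT_ORDER_FUNC are defined elsewhere in the codebase; the
# mapping below is only a hypothetical illustration of that shape, not the actual config.
import random

def _example_shuffle_names(names):
    names = list(names)
    random.shuffle(names)
    return names

EXAMPLE_PROCESS_ORDER_FUNCS = {
    'sorted': sorted,                  # deterministic, alphabetical encode order
    'random': _example_shuffle_names,  # spread the long encodes across repeated runs
}
EXAMPLE_DEFAULT_ORDER_FUNC = 'sorted'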
class MetaViewer(object):

    def __init__(self, path_meta=None, path_processed=None, path_source=None, **kwargs):
        self.meta_manager = MetaManagerExtended(path_meta=path_meta, path_source=path_source, path_processed=path_processed)

    def get_meta_details(self, name_regex):
        if not name_regex:
            self.meta_manager.load_all()
            meta_items = self.meta_manager.meta_items
        else:
            meta_items = (
                self.meta_manager.load(f.file_no_ext) or self.meta_manager.get(f.file_no_ext)
                for f in self.meta_manager.files
                if re.search(name_regex, f.file_no_ext, flags=re.IGNORECASE)
            )

        def lazy_exists(path):
            return lambda: os.path.exists(path)

        file_details = defaultdict(list)
        for m in meta_items:
            for f in filter(None, m.source_files.values()):
                file_details[m.name].append(FileItem('source', f['relative'], f['absolute'], lazy_exists(f['absolute'])))
            for f in m.processed_files.values():
                file_details[m.name].append(FileItem('processed', f.relative, f.absolute, lazy_exists(f.absolute)))
        return file_details
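# Hedged usage sketch for MetaViewer above. The paths are placeholders, and it assumes
# FileItem is a namedtuple-like record with (type, relative, absolute, exists) fields,
# as the constructor calls in get_meta_details() suggest.
def example_show_meta_details():
    viewer = MetaViewer(path_meta='/data/meta', path_processed='/data/processed', path_source='/data/source')
    for name, file_items in viewer.get_meta_details(r'Gosick').items():
        for item in file_items:
            print(name, item.type, item.relative, item.exists())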
def import_media(**kwargs):
    """
    Import processed tracks described by the meta files into the track database,
    removing database tracks whose processed source no longer exists.
    """
    stats = dict(
        meta_set=set(), meta_imported=set(), meta_unprocessed=set(),
        db_removed=list(), missing_processed_deleted=set(), missing_processed_aborted=set(),
        db_start=set(), meta_hash_matched_db_hash=set(),
    )

    def get_db_track_names():
        return set(t.source_filename for t in DBSession.query(Track.source_filename))

    meta_manager = MetaManagerExtended(**kwargs)
    importer = TrackImporter(meta_manager=meta_manager)
    stats['db_start'] = get_db_track_names()

    meta_manager.load_all()  # mtime=epoc(last_update())

    meta_processed_track_ids = set(meta_manager.source_hashs)
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items if m.source_hash)

    for name in progress_bar(meta_manager.meta.keys()):
        try:
            if importer.import_track(name):
                stats['meta_imported'].add(name)
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s', name)
                delete_track(ex.id)
                commit()
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    for unneeded_track_id in importer.exisiting_track_ids - meta_processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(DBSession.query(Track).get(unneeded_track_id).source_filename or unneeded_track_id)
        delete_track(unneeded_track_id)
        commit()

    stats['db_end'] = get_db_track_names()
    #assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']  # TODO! Reinstate this

    return stats
def cleanup_media(**kwargs):
    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()

    all_known_file_hashs = {
        processed_file.hash
        for m in meta_manager.meta_items
        for processed_file in m.processed_files.values()
    }
    unlinked_files = (
        f
        for f in meta_manager.processed_files_manager.scan
        if f.file_no_ext and f.file_no_ext not in all_known_file_hashs
    )

    count = 0
    for unlinked_file in unlinked_files:
        if kwargs.get('dryrun'):
            print(unlinked_file.relative)
        else:
            os.remove(unlinked_file.absolute)
        count += 1
    log.info('Cleaned up - {} files'.format(count))
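# Example invocation (paths are placeholders): preview what would be removed with
# dryrun=True, then run again without it to actually delete the unlinked files.
#   cleanup_media(path_meta='/data/meta', path_source='/data/source', path_processed='/data/processed', dryrun=True)
#   cleanup_media(path_meta='/data/meta', path_source='/data/source', path_processed='/data/processed')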
def import_media(**kwargs):
    """
    Import processed tracks into the track API (POST) and delete API tracks
    that are no longer backed by processed meta (DELETE).
    """
    stats = dict(
        meta_set=set(), meta_imported=set(), meta_unprocessed=set(),
        db_removed=list(), missing_processed_deleted=set(), missing_processed_aborted=set(),
        db_start=set(), meta_hash_matched_db_hash=set(),
    )

    track_api = partial(_track_api, kwargs['api_host'])

    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()  # mtime=epoc(last_update())

    processed_track_ids = set(meta_manager.source_hashs)
    processed_files_lookup = set(f.relative for f in fast_scan(meta_manager.processed_files_manager.path))

    existing_tracks = track_api()['data']['tracks']
    existing_track_ids = existing_tracks.keys()

    generate_track_dict = partial(
        _generate_track_dict,
        meta_manager=meta_manager,
        existing_track_ids=existing_track_ids,
        processed_files_lookup=processed_files_lookup,
    )

    stats['db_start'] = set(existing_tracks.values())
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items if m.source_hash)

    tracks_to_add = []
    track_ids_to_delete = []

    log.info(f'Importing tracks - Existing:{len(existing_track_ids)} Processed:{len(processed_track_ids)}')

    for name in progress_bar(meta_manager.meta.keys()):
        try:
            track = generate_track_dict(name)
            if track:
                stats['meta_imported'].add(name)
                #tracks_to_add.append(track)
                track_api([track], method='POST')
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s', name)
                track_ids_to_delete.append(ex.id)
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    for unneeded_track_id in existing_track_ids - processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(existing_tracks[unneeded_track_id])
        track_ids_to_delete.append(unneeded_track_id)

    log.info(f"{kwargs['api_host']} -> Add:{len(tracks_to_add)} Delete:{len(track_ids_to_delete)}")
    #track_api(tracks_to_add, method='POST')
    track_api(track_ids_to_delete, method='DELETE')

    stats['db_end'] = track_api()['data']['tracks'].values()
    #assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']  # TODO! Reinstate this

    return stats
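# Hypothetical sketch of the _track_api helper partially applied above: a thin JSON
# wrapper around a track import endpoint on api_host. The real helper lives elsewhere;
# the URL path and payload shape here are assumptions beyond the call pattern visible
# in import_media() (GET for listing, POST with a list of track dicts, DELETE with a
# list of track ids).
import requests

def _track_api_sketch(api_host, data=None, method='GET'):
    url = f'http://{api_host}/track_import'  # placeholder endpoint
    if method == 'GET':
        response = requests.get(url)
    else:
        response = requests.request(method, url, json=data)
    response.raise_for_status()
    return response.json()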
class ProcessMediaTestManager(object):

    def __init__(self, source_files=set()):
        self.source_files = source_files

    def _link_source_files(self):
        for f in self.source_files:
            os.link(
                os.path.join(SOURCE_PATH, f),
                os.path.join(self.path_source, f),
            )

    def __enter__(self):
        self._temp_scan = tempfile.TemporaryDirectory()
        self._temp_meta = tempfile.TemporaryDirectory()
        self._temp_processed = tempfile.TemporaryDirectory()
        self._link_source_files()
        self.meta_manager = MetaManagerExtended(path_meta=self.path_meta, path_source=self.path_source, path_processed=self.path_processed)
        self.processed_files_manager = self.meta_manager.processed_files_manager
        self.source_files_manager = self.meta_manager.source_files_manager
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._temp_scan.cleanup()
        self._temp_meta.cleanup()
        self._temp_processed.cleanup()
        self.meta_manager = None
        self.processed_files_manager = None
        self.source_files_manager = None

    @property
    def path_source(self):
        return self._temp_scan.name

    @property
    def path_meta(self):
        return self._temp_meta.name

    @property
    def path_processed(self):
        return self._temp_processed.name

    @property
    def commandline_kwargs(self):
        return dict(path_meta=self.path_meta, path_source=self.path_source, path_processed=self.path_processed, force=True)

    def scan_media(self):
        self.meta_manager._release_cache()
        scan_media(**self.commandline_kwargs)

    def encode_media(self, mock=None):
        self.meta_manager._release_cache()
        if mock:
            with MockEncodeExternalCalls():
                encode_media(**self.commandline_kwargs)
        else:
            encode_media(**self.commandline_kwargs)

    def cleanup_media(self):
        self.meta_manager._release_cache()
        cleanup_media(**self.commandline_kwargs)

    @property
    def meta(self):
        """
        Dump of all the generated raw meta json files into a python data structure
        """
        meta = {}
        for filename in os.listdir(self.path_meta):
            with open(os.path.join(self.path_meta, filename), 'r') as meta_filehandle:
                meta[filename] = json.load(meta_filehandle)
        return meta

    @meta.setter
    def meta(self, data):
        self.meta_manager._release_cache()
        for f in os.listdir(self.path_meta):
            os.remove(os.path.join(self.path_meta, f))
        for filename, meta_data in data.items():
            with open(os.path.join(self.path_meta, filename), 'w') as meta_filehandle:
                json.dump(meta_data, meta_filehandle)
        self.meta_manager.load_all()

    def update_source_hashs(self, name):
        m = self.get(name)
        m.update_source_hashs()
        self.meta_manager.save(name)

    def get(self, name):
        self.meta_manager._release_cache()
        self.meta_manager.load(name)
        return self.meta_manager.get(name)

    def mock_processed_files(self, filenames):
        for f in filenames:
            file_path, file_name = os.path.split(f)
            os.makedirs(os.path.join(self.path_processed, file_path), exist_ok=True)
            Path(os.path.join(self.path_processed, f)).touch()
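# Hedged example of driving ProcessMediaTestManager in a test: the fixture filenames are
# placeholders that would need to exist under SOURCE_PATH. It exercises the context
# manager protocol, the scan/encode steps against temporary directories, and the `meta`
# property that exposes the generated json.
def example_test_scan_and_encode():
    with ProcessMediaTestManager({'test1.mp4', 'test1.srt'}) as manager:
        manager.scan_media()
        manager.encode_media(mock=True)
        assert manager.meta  # one json file per scanned track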