Пример #1
0
def encode_media(process_order_function=PROCESS_ORDER_FUNCS[DEFAULT_ORDER_FUNC], **kwargs):
    """
    Encode every meta item that is still waiting to be encoded.

    An item is selected when the 'encode' pending action is listed in its
    ``pending_actions`` or when its ``source_hashs`` is empty/falsy.

    :param process_order_function: called with an iterable of item names;
        whatever it returns is iterated and encoded in that order.
    :param kwargs: forwarded unchanged to ``MetaManagerExtended`` and ``Encoder``.
    """
    meta_manager = MetaManagerExtended(**kwargs)  # uses e.g. path_meta / path_source from kwargs
    meta_manager.load_all()

    encoder = Encoder(meta_manager, **kwargs)

    def needs_encode(item):
        return PENDING_ACTION["encode"] in item.pending_actions or not item.source_hashs

    # In the full system, encoding will probably be driven from a RabbitMQ endpoint.
    # For local testing we inspect each item's 'pending_actions' list instead.
    names_to_encode = (
        item.name
        for item in meta_manager.meta.values()
        if needs_encode(item)
    )

    # Debugging aid: replace `names_to_encode` with a hard-coded tuple of names
    # to encode selected tracks only. Tracks that have been used this way:
    #   'AKB0048 Next Stage - ED1 - Kono Namida wo Kimi ni Sasagu'
    #   'Cuticle Tantei Inaba - OP - Haruka Nichijou no Naka de'
    #   'Gosick - ED2 - Unity (full length)'
    #   'Ikimonogakari - Sakura'  (takes 2 hours to encode)
    #   'Frozen Japanise (find real name)'  (took too long to process)
    #   'Parasite Eve - Somnia Memorias'  (non unicode characterset)
    #   'Akira Yamaoka - Día de los Muertos'  (non unicode characterset)
    #   'Higurashi no Naku koro ni - ED - why or why not (full length)'
    #       (subs imported from SSA still have styling information attached)
    #   'Gatekeepers - OP - For the Smiles of Tomorrow.avi'
    #       (broken; looks like it tries to containerize subs in a txt file)
    #   'Get Backers - ED2 - Namida no Hurricane'  (broken)
    #   'Nana (anime) - OP - Rose'  (SSA has malformed unicode characters)
    #   'Lunar Silver Star Story - OP - Wings (Japanese Version)'
    #   'Evangleion ED - Fly Me To The Moon'  (odd dimensions, needs normalising)
    #   'Ranma Half OP1 - Jajauma ni Sasenaide'
    #   'Tamako Market - OP - Dramatic Market Ride'
    #   'Fullmetal Alchemist - OP1 - Melissa'  (high bitrate pausing at end)
    #   'Samurai Champloo - OP - Battlecry'  (missing title sub with newline)
    #   'KAT-TUN Your side [Instrumental]'
    for name in progress_bar(process_order_function(names_to_encode)):
        encoder.encode(name)
Пример #2
0
def encode_media(process_order_function=PROCESS_ORDER_FUNCS[DEFAULT_ORDER_FUNC], **kwargs):
    """
    Encode every meta item that is still waiting to be encoded.

    An item is selected when the 'encode' pending action is listed in its
    ``pending_actions`` or when its ``source_hash`` is empty/falsy.

    :param process_order_function: called with an iterable of item names;
        whatever it returns is iterated and encoded in that order.
    :param kwargs: must contain ``path_meta``; the whole mapping is also
        forwarded to ``Encoder``.
    """
    meta = MetaManager(kwargs['path_meta'])
    meta.load_all()

    encoder = Encoder(meta, **kwargs)

    def is_pending(item):
        return PENDING_ACTION['encode'] in item.pending_actions or not item.source_hash

    # In the full system, encoding will probably be driven from a RabbitMQ endpoint.
    # For local testing we inspect each item's 'pending_actions' list instead.
    pending_names = (item.name for item in meta.meta.values() if is_pending(item))

    # Debugging aid: swap `pending_names` for a hard-coded tuple of names, e.g.
    #   'Lunar Silver Star Story - OP - Wings (Japanese Version)'
    #   'Evangleion ED - Fly Me To The Moon'
    #   'AKB0048 Next Stage - ED1 - Kono Namida wo Kimi ni Sasagu'
    #   'Cuticle Tantei Inaba - OP - Haruka Nichijou no Naka de'
    #   'Gosick - ED2 - Unity (full length)'
    #   'Ranma Half OP1 - Jajauma ni Sasenaide'
    #   'Tamako Market - OP - Dramatic Market Ride'
    #   'Fullmetal Alchemist - OP1 - Melissa'  (high bitrate pausing at end)
    #   'Samurai Champloo - OP - Battlecry'  (missing title sub with newline)
    #   'KAT-TUN Your side [Instrumental]'
    ordered_names = process_order_function(pending_names)
    for name in progress_bar(ordered_names):
        encoder.encode(name)
Пример #3
0
def import_media(**kwargs):
    """
    Import processed meta items into the track database and prune stale rows.

    Every meta item is offered to ``TrackImporter.import_track``. Afterwards
    any track id known to the importer that is not among the meta manager's
    source hashes is deleted from the database.

    :param kwargs: forwarded unchanged to ``MetaManagerExtended``.
    :return: dict of statistics with the keys ``meta_set``, ``meta_imported``,
        ``meta_unprocessed``, ``db_removed`` (list), ``missing_processed_deleted``,
        ``missing_processed_aborted``, ``db_start``, ``meta_hash_matched_db_hash``
        and ``db_end``; all but ``db_removed`` are sets of names.
    """
    def current_db_filenames():
        # Snapshot of the source filenames currently stored in the database.
        return {row.source_filename for row in DBSession.query(Track.source_filename)}

    stats = {
        'meta_set': set(),
        'meta_imported': set(),
        'meta_unprocessed': set(),
        'db_removed': [],
        'missing_processed_deleted': set(),
        'missing_processed_aborted': set(),
        'db_start': set(),
        'meta_hash_matched_db_hash': set(),
    }

    meta_manager = MetaManagerExtended(**kwargs)
    importer = TrackImporter(meta_manager=meta_manager)
    stats['db_start'] = current_db_filenames()

    meta_manager.load_all()  # mtime=epoc(last_update())

    meta_processed_track_ids = set(meta_manager.source_hashs)
    stats['meta_set'] = {item.name for item in meta_manager.meta_items if item.source_hash}

    for name in progress_bar(meta_manager.meta.keys()):
        try:
            was_imported = importer.import_track(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
            continue
        except TrackMissingProcessedFiles as ex:
            if not ex.id:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)
                continue
            # The exception carries an id, so that existing track is removed.
            log.warning('Missing (processed files) delete existing: %s', name)
            delete_track(ex.id)
            commit()
            stats['missing_processed_deleted'].add(name)
            continue

        # A falsy result is recorded under 'meta_hash_matched_db_hash'.
        outcome = 'meta_imported' if was_imported else 'meta_hash_matched_db_hash'
        stats[outcome].add(name)

    stale_track_ids = importer.exisiting_track_ids - meta_processed_track_ids
    for stale_id in stale_track_ids:
        log.warning('Remove: %s', stale_id)
        stale_track = DBSession.query(Track).get(stale_id)
        # Record the filename when there is one, otherwise fall back to the id.
        stats['db_removed'].append(stale_track.source_filename or stale_id)
        delete_track(stale_id)
        commit()

    stats['db_end'] = current_db_filenames()

    # TODO: reinstate this check
    # assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']

    return stats
Пример #4
0
def encode_media(process_order_function=PROCESS_ORDER_FUNCS[DEFAULT_ORDER_FUNC], **kwargs):
    """
    Run the encoder over all meta items that require (re-)encoding.

    Items qualify when ``PENDING_ACTION['encode']`` appears in their
    ``pending_actions`` or when their ``source_hashs`` is empty/falsy.

    :param process_order_function: receives an iterable of qualifying item
        names; its result is iterated and each name is encoded in turn.
    :param kwargs: forwarded unchanged to ``MetaManagerExtended`` and ``Encoder``.
    """
    # kwargs supplies e.g. path_meta and path_source
    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()

    encoder = Encoder(meta_manager, **kwargs)

    def _requires_encoding(meta_item):
        if PENDING_ACTION['encode'] in meta_item.pending_actions:
            return True
        return not meta_item.source_hashs

    # In the full system, encoding will probably be driven from a RabbitMQ endpoint.
    # For local testing we inspect each item's 'pending_actions' list instead.
    candidates = filter(_requires_encoding, meta_manager.meta.values())
    candidate_names = (meta_item.name for meta_item in candidates)

    # To encode a hand-picked selection while debugging, pass a tuple of names
    # instead of `candidate_names`. Names used so far (with known problems):
    #   'AKB0048 Next Stage - ED1 - Kono Namida wo Kimi ni Sasagu'
    #   'Cuticle Tantei Inaba - OP - Haruka Nichijou no Naka de'
    #   'Gosick - ED2 - Unity (full length)'
    #   'Ikimonogakari - Sakura'  -- takes 2 hours to encode
    #   'Frozen Japanise (find real name)'  -- took too long to process
    #   'Parasite Eve - Somnia Memorias'  -- non unicode characterset
    #   'Akira Yamaoka - Día de los Muertos'  -- non unicode characterset
    #   'Higurashi no Naku koro ni - ED - why or why not (full length)'
    #       -- subs imported from SSA still have styling information attached
    #   'Gatekeepers - OP - For the Smiles of Tomorrow.avi'
    #       -- broken; looks like it tries to containerize subs in a txt file
    #   'Get Backers - ED2 - Namida no Hurricane'  -- broken
    #   'Nana (anime) - OP - Rose'  -- SSA has malformed unicode characters
    #   'Lunar Silver Star Story - OP - Wings (Japanese Version)'
    #   'Evangleion ED - Fly Me To The Moon'  -- odd dimensions, needs normalising
    #   'Ranma Half OP1 - Jajauma ni Sasenaide'
    #   'Tamako Market - OP - Dramatic Market Ride'
    #   'Fullmetal Alchemist - OP1 - Melissa'  -- high bitrate pausing at end
    #   'Samurai Champloo - OP - Battlecry'  -- missing title sub with newline
    #   'KAT-TUN Your side [Instrumental]'
    for name in progress_bar(process_order_function(candidate_names)):
        encoder.encode(name)
Пример #5
0
def import_media(**kwargs):
    """
    Synchronise the remote track API with the locally processed metadata.

    Every meta item is turned into a track dict and POSTed to the API one at
    a time. Tracks whose processed files are missing, and tracks known to the
    API whose id is not among the meta manager's source hashes, are removed
    with a single DELETE request at the end.

    :param kwargs: must contain ``api_host``; the whole mapping is also
        forwarded to ``MetaManagerExtended``.
    :return: dict of statistics with the keys ``meta_set``, ``meta_imported``,
        ``meta_unprocessed``, ``db_removed`` (list), ``missing_processed_deleted``,
        ``missing_processed_aborted``, ``db_start``, ``meta_hash_matched_db_hash``
        and ``db_end``; all but ``db_removed`` are sets.
    """
    stats = dict(meta_set=set(),
                 meta_imported=set(),
                 meta_unprocessed=set(),
                 db_removed=list(),
                 missing_processed_deleted=set(),
                 missing_processed_aborted=set(),
                 db_start=set(),
                 meta_hash_matched_db_hash=set())

    track_api = partial(_track_api, kwargs['api_host'])

    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()  # mtime=epoc(last_update())
    processed_track_ids = set(meta_manager.source_hashs)
    processed_files_lookup = set(
        f.relative
        for f in fast_scan(meta_manager.processed_files_manager.path))
    existing_tracks = track_api()['data']['tracks']
    existing_track_ids = existing_tracks.keys()

    generate_track_dict = partial(
        _generate_track_dict,
        meta_manager=meta_manager,
        existing_track_ids=existing_track_ids,
        processed_files_lookup=processed_files_lookup)

    stats['db_start'] = set(existing_tracks.values())
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items
                            if m.source_hash)

    track_ids_to_delete = []

    # Lazy %-style arguments: the message is only rendered if the record is emitted.
    log.info('Importing tracks - Existing:%s Processed:%s',
             len(existing_track_ids), len(processed_track_ids))
    for name in progress_bar(meta_manager.meta.keys()):
        try:
            track = generate_track_dict(name)
            if track:
                stats['meta_imported'].add(name)
                # NOTE: tracks are POSTed individually; batching them into one
                # request was sketched previously but is not enabled.
                track_api([track], method='POST')
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s',
                            name)
                track_ids_to_delete.append(ex.id)
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    # existing_track_ids is a dict keys view, which supports set difference.
    for unneeded_track_id in existing_track_ids - processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(existing_tracks[unneeded_track_id])
        track_ids_to_delete.append(unneeded_track_id)

    # Report the number of tracks actually POSTed above (one per imported
    # name); the previous counter was never filled and always logged 0.
    log.info('%s -> Add:%s Delete:%s', kwargs['api_host'],
             len(stats['meta_imported']), len(track_ids_to_delete))
    track_api(track_ids_to_delete, method='DELETE')

    # Materialise as a set so 'db_end' has the same type as 'db_start' and can
    # be compared against the other stat sets.
    stats['db_end'] = set(track_api()['data']['tracks'].values())

    # TODO: reinstate this check
    # assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']
    return stats
Пример #6
0
def scan_media(**kwargs):
    """
    Scan the source folder and bring the stored metadata in line with it.

    :param kwargs: must contain ``path_meta`` (passed to ``MetaManager``) and
        ``path_source`` (the folder handed to ``FolderStructure.factory``).
    :return: None; results are persisted through the ``MetaManager``.
    """
    found_missing_msg = 'Associating found missing file %s to %s - this should not be a regular occurance, move/rename this so it is grouped effectivly'

    meta = MetaManager(kwargs['path_meta'])
    meta.load_all()

    log.info('1.) Read file structure into memory')
    source_path = kwargs['path_source']
    scan_filter = fast_scan_regex_filter(
        file_regex=file_extension_regex(ALL_EXTS),
        ignore_regex=DEFAULT_IGNORE_FILE_REGEX,
    )
    folder_structure = FolderStructure.factory(path=source_path, search_filter=scan_filter)

    log.info('2.) Locate primary files')
    # Note: duplicate media is completely ignored/removed in this list
    primary_files = locate_primary_files(
        folder_structure,
        file_regex=file_extension_regex(PRIMARY_FILE_RANKED_EXTS),
    )

    log.info("3.) Find associated files as a 'file collection' (based on the name of the primary file)")
    file_collections = {}
    for primary_file in primary_files:
        file_collections[primary_file.file_no_ext] = get_file_collection(folder_structure, primary_file)

    log.info('4.) Associate file_collections with existing metadata objects')
    for name, file_collection in progress_bar(file_collections.items()):
        meta.load(name)
        meta_item = meta.get(name)
        for collected_file in file_collection:
            meta_item.associate_file(collected_file)
        meta.save(name)

    log.info('5.) Attempt to find associate unassociated files but finding them on the folder_structure in memory')
    # TODO: The check in 5b (mtime and hash) is not sufficient.
    #   We need to identify all unmatched files, generate a hash lookup and match them that way.
    #   It may take longer but it is the only true way of matching a file.
    #   In some cases it might even be quicker because we would not need to crawl
    #   the entire file structure in memory.

    # Only meta items that report unassociated files are handled here: they have
    # a file collection matched, but some of its child files are missing.
    for meta_item in meta.meta.values():
        if not meta_item.unassociated_files:
            continue

        for filename, scan_data in meta_item.unassociated_files.items():
            # 5a.) The file may simply have been missed by the initial collection
            # scan; look at its recorded location and associate it if it is there.
            relative = scan_data.get('relative')
            at_original_location = folder_structure.get(relative) if relative else None
            if at_original_location:
                meta_item.associate_file(at_original_location)
                log.warning(found_missing_msg, at_original_location.relative, meta_item.name)
                continue

            # 5b.) Search the whole in-memory folder_structure for a matching hash
            mtime = scan_data['mtime']

            def is_same_file(candidate):
                # Checks are ordered so the hash is only compared for candidates
                # that already passed the earlier tests.
                if IGNORE_SEARCH_EXTS_REGEX.search(candidate.file):
                    return False
                # TODO: deprecate this name/mtime pre-filter
                if candidate.file != filename and candidate.stats.st_mtime != mtime:
                    return False
                return str(candidate.hash) == scan_data['hash']

            for candidate in folder_structure.scan(is_same_file):
                log.warning(found_missing_msg, candidate.relative, meta_item.name)
                meta_item.associate_file(candidate)
                break  # the first match wins

        # 5c.) We have done our best at locating missing files;
        # remove them from the tracked list of files.
        meta_item.unlink_unassociated_files()

    log.info('6.) Remove unmatched meta entrys')
    # Collect first, then delete, so the mapping is not iterated while it changes.
    orphaned_items = [item for item in meta.meta.values() if not item.file_collection]
    for orphan in orphaned_items:
        log.info('Removing meta %s', orphan.name)
        meta.delete(orphan.name)

    # (If processed data already exists, it will be relinked at the encode level)

    meta.save_all()
Пример #7
0
def scan_media(**kwargs):
    """
    Rebuild the link between stored metadata and the files in the source folder.

    The work is done in six logged steps: read the folder structure, locate
    primary files, group files into collections, associate the collections
    with meta items, try to recover files that went missing, and finally drop
    meta items that have no file collection.

    :param kwargs: must contain ``path_meta`` (passed to ``MetaManager``) and
        ``path_source`` (the folder handed to ``FolderStructure.factory``).
    :return: None; results are persisted through the ``MetaManager``.
    """
    meta = MetaManager(kwargs['path_meta'])
    meta.load_all()

    log.info('1.) Read file structure into memory')
    folder_structure = FolderStructure.factory(
        path=kwargs['path_source'],
        search_filter=fast_scan_regex_filter(
            file_regex=file_extension_regex(ALL_EXTS),
            ignore_regex=DEFAULT_IGNORE_FILE_REGEX,
        ),
    )

    def recover_missing_file(meta_item, filename, scan_data):
        """Try to re-associate one unassociated file (steps 5a and 5b)."""
        warning = 'Associating found missing file %s to %s - this should not be a regular occurance, move/rename this so it is grouped effectivly'

        # 5a.) The file may not have been found by the initial collection scan;
        # check its recorded location and associate it if it exists there.
        relative = scan_data.get('relative')
        original = folder_structure.get(relative) if relative else None
        if original:
            meta_item.associate_file(original)
            log.warning(warning, original.relative, meta_item.name)
            return

        # 5b.) Search the whole in-memory folder_structure for a matching hash
        mtime = scan_data['mtime']

        def matches(candidate):
            if IGNORE_SEARCH_EXTS_REGEX.search(candidate.file):
                return False
            # TODO: deprecate this name/mtime pre-filter
            name_or_mtime_match = candidate.file == filename or candidate.stats.st_mtime == mtime
            if not name_or_mtime_match:
                return False
            # The hash is compared last, only for candidates that got this far.
            return str(candidate.hash) == scan_data['hash']

        for candidate in folder_structure.scan(matches):
            log.warning(warning, candidate.relative, meta_item.name)
            meta_item.associate_file(candidate)
            return  # only the first match is used

    log.info('2.) Locate primary files')
    # Note: duplicate media is completely ignored/removed in this list
    primary_regex = file_extension_regex(PRIMARY_FILE_RANKED_EXTS)
    primary_files = locate_primary_files(folder_structure, file_regex=primary_regex)

    log.info("3.) Find associated files as a 'file collection' (based on the name of the primary file)")
    file_collections = {
        primary.file_no_ext: get_file_collection(folder_structure, primary)
        for primary in primary_files
    }

    log.info('4.) Associate file_collections with existing metadata objects')
    for name, file_collection in progress_bar(file_collections.items()):
        meta.load(name)
        loaded_item = meta.get(name)
        for member in file_collection:
            loaded_item.associate_file(member)
        meta.save(name)

    log.info('5.) Attempt to find associate unassociated files but finding them on the folder_structure in memory')
    # TODO: The check in 5b (mtime and hash) is not sufficient.
    #   We need to identify all unmatched files, generate a hash lookup and match them that way.
    #   It may take longer but it is the only true way of matching a file.
    #   In some cases it might even be quicker because we would not need to crawl
    #   the entire file structure in memory.

    # These are meta items that have a file collection matched, but the
    # collection is incomplete: some child files are missing.
    incomplete_items = (item for item in meta.meta.values() if item.unassociated_files)
    for meta_item in incomplete_items:
        for filename, scan_data in meta_item.unassociated_files.items():
            recover_missing_file(meta_item, filename, scan_data)

        # 5c.) We have done our best at locating missing files;
        # remove them from the tracked list of files.
        meta_item.unlink_unassociated_files()

    log.info('6.) Remove unmatched meta entrys')
    # Build the list up front so deleting does not disturb the iteration.
    unmatched = [item for item in meta.meta.values() if not item.file_collection]
    for item in unmatched:
        log.info('Removing meta %s', item.name)
        meta.delete(item.name)

    # (If processed data already exists, it will be relinked at the encode level)

    meta.save_all()
Пример #8
0
def import_media(**kwargs):
    """
    Synchronise the remote track API with the locally processed metadata.

    Every meta item is turned into a track dict and POSTed to the API one at
    a time. Tracks whose processed files are missing, and tracks known to the
    API whose id is not among the meta manager's source hashes, are removed
    with a single DELETE request at the end.

    :param kwargs: must contain ``api_host``; the whole mapping is also
        forwarded to ``MetaManagerExtended``.
    :return: dict of statistics with the keys ``meta_set``, ``meta_imported``,
        ``meta_unprocessed``, ``db_removed`` (list), ``missing_processed_deleted``,
        ``missing_processed_aborted``, ``db_start``, ``meta_hash_matched_db_hash``
        and ``db_end``; all but ``db_removed`` are sets.
    """
    stats = dict(meta_set=set(), meta_imported=set(), meta_unprocessed=set(), db_removed=list(), missing_processed_deleted=set(), missing_processed_aborted=set(), db_start=set(), meta_hash_matched_db_hash=set())

    track_api = partial(_track_api, kwargs['api_host'])

    meta_manager = MetaManagerExtended(**kwargs)
    meta_manager.load_all()  # mtime=epoc(last_update())
    processed_track_ids = set(meta_manager.source_hashs)
    processed_files_lookup = set(f.relative for f in fast_scan(meta_manager.processed_files_manager.path))
    existing_tracks = track_api()['data']['tracks']
    existing_track_ids = existing_tracks.keys()

    generate_track_dict = partial(_generate_track_dict, meta_manager=meta_manager, existing_track_ids=existing_track_ids, processed_files_lookup=processed_files_lookup)

    stats['db_start'] = set(existing_tracks.values())
    stats['meta_set'] = set(m.name for m in meta_manager.meta_items if m.source_hash)

    track_ids_to_delete = []

    # Lazy %-style arguments: the message is only rendered if the record is emitted.
    log.info('Importing tracks - Existing:%s Processed:%s', len(existing_track_ids), len(processed_track_ids))
    for name in progress_bar(meta_manager.meta.keys()):
        try:
            track = generate_track_dict(name)
            if track:
                stats['meta_imported'].add(name)
                # NOTE: tracks are POSTed individually; batching them into one
                # request was sketched previously but is not enabled.
                track_api([track], method='POST')
            else:
                stats['meta_hash_matched_db_hash'].add(name)
        except TrackNotProcesedException:
            log.debug('Unprocessed (no source_hash): %s', name)
            stats['meta_unprocessed'].add(name)
        except TrackMissingProcessedFiles as ex:
            if ex.id:
                log.warning('Missing (processed files) delete existing: %s', name)
                track_ids_to_delete.append(ex.id)
                stats['missing_processed_deleted'].add(name)
            else:
                log.warning('Missing (processed files) abort import: %s', name)
                stats['missing_processed_aborted'].add(name)

    # existing_track_ids is a dict keys view, which supports set difference.
    for unneeded_track_id in existing_track_ids - processed_track_ids:
        log.warning('Remove: %s', unneeded_track_id)
        stats['db_removed'].append(existing_tracks[unneeded_track_id])
        track_ids_to_delete.append(unneeded_track_id)

    # Report the number of tracks actually POSTed above (one per imported
    # name); the previous counter was never filled and always logged 0.
    log.info('%s -> Add:%s Delete:%s', kwargs['api_host'], len(stats['meta_imported']), len(track_ids_to_delete))
    track_api(track_ids_to_delete, method='DELETE')

    # Materialise as a set so 'db_end' has the same type as 'db_start' and can
    # be compared against the other stat sets.
    stats['db_end'] = set(track_api()['data']['tracks'].values())

    # TODO: reinstate this check
    # assert stats['db_end'] == stats['meta_hash_matched_db_hash'] | stats['meta_imported']
    return stats