def Convert(config, *, video_level_dir="/project/data/representations/video_level/"):
    """Compute signatures for stored video-level representations and save them.

    Reads everything available in the video-level representation storage
    rooted at ``config.repr.directory``, predicts a signature for each entry
    and, when ``config.database.use`` is truthy, writes the signatures to the
    database at ``config.database.uri``. After a successful write, the
    corresponding ``<key.path>.npy`` files under ``video_level_dir`` are
    removed.

    Args:
        config: Resolved configuration object; this function reads
            ``config.repr.directory``, ``config.database.use`` and
            ``config.database.uri``.
        video_level_dir: Directory prefix of the ``.npy`` files to delete once
            their signatures are stored. Defaults to the location that was
            previously hard-coded here.

    Returns:
        None. Failures while saving or removing are printed, not raised.
    """
    # NOTE: collect_files(get_ray_nodes()) used to be called here; it is
    # currently disabled.
    reps = ReprStorage(os.path.join(config.repr.directory))
    sm = SimilarityModel()
    vid_level_iterator = bulk_read(reps.video_level)
    print("Prepare to update database! vid_num :" + str(len(vid_level_iterator)))

    if len(vid_level_iterator) == 0:
        return

    # Get {ReprKey => signature} dict
    signatures = sm.predict(vid_level_iterator)

    if not config.database.use:
        return

    # Convert dict to list of (path, sha256, url, signature) tuples
    entries = [
        (key.path, key.hash, key.url, sig)
        for key, sig in signatures.items()
    ]

    # Connect to database
    database = Database(uri=config.database.uri)
    database.create_tables()

    # Guarantee a trailing separator so the prefix can be concatenated with
    # key.path exactly as the original hard-coded string was.
    prefix = os.path.join(video_level_dir, "")

    try:
        # Save signatures
        result_storage = DBResultStorage(database)
        result_storage.add_signatures(entries)

        # Once written to the database, the source files are no longer
        # needed; remove them.
        for key in signatures:
            remove_file(prefix + key.path + ".npy")
    except Exception as e:
        # Deliberately best-effort: report and continue instead of crashing
        # the caller. Note that a failure in remove_file is reported under
        # the same message as a database failure.
        print("save db ERROR!")
        print(e)
def main(config, list_of_files, frame_sampling, save_frames):
    """Run the feature-extraction pipeline end to end.

    Steps, in order:
      1. Resolve the configuration and locate the source videos.
      2. Extract frame-level features for videos that do not have them yet.
      3. Convert frame-level representations to video-level ones.
      4. Predict signatures from the video-level representations.
      5. Save the signatures to the database and/or to files, depending on
         ``config.database.use`` and ``config.save_files``.

    Args:
        config: Path to the configuration, passed to ``resolve_config`` as
            ``config_path``.
        list_of_files: When empty, videos are discovered by scanning
            ``config.sources.root``; otherwise it is handed to
            ``scan_videos_from_txt``.
        frame_sampling: Forwarded to ``resolve_config``.
        save_frames: Forwarded to ``resolve_config``.

    Raises:
        AssertionError: If no video-level representations are available to
            compute signatures from.
    """
    config = resolve_config(
        config_path=config,
        frame_sampling=frame_sampling,
        save_frames=save_frames,
    )
    reps = ReprStorage(os.path.join(config.repr.directory))
    reprkey = reprkey_resolver(config)

    print('Searching for Dataset Video Files')
    if len(list_of_files) == 0:
        videos = scan_videos(
            config.sources.root, '**', extensions=config.sources.extensions)
    else:
        videos = scan_videos_from_txt(
            list_of_files, extensions=config.sources.extensions)
    print('Number of files found: {}'.format(len(videos)))

    # Only videos without stored frame-level features still need processing.
    remaining_videos_path = [
        path for path in videos
        if not reps.frame_level.exists(reprkey(path))
    ]
    print('There are {} videos left'.format(len(remaining_videos_path)))

    video_list_path = create_video_list(
        remaining_videos_path, config.proc.video_list_filename)
    print('Processed video List saved on :{}'.format(video_list_path))

    if len(remaining_videos_path) > 0:
        # Instantiates the extractor
        model_path = default_model_path(
            config.proc.pretrained_model_local_path)
        extractor = IntermediateCnnExtractor(
            video_src=video_list_path,
            reprs=reps,
            reprkey=reprkey,
            frame_sampling=config.proc.frame_sampling,
            save_frames=config.proc.save_frames,
            model=(load_featurizer(model_path)),
        )
        # Starts Extracting Frame Level Features
        extractor.start(batch_size=16, cores=4)

    print('Converting Frame by Frame representations to Video Representations')
    converter = FrameToVideoRepresentation(reps)
    converter.start()

    print('Extracting Signatures from Video representations')
    sm = SimilarityModel()
    vid_level_iterator = bulk_read(reps.video_level)

    # Raised explicitly rather than via `assert` so the check still runs
    # under `python -O`; the exception type and message stay the same.
    if not len(vid_level_iterator) > 0:
        raise AssertionError('No Signatures left to be processed')

    # Get {ReprKey => signature} dict
    signatures = sm.predict(vid_level_iterator)

    print('Saving Video Signatures on :{}'.format(reps.signature.directory))

    if config.database.use:
        # Convert dict to list of (path, sha256, url, signature) tuples
        entries = [
            (key.path, key.hash, key.url, sig)
            for key, sig in signatures.items()
        ]

        # Connect to database
        database = Database(uri=config.database.uri)
        database.create_tables()

        # Save signatures
        result_storage = DBResultStorage(database)
        result_storage.add_signatures(entries)

    if config.save_files:
        bulk_write(reps.signature, signatures)