def test_filter_files_orphan_ref():
    """Check that ``filter_files`` drops 'orphan.txt' from the listing.

    The input holds 'blah.txt', 'blah.ogg' and 'orphan.txt'; only the
    first two are expected back, in their original order.
    """
    # Imported inside the test, as in the original, so the project module
    # is only loaded when this test actually runs.
    from utilities.utilities import Utilities

    file_names = ['blah.txt', 'blah.ogg', 'orphan.txt']
    kept = Utilities().filter_files(file_names)

    # NOTE(review): presumably 'orphan.txt' is removed because no audio
    # file shares its stem -- confirm against Utilities.filter_files.
    assert kept == ['blah.txt', 'blah.ogg']
def test_filter_files_doc():
    """Check that ``filter_files`` drops 'hello.doc' from the listing.

    The input holds 'file.txt', 'hello.doc' and 'file.wav'; only the
    '.txt' and '.wav' entries are expected back, in their original order.
    """
    # Imported inside the test, as in the original, so the project module
    # is only loaded when this test actually runs.
    from utilities.utilities import Utilities

    file_names = ['file.txt', 'hello.doc', 'file.wav']
    kept = Utilities().filter_files(file_names)

    # NOTE(review): presumably '.doc' is not an accepted extension --
    # confirm against Utilities.filter_files.
    assert kept == ['file.txt', 'file.wav']
raw_file_list = list() # Get either local files or cloud storage if not local_files_path: # Get list of all files in google cloud storage (gcs) bucket gcs = GCS() raw_file_list = gcs.get_file_list(cloud_store_uri) else: raw_file_list = utilities.local_files(local_files_path) logger.debug(f'RAW FILE LIST: {raw_file_list}') # Filter file list filtered_file_list = utilities.filter_files(raw_file_list, only_transcribe) if not local_files_path: final_file_list = [utilities.append_uri(cloud_store_uri, file) for file in filtered_file_list] else: final_file_list = filtered_file_list logger.debug(f'FINAL FILE LIST: {final_file_list}') # if only doing transcriptions, add diarization and punctuation? dia = False punct = False c = None if only_transcribe: dia = input('Add Diarization Y/N ') if dia.lower() == 'y':