def matToWav(filename: pathlib.PosixPath):
    """Convert a TUCT-generated MAT-file containing audio data to a WAV file.

    The WAV file is written next to the input file, with the ``.MAT`` /
    ``.mat`` extension replaced by ``.wav``.  The channel layout is derived
    from the file name: ``_OMNI`` (one channel), ``_BIN`` (``L``/``R``) or
    ``_BF`` (B-format, ACN channel order, orders 1 to 3).

    Args:
        filename: path of the MAT-file; its name must contain ``_A<base>.mat``.

    Raises:
        ValueError: if the base name cannot be extracted from the file name,
            if the name matches none of the known formats, or if a B-format
            file does not hold 4, 9 or 16 channels.
        KeyError: if an expected ``h_A<base><suffix>`` variable is missing.
    """
    # resolve path to absolute
    filename = filename.resolve()
    mat = scipy.io.loadmat(filename)

    baseNameMatch = re.search(r".*_A(.*)\.(?=MAT|mat)", filename.as_posix())
    if baseNameMatch is None:
        raise ValueError(f"cannot extract base name from {filename!s}")
    baseName = baseNameMatch.group(1)  # without extension
    audioName = re.sub(r"(.*)(?:\.MAT|\.mat)$", r"\1.wav", filename.as_posix())
    print(f"input file: {filename!s}")
    print(f"output file: {audioName!s}")

    # loadmat returns scalars as 2-D arrays; take the single element.
    fs = int(np.ravel(mat["TUCT_fs"])[0])

    if re.search("_OMNI", baseName):
        # OMNI
        suffix = [""]
    elif re.search("_BIN", baseName):
        # BINAURAL
        suffix = ["_L", "_R"]
    elif re.search("_BF", baseName):
        # B-FORMAT
        # loadmat adds "__header__", "__version__" and "__globals__" entries;
        # skip them, then discount TUCT_fs.
        # NOTE(review): assumes every remaining variable is one audio
        # channel - confirm against a real TUCT export.
        nbrChannels = sum(1 for key in mat if not key.startswith("__")) - 1
        maxOrder = round(nbrChannels ** 0.5) - 1
        print(f"order {maxOrder}")
        if (maxOrder + 1) ** 2 != nbrChannels or not 1 <= maxOrder <= 3:
            raise ValueError("wrong number of channels")
        letters = ["W", "Y", "Z", "X"]  # ACN (order matters !)
        if maxOrder >= 2:
            letters.extend(["V", "T", "R", "S", "U"])  # ACN (order matters !)
        if maxOrder >= 3:
            letters.extend(["Q", "O", "M", "K", "L", "N", "P"])  # ACN (order matters !)
        suffix = ["_" + letter for letter in letters]
    else:
        raise ValueError(f"unknown format in base name {baseName!r}")

    varNames = ["h_A" + baseName + suff for suff in suffix]
    # wavfile.write expects (Nsamples, Nchannels): one column per channel.
    y = np.column_stack([np.array(mat[varName][0]) for varName in varNames])
    mat.clear()
    scipy.io.wavfile.write(audioName, fs, y)
def create_tf_feature(image_file_path: pathlib.PosixPath,
                      camera_token: str,
                      corner_list: np.ndarray,
                      image_width: int,
                      image_height: int,
                      boxes: List[Box]) -> tf.train.Example:
    """Build a ``tf.train.Example`` for one JPEG image and its boxes.

    Args:
        image_file_path: path of the JPEG file; its bytes are embedded.
        camera_token: stored as ``image/source_id``.
        corner_list: 2-D array; columns 0/1 are divided by the image width
            (xmin, xmax) and columns 2/3 by the image height (ymin, ymax).
        image_width: image width used for normalisation.
        image_height: image height used for normalisation.
        boxes: boxes providing ``name`` and ``token``; each name is mapped to
            a label id through ``object_idx_dict``.

    Returns:
        The assembled ``tf.train.Example``.

    Raises:
        ValueError: if the file is not a JPEG image.
    """
    # Transpose the per-box (name, token, label id) triples into columns.
    per_box = [(box.name, box.token, object_idx_dict[box.name])
               for box in boxes]
    columns = [list(column) for column in zip(*per_box)]
    NAME_COLUMN, TOKEN_COLUMN, LABEL_COLUMN = 0, 1, 2

    classes_text_list = [name.encode('utf-8') for name in columns[NAME_COLUMN]]
    anns_token_list = [token.encode('utf-8') for token in columns[TOKEN_COLUMN]]

    with tf.gfile.GFile(image_file_path.as_posix(), 'rb') as fid:
        encoded_jpg = fid.read()

    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()
    file_name = image_file_path.as_posix()

    feature_dict = {}
    feature_dict['image/height'] = dataset_util.int64_feature(image_height)
    feature_dict['image/width'] = dataset_util.int64_feature(image_width)
    feature_dict['image/filename'] = dataset_util.bytes_feature(
        file_name.encode('utf8'))
    feature_dict['image/source_id'] = dataset_util.bytes_feature(
        camera_token.encode('utf8'))
    feature_dict['image/key/sha256'] = dataset_util.bytes_feature(
        key.encode('utf8'))
    feature_dict['image/encoded'] = dataset_util.bytes_feature(encoded_jpg)
    feature_dict['image/format'] = dataset_util.bytes_feature(
        'jpeg'.encode('utf8'))
    feature_dict['image/object/bbox/xmin'] = dataset_util.float_list_feature(
        corner_list[:, 0] / float(image_width))
    feature_dict['image/object/bbox/xmax'] = dataset_util.float_list_feature(
        corner_list[:, 1] / float(image_width))
    feature_dict['image/object/bbox/ymin'] = dataset_util.float_list_feature(
        corner_list[:, 2] / float(image_height))
    feature_dict['image/object/bbox/ymax'] = dataset_util.float_list_feature(
        corner_list[:, 3] / float(image_height))
    feature_dict['image/object/class/text'] = dataset_util.bytes_list_feature(
        classes_text_list)
    feature_dict['image/object/class/label'] = dataset_util.int64_list_feature(
        columns[LABEL_COLUMN])
    feature_dict['image/object/class/anns_id'] = dataset_util.bytes_list_feature(
        anns_token_list)

    features = tf.train.Features(feature=feature_dict)
    return tf.train.Example(features=features)
def extract_archive(p_in: Union[Path, PosixPath],
                    f_out: PosixPath,
                    remove: bool = True) -> None:
    """Unpack the archive ``p_in`` into the directory ``f_out``.

    Args:
        p_in: path of the archive to extract.
        f_out: destination directory.
        remove: when true, ``remove_file`` is called on ``p_in`` after
            extraction.
    """
    archive = Archive(p_in.as_posix())
    archive.extractall(f_out.as_posix())
    if not remove:
        return
    remove_file(p_in)
def sendfile(self):
    """Upload the configured image to the FTP archive and register it.

    Does nothing but record "FTP upload disabled" when FTP is disabled in the
    configuration.  Otherwise the image ``self.config.image.filename`` is
    stored under ``<archive_dir>/<YYYYMM>/<DD>/`` with a timestamp appended to
    its stem, and, if the REST API is enabled, the new file is announced via
    ``FileRestClient``.

    No exception escapes: every failure is logged and summarised in
    ``self.data.lasterror``.
    """
    if not self.config.ftp.enabled:
        self.logger.warn("FTP upload disabled")
        self.data.lasterror = "FTP upload disabled"
        return

    self.logger.info("Uploading file to " + self.config.ftp.server)
    try:
        now = datetime.datetime.now()
        localfileobj = PosixPath(self.config.image.filename)
        remotepath = PosixPath(self.config.ftp.archive_dir).joinpath(
            now.strftime("%Y%m/%d")).as_posix()
        remotefilename = (localfileobj.stem + "-" +
                          now.strftime("%Y%m%d-%H%M%S") + localfileobj.suffix)
        remotefullpath = PosixPath(remotepath).joinpath(
            remotefilename).as_posix()
        self.logger.debug("Remote path: " + remotepath)
        self.logger.debug("Remote file: " + remotefilename)
        self.logger.debug("Remote fullpath: " + remotefullpath)

        self.session = ftplib.FTP(self.config.ftp.server,
                                  self.config.ftp.user,
                                  self.config.ftp.password)
        try:
            # make a new directory and don't complain if it's already there
            self.logger.info("Storing image in " + remotefullpath)
            try:
                self.ftpmkdir(remotepath)
            except Exception as e:
                # Best effort: the upload is still attempted afterwards.
                self.logger.error("Error creating directory file: " +
                                  str(e.args))
                self.data.lasterror = "creating directory failed"

            # The context manager closes the file even if the upload fails.
            with open(self.config.image.filename, 'rb') as photo:
                self.session.storbinary('STOR ' + remotefullpath, photo)
            self.session.quit()
        finally:
            # Always release the connection, including on a failed upload.
            self.session.close()
        self.logger.info("Upload completed successfully.")

        # Update remote database
        if self.config.restapi.enabled:
            restclient = FileRestClient(self.config)
            restclient.new_file(remotefilename, remotepath,
                                self.data.annotation_photo)
            self.logger.info("Updated website successfully.")
        else:
            self.logger.warn("Updating website disabled.")
    except NameError as e:
        self.logger.exception(
            'Failed to FTP file: NameError in script: %s', e)
        self.data.lasterror = "FTP file failed"
    except Exception as e:
        self.logger.exception('Failed to FTP file: %s', e)
        self.data.lasterror = "FTP file failed"
def __gen_filename(self, key, version_id=None, filename=None):
    """Work out the local path for ``key``.

    * ``filename`` names an existing file: that resolved path is returned.
    * ``filename`` names an existing directory: the leaf is placed inside it.
    * ``filename`` names anything else: ``None`` is returned.
    * ``filename`` is ``None``: the leaf is placed in the current directory.

    The leaf is ``key``, or ``"<key>-<version_id>"`` when a version is given.
    """
    if filename is None:
        directory = PosixPath.cwd()
    else:
        target = PosixPath(filename).resolve()
        if target.is_file():
            return target.as_posix()
        if not (target.is_dir() and target.exists()):
            return None
        directory = target

    leaf = key if version_id is None else '%s-%s' % (key, version_id)
    return directory.joinpath(leaf).as_posix()
def assert_all_migrations_present(dir: PosixPath) -> None:
    """Check that ``dir`` holds a gap-free set of up/down migration files.

    For every id from 1 to the highest id reported by
    ``get_max_migration_id`` both ``<id>_up.sql`` and ``<id>_down.sql`` must
    exist, and no other file may be present.

    Args:
        dir: directory containing the migration files.

    Raises:
        AssertionError: if an up or down file is missing.  Raised explicitly
            rather than via ``assert`` so the check survives ``python -O``.
        SystemExit: with code 3 if the directory contains unexpected files.
    """
    import sys  # local import: keeps this block independent of file-level imports

    max_migration_id = get_max_migration_id(dir)
    present: Set[str] = set(os.listdir(dir.as_posix()))

    expected: Set[str] = set()
    for migration_id in range(1, max_migration_id + 1):
        up_name = f'{migration_id}_up.sql'
        down_name = f'{migration_id}_down.sql'
        if up_name not in present:
            raise AssertionError(f'Migration {migration_id} missing ups')
        if down_name not in present:
            raise AssertionError(f'Migration {migration_id} missing downs')
        expected.update((up_name, down_name))

    extra_files = present - expected
    if extra_files:
        print(
            'ERROR: Extra files not of pattern "(id)_up.sql" or "(id)_down.sql": '
        )
        # Sorted so the report is deterministic.
        print(*sorted(extra_files), sep='\n')
        sys.exit(3)
def create_manifest(path_to_audioset_folder,
                    size=None,
                    data_type='eval',
                    num_classes=183,
                    wav_root='/gpfsdswork/dataset/AudioSet'):
    """Write a JSON manifest pairing AudioSet transcripts with wav files.

    Every ``*.txt`` file found (recursively) under
    ``<path_to_audioset_folder>/<data_type>/txt_<num_classes>`` yields one
    sample.  The wav path is not looked up next to the transcript; it is
    composed as ``<wav_root>/<data_type>/<first char of name>/<name>.wav``.

    The manifest is written to ``path_to_audioset_folder`` as
    ``audioset_<data_type>_manifest_<num_classes>.json`` (prefixed
    ``audioset_small_`` when ``size == 'small'``).

    :param path_to_audioset_folder: (str or Path) folder holding the
        ``data_type`` sub-folder; also where the manifest is stored
    :param size: (str) ``'small'`` keeps only samples whose wav file exists
        on disk; any other value keeps every sample
    :param data_type: (str) sub-folder / split name, ``'eval'`` by default
    :param num_classes: (int) selects the ``txt_<num_classes>`` folder
    :param wav_root: (str) root folder of the wav files
    :return: None; the manifest holds ``root_path`` and a ``samples`` list of
        ``wav_path`` / ``transcript_path`` entries
    """
    prefix = 'audioset_small' if size == 'small' else 'audioset'
    output_name = '{}_{}_manifest_{}.json'.format(prefix, data_type,
                                                  num_classes)

    # Kept as a string so it can be serialised directly into the manifest.
    data_root = os.path.abspath(Path(path_to_audioset_folder) / data_type)
    txt_dir = Path(data_root) / 'txt_{}'.format(num_classes)
    file_paths = list(txt_dir.rglob("*.txt"))

    output_path = Path(path_to_audioset_folder) / output_name
    output_path.parent.mkdir(exist_ok=True, parents=True)

    wav_dir = PosixPath(wav_root) / data_type
    keep_existing_only = size == 'small'

    samples = []
    for txt_path in tqdm(file_paths, total=len(file_paths)):
        # Only the file name is used: transcripts are referenced as if they
        # sat directly in txt_dir, whatever sub-folder rglob found them in.
        txt_file = txt_path.name
        wav_name = txt_file.replace('.txt', '.wav')
        transcript_path = txt_dir / txt_file
        new_wav_path = wav_dir / wav_name[0] / wav_name
        sys.stdout.write(' \r WAV PATH: {} '.format(new_wav_path))

        # Write new data in the manifest
        # NOTE(review): reads the original `else` as belonging to
        # `if size == 'small'` - confirm against the pre-mangling source.
        if keep_existing_only and not new_wav_path.is_file():
            continue
        samples.append({
            'wav_path': new_wav_path.as_posix(),
            'transcript_path': transcript_path.as_posix()
        })

    manifest = {'root_path': data_root, 'samples': samples}
    output_path.write_text(json.dumps(manifest, indent=4), encoding='utf8')
def train(settings: dict, output_path: PosixPath) -> None:
    """Train one model, then score the last-epoch model on validation data.

    Steps, in order: build the train/validation datasets from the fold index
    CSV, build the model and its training wrapper from ``settings``, run the
    trainer, save the last-epoch model, run inference on the validation set
    and write predictions and recall scores below ``output_path``.

    Args:
        settings: training configuration.  Keys read here: ``gpu_devices``,
            ``n_folds``, ``val_fold``, ``training_transforms``,
            ``inference_transforms``, ``backborn_class``/``backborn_kwargs``,
            ``pooling_class``/``pooling_kwargs``, ``head_class``/
            ``head_kwargs``, ``model_name``, ``loss_function``,
            ``eval_functions`` and ``seed``.  ``eval_func_names`` is added to
            the dict as a side effect.
        output_path: directory receiving ``model_snapshot_last_epoch.npz``,
            ``val_pred_arr_fold<val_fold>`` and ``score.csv``.
    """
    gpu_num = len(settings["gpu_devices"])

    # ---- make dataset ----
    # read meta info.
    train_df = pd.read_csv(
        config.PROC_DATA /
        "train_add-{}fold-index.csv".format(settings["n_folds"]))

    # make label arr
    train_labels_arr = train_df[config.COMP_NAMES].values.astype("i")

    # make train set: every fold except val_fold, or all rows when
    # val_fold == -1.
    if settings["val_fold"] != -1:
        train_dataset = datasets.LabeledImageDataset(
            pairs=list(
                zip((train_df[train_df["fold"] != settings["val_fold"]]
                     ["image_id"] + ".png").tolist(),
                    train_labels_arr[train_df["fold"] != settings["val_fold"],
                                     ...])),
            root=config.TRAIN_IMAGES_DIR.as_posix())
    else:
        train_dataset = datasets.LabeledImageDataset(
            pairs=list(
                zip((train_df["image_id"] + ".png").tolist(),
                    train_labels_arr)),
            root=config.TRAIN_IMAGES_DIR.as_posix())

    train_dataset = datasets.TransformDataset(
        train_dataset,
        nn_training.ImageTransformer(settings["training_transforms"]))

    if gpu_num > 1:
        # if using multi-gpu, split train set into gpu_num.
        train_sub_dataset_list = []
        total_size = len(train_dataset)
        # ceil(total_size / gpu_num)
        subset_size = (total_size + gpu_num - 1) // gpu_num
        # Fixed seed: the shuffled order is the same on every run.
        np.random.seed(1086)
        random_order = np.random.permutation(len(train_dataset))
        for i in range(gpu_num):
            # Clamping the start keeps the last subset at full size; it then
            # overlaps the previous one when total_size is not a multiple of
            # gpu_num.
            start_idx = min(i * subset_size, total_size - subset_size)
            end_idx = min((i + 1) * subset_size, total_size)
            print(i, start_idx, end_idx)
            train_sub_dataset_list.append(
                datasets.SubDataset(train_dataset,
                                    start=start_idx,
                                    finish=end_idx,
                                    order=random_order))
        # From here on train_dataset is a list with one subset per GPU.
        train_dataset = train_sub_dataset_list
        for i, subset in enumerate(train_dataset):
            print("subset{}: {}".format(i, len(subset)))

    # validation set
    if settings["val_fold"] != -1:
        val_dataset = datasets.LabeledImageDataset(
            pairs=list(
                zip((train_df[train_df["fold"] == settings["val_fold"]]
                     ["image_id"] + ".png").tolist(),
                    train_labels_arr[train_df["fold"] == settings["val_fold"],
                                     ...])),
            root=config.TRAIN_IMAGES_DIR.as_posix())
    else:
        # if train models using all train data, calc loss for all data at the
        # evaluation step.
        val_dataset = datasets.LabeledImageDataset(
            pairs=list(
                zip((train_df["image_id"] + ".png").tolist(),
                    train_labels_arr)),
            root=config.TRAIN_IMAGES_DIR.as_posix())

    val_dataset = datasets.TransformDataset(
        val_dataset,
        nn_training.ImageTransformer(settings["inference_transforms"]))

    # In the multi-GPU case len(train_dataset) is the number of subsets, not
    # the number of samples.
    print("[make dataset] train: {}, val: {}".format(len(train_dataset),
                                                     len(val_dataset)))

    # ---- initialize model ----
    # Component classes are looked up by name; global pooling is optional.
    model = nn_training.ImageClassificationModel(
        extractor=getattr(
            backborn_chains,
            settings["backborn_class"])(**settings["backborn_kwargs"]),
        global_pooling=None if settings["pooling_class"] is None else getattr(
            global_pooling_chains, settings["pooling_class"])(
                **settings["pooling_kwargs"]),
        classifier=getattr(classifer_chains,
                           settings["head_class"])(**settings["head_kwargs"]))
    model.name = settings["model_name"]

    # ---- set training wrapper ----
    # loss_function is a (name, kwargs) pair; eval_functions is a sequence of
    # such pairs, registered as "SCE_<index>".
    train_model = nn_training.CustomClassifier(
        predictor=model,
        lossfun=getattr(
            nn_training,
            settings["loss_function"][0])(**settings["loss_function"][1]),
        evalfun_dict={
            "SCE_{}".format(i): getattr(nn_training, name)(**param)
            for i, (name, param) in enumerate(settings["eval_functions"])
        })

    settings["eval_func_names"] = [
        "SCE_{}".format(i) for i in range(len(settings["eval_functions"]))
    ]

    gc.collect()

    # ---- training ----
    # create trainer.
    utils.set_random_seed(settings["seed"])
    trainer = nn_training.create_trainer(settings, output_path.as_posix(),
                                         train_model, train_dataset,
                                         val_dataset)
    trainer.run()

    # save model of last epoch,
    model = trainer.updater.get_optimizer('main').target.predictor
    serializers.save_npz(output_path / "model_snapshot_last_epoch.npz", model)

    # Drop the trainer and wrapper before inference.
    del trainer
    del train_model
    gc.collect()

    # ---- inference validation data by the model of last epoch ----
    _, val_iter, _ = nn_training.create_iterator(settings, None, val_dataset,
                                                 None)
    val_pred, val_label = nn_training.inference_test_data(
        model, val_iter, gpu_device=settings["gpu_devices"][0])
    np.save(output_path / "val_pred_arr_fold{}".format(settings["val_fold"]),
            val_pred)

    # ---- calc validation score ----
    # Row 0 uses zero_division=0, row 1 uses zero_division=1.
    score_list = [[] for i in range(2)]
    for i in range(len(config.N_CLASSES)):
        # Columns COMP_INDEXS[i]:COMP_INDEXS[i + 1] of val_pred hold the
        # outputs of component i; argmax selects the predicted class.
        y_pred_subset = val_pred[:, config.COMP_INDEXS[i]:config.
                                 COMP_INDEXS[i + 1]].argmax(axis=1)
        y_true_subset = val_label[:, i]
        score_list[0].append(
            recall_score(y_true_subset,
                         y_pred_subset,
                         average='macro',
                         zero_division=0))
        score_list[1].append(
            recall_score(y_true_subset,
                         y_pred_subset,
                         average='macro',
                         zero_division=1))
    # The three-element weight list requires exactly three components.
    # NOTE(review): presumably the 2:1:1 weighting is the target metric - confirm.
    score_list[0].append(np.average(score_list[0], weights=[2, 1, 1]))
    score_list[1].append(np.average(score_list[1], weights=[2, 1, 1]))

    score_df = pd.DataFrame(score_list, columns=config.COMP_NAMES + ["score"])
    print(score_df)
    score_df.to_csv(output_path / "score.csv", index=False)
def get_migration_files_filtered(dir: PosixPath) -> List[str]:
    """List the entries of ``dir`` whose name ends in ``.sql`` (any case).

    Names are returned in the order ``os.listdir`` reports them.
    """
    sql_files: List[str] = []
    for entry in os.listdir(dir.as_posix()):
        if entry.lower().endswith(".sql"):
            sql_files.append(entry)
    return sql_files
def get_max_migration_id(dir: PosixPath) -> int:
    """Return the highest migration id among the entries of ``dir``.

    Each entry name is passed to ``get_migration_id``.  An empty directory
    raises ``ValueError`` from ``max``.
    """
    migration_ids = [
        get_migration_id(entry) for entry in os.listdir(dir.as_posix())
    ]
    return max(migration_ids)
def global_mocks(monkeypatch: MonkeyPatch, tmp_path: PosixPath):
    """Apply the mocks shared by every test in this file.

    Points ``const.DATA_PATH`` at the temporary directory of the test.
    """
    data_path = tmp_path.as_posix()
    monkeypatch.setattr(const, 'DATA_PATH', data_path)