def fit_model_mix_shuffle(experiment_name):
    """Fit a new model on mixed AudioSet + LiveSet folds, then evaluate it.

    The AudioSet and LiveSet train tensors (and, separately, their
    validation tensors) are combined with ``mix_folds`` and used to fit a
    freshly created segmentation model. The training history is written to
    a text file and the fitted model is evaluated on the LiveSet validation
    split only.

    Files written (all relative to the current working directory):
        * ``auxiliary_files/checkpoints/fullfit/<experiment_name>/cpt.h5``
          -- checkpoint path handed to the callbacks and to ``exec_fit``.
        * ``auxiliary_files/history/new/<experiment_name>/history.txt``
          -- printed ``history.history``.
        * ``auxiliary_files/eval/new/<experiment_name>/metrics/``
          -- directory passed to ``segmentator.evaluate``.

    Args:
        experiment_name: Name of the sub-directory used for this run's
            checkpoints, history and metrics.
    """
    path_to_liveset = os.path.join(
        'auxiliary_files', 'dataset', 'live_set_genres.pkl')

    # Load AudioSet; only the train and validation parts are used here.
    sets = dg.get_generated_sample(kd.AUDIOSET, [7, 2, 1])
    x_tr_a, y_tr_a = sets['train']
    x_val_a, y_val_a = sets['val']

    # Load LiveSet.
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling -- only point this at a dataset file you trust.
    with open(path_to_liveset, "rb") as handle:
        dataset_dict = pickle.load(handle)
    # The last element of each tuple holds the genre labels (unused here).
    x_tr_l, y_tr_l, _ = dataset_dict['train']
    x_val_l, y_val_l, _ = dataset_dict['valid']

    # Mix the two datasets fold by fold.
    folds_count = 10
    x_train, y_train = mix_folds(x_tr_a, x_tr_l, y_tr_a, y_tr_l, folds_count)
    x_valid, y_valid = mix_folds(
        x_val_a, x_val_l, y_val_a, y_val_l, folds_count)

    # Checkpoints. exist_ok=True already makes makedirs a no-op when the
    # directory is present, so no separate isdir() check is needed.
    checkpoints_dir = os.path.join(
        'auxiliary_files', 'checkpoints', 'fullfit', experiment_name)
    os.makedirs(checkpoints_dir, exist_ok=True)
    checkpoints_file = os.path.join(checkpoints_dir, 'cpt.h5')
    callback_list = mix_model_callbacks(checkpoints_file)

    segmentator = sg()  # init model
    # Original note: "0 epochs for early stopping".
    # NOTE(review): presumably exec_fit treats epochs=0 as "train until the
    # callbacks stop it" -- confirm against exec_fit.
    history = segmentator.exec_fit(
        x_train, x_valid, y_train, y_valid, checkpoints_file,
        epochs=0, callback_list=callback_list)

    # Save the training history as text.
    history_dir = os.path.join(
        'auxiliary_files', 'history', 'new', experiment_name)
    os.makedirs(history_dir, exist_ok=True)
    history_file = os.path.join(history_dir, 'history.txt')
    with open(history_file, 'w') as f:
        print(history.history, file=f)

    # Evaluate on the LiveSet validation split only.
    metrics_dir = os.path.join(
        'auxiliary_files', 'eval', 'new', experiment_name, 'metrics')
    os.makedirs(metrics_dir, exist_ok=True)
    segmentator.evaluate(x_val_l, y_val_l, metrics_dir, plot_time_clamp=2000)
def dual_fit(experiment_name):
    """Train one model in two stages: AudioSet first, then LiveSet.

    No weights are fixed between the two stages; the same model instance is
    passed to ``fit_model_audioset`` and then to ``fit_model_live``.

    Args:
        experiment_name: Run name forwarded to both fitting stages.
    """
    model = sg()  # init model
    liveset_pickle = os.path.join(
        'auxiliary_files', 'dataset', 'live_set_genres.pkl')

    # Stage 1: AudioSet.
    fit_model_audioset(model, experiment_name)
    # Stage 2: continue on LiveSet with all weights left as they are.
    fit_model_live(model, liveset_pickle, experiment_name)
def full_fit_pipeline(experiment_name):
    """Train on AudioSet, fix part of the model, then train on LiveSet.

    Differs from a plain two-stage fit by the ``fix_model_params`` call
    between the stages (the original note describes it as fixing the
    GRU & Dense parameters).

    Args:
        experiment_name: Run name forwarded to both fitting stages.
    """
    model = sg()  # init model
    liveset_pickle = os.path.join(
        'auxiliary_files', 'dataset', 'live_set_genres.pkl')

    # Stage 1: AudioSet.
    fit_model_audioset(model, experiment_name)
    # Fix parameters before the second stage.
    fix_model_params(model)
    # Stage 2: LiveSet.
    fit_model_live(model, liveset_pickle, experiment_name)
if __name__ == "__main__":
    # Step 1: preprocess the live samples and get their embeddings.
    pp.preprocess_train(
        PATH_TO_META_FOLDER,
        PATH_TO_VIDEO_FOLDER,
        PATH_TO_WAV_FOLDER,
        seq_len=96,
    )
    fe.get_audioset_features(
        PATH_TO_LIVE_DATA,
        PATH_TO_LIVE_DATA_WITH_EMBEDDINGS,
    )

    # Step 2: generate the AudioSet and LiveSet samples.
    sets = dg.get_generated_sample(kd.AUDIOSET, [7, 3, 1])
    # The AudioSet test part (x_test, y_test) is unpacked but not used below.
    (x_tr, y_tr), (x_val, y_val), (x_test, y_test) = (
        sets['train'], sets['val'], sets['test'])

    sets_l = dg.get_generated_sample(
        kd.LIVE,
        [7, 3, 1],
        path_to_live_data=PATH_TO_LIVE_DATA_WITH_EMBEDDINGS,
    )
    (x_tr_l, y_tr_l), (x_val_l, y_val_l), (x_test_l, y_test_l) = (
        sets_l['train'], sets_l['val'], sets_l['test'])

    # Step 3: create the model.
    model = sg()

    # Step 4: train on AudioSet, then evaluate on the LiveSet test part.
    model.exec_fit(x_tr, x_val, y_tr, y_val, PATH_TO_CHECKPOINT_FILE)
    model.evaluate(x_test_l, y_test_l, PATH_TO_METRIC_WITHOUT_TR)

    # Step 5: additionally train on LiveSet and evaluate on the same
    # LiveSet test part, writing to a separate metrics path.
    model.exec_fit(x_tr_l, x_val_l, y_tr_l, y_val_l, PATH_TO_CHECKPOINT_FILE)
    model.evaluate(x_test_l, y_test_l, PATH_TO_METRIC_WITH_TR)