def convert(self, data_dir):
    """Read the raw train/val/test annotation files under ``data_dir`` and
    turn each split into the processed record format.

    Note: 'val_2.json' serves as the validation split and 'val_1.json' as
    the test split — this is the dataset's own naming, not a typo.

    Args:
        data_dir: directory holding train.json / val_2.json / val_1.json.

    Returns:
        (train_set, val_set, test_set) tuple of processed splits.

    Raises:
        ValueError: if ``data_dir`` does not exist.
    """
    self.reset_idx_counter()
    if not os.path.exists(data_dir):
        raise ValueError('data dir {} does not exist'.format(data_dir))
    # (scope, raw annotation file) per split; kept in train/val/test order
    split_files = [('train', 'train.json'),
                   ('val', 'val_2.json'),
                   ('test', 'val_1.json')]
    processed = []
    for scope, fname in split_files:
        raw = load_json(os.path.join(data_dir, fname))
        processed.append(self.process_data(raw, scope=scope))
    return tuple(processed)
def convert(self, data_dir):
    """Convert the raw Charades-STA annotations into processed splits.

    Charades-STA ships one shared metadata json plus plain-text train/test
    annotation files; the dataset has no validation split, so ``None`` is
    returned in its place.

    Args:
        data_dir: directory holding charades.json and the *_train/_test txt files.

    Returns:
        (train_set, None, test_set)  # train/val/test

    Raises:
        ValueError: if ``data_dir`` does not exist.
    """
    self.reset_idx_counter()
    if not os.path.exists(data_dir):
        raise ValueError('data dir {} does not exist'.format(data_dir))
    # video metadata shared by both splits
    charades = load_json(os.path.join(data_dir, 'charades.json'))
    # per-split plain-text annotation lines, in train-then-test order
    raw_splits = {
        'train': load_lines(os.path.join(data_dir, 'charades_sta_train.txt')),
        'test': load_lines(os.path.join(data_dir, 'charades_sta_test.txt')),
    }
    processed = {
        scope: self.process_data(lines, charades, scope=scope)
        for scope, lines in raw_splits.items()
    }
    # no validation split for this dataset
    return processed['train'], None, processed['test']
torch.save( model.state_dict(), os.path.join( model_dir, '{}_{}.t7'.format(configs.model_name, global_step))) # only keep the top-3 model checkpoints filter_checkpoints(model_dir, suffix='t7', max_to_keep=3) model.train() score_writer.close() elif configs.mode.lower() == 'test': if not os.path.exists(model_dir): raise ValueError('No pre-trained weights exist') # load previous configs pre_configs = load_json(os.path.join(model_dir, "configs.json")) parser.set_defaults(**pre_configs) configs = parser.parse_args() # build model model = VSLNet(configs=configs, word_vectors=dataset['word_vector']).to(device) # get last checkpoint file filename = get_last_checkpoint(model_dir, suffix='t7') model.load_state_dict(torch.load(filename)) model.eval() r1i3, r1i5, r1i7, mi, _ = eval_test(model=model, data_loader=test_loader, device=device, mode='test') print("\n" + "\x1b[1;31m" + "Rank@1, IoU=0.3:\t{:.2f}".format(r1i3) + "\x1b[0m",
def validate(self, previous_loss):
    '''Run one validation pass over the MUSDB18 'valid' split and decide
    whether the model improved enough to be checkpointed.

    Computes an L1 loss between the (frequency-domain) reference and the
    model's masked output for both accompaniment and vocals, averaged over
    all tracks and all pre-cut segments (self.start/self.end/self.realend).

    Side effects: reassigns self.mus, toggles self.model between eval() and
    train() per segment, and — on sufficient improvement — saves a model
    checkpoint, runs self.evaluate(...) and self.split_listener().

    Args:
        previous_loss: tuple, previously the best loss value
            (bac_loss, voc_loss); previous_loss[0] is None on the first call.
    Returns:
        tuple, Validation loss value — the new (bac, voc) losses when the
        model improved (or on the very first call), otherwise the unchanged
        previous_loss.
    '''
    self.mus = musdb.DB(self.MUSDB_PATH, is_wav=True, subsets='train', split='valid')
    loss = torch.nn.L1Loss()
    # decrease_ratio comes from this model's own written config snapshot
    conf = load_json(self.project_root + "config/json/" + self.model_name + ".json")
    decrease_ratio = conf['decrease_ratio']
    bac_loss = []
    voc_loss = []
    t_start = time.time()
    with torch.no_grad():
        for track in self.mus:
            print(track.name)
            # if("Alexander Ross - Goodbye Bolero" in track.name): # todo this song is broken on my server
            #     continue
            bac = track.targets['accompaniment'].audio
            voc = track.targets['vocals'].audio
            # Iterate the pre-computed validation segments; NOTE portion_end
            # is unpacked but never used — segmentation runs start->real_end.
            for i in range(len(self.start)):
                portion_start, portion_end, real_end = self.start[
                    i], self.end[i], self.realend[i]
                reference_bac = self.seg(bac, portion_start, real_end)
                reference_voc = self.seg(voc, portion_start, real_end)
                input_bac = self.pre_pro(torch.Tensor(reference_bac))
                input_voc = self.pre_pro(torch.Tensor(reference_voc))
                # Transform both stems to the (optionally sub-banded)
                # frequency representation the model consumes.
                input_f_background, input_f_vocals = before_forward_f(
                    input_bac,
                    input_voc,
                    subband_num=self.split_band,
                    device=self.device,
                    sample_rate=self.sample_rate,
                    normalize=False)
                # Mixture = vocals + background in the transformed domain.
                input_f = (input_f_vocals + input_f_background)
                self.model.eval()
                # The model outputs a mask per source id (0: bac, 1: voc);
                # multiplying by the mixture yields the source estimate.
                out_bac = input_f * self.model(0, input_f)
                out_voc = input_f * self.model(1, input_f)
                self.model.train()
                bac_loss.append(float(loss(input_f_background, out_bac)))
                voc_loss.append(float(loss(input_f_vocals, out_voc)))
    t_end = time.time()
    ret = (np.average(bac_loss), np.average(voc_loss))
    print("decrease-rate-threshold:", decrease_ratio)
    print("Validation time usage:", t_end - t_start, "s")
    print("Result: ", "bac-", ret[0], "voc-", ret[1])
    print("Previous: ", "bac-", previous_loss[0], "voc-", previous_loss[1])
    # First validation ever: nothing to compare against, accept the result.
    if (previous_loss[0] is None):
        return ret
    # Improvement test: either source's loss must shrink below the ratio
    # threshold. NOTE(review): assumes previous_loss[1] is not None whenever
    # previous_loss[0] isn't — verify against the caller.
    if (ret[0] / previous_loss[0] < decrease_ratio
            or ret[1] / previous_loss[1] < decrease_ratio):
        try:
            print("Save model")
            torch.save(
                self.model.state_dict(), self.project_root + "saved_models/" +
                self.model_name + "/model" + str(self.model.cnt) + ".pth")
            self.evaluate(save_wav=True, save_json=True)
            self.split_listener()
            return ret
        except Exception as e:
            # Best-effort: a failed save/evaluate still returns the new loss.
            logging.exception(e)
            return ret
    else:
        return previous_loss
def evaluate(self, save_wav=True, save_json=True):
    '''
    Do evaluation on MUSDB18 test set.

    Splits every test track with self.split(...), scores the estimates with
    museval (SDR/ISR/SIR/SAR for vocals and accompaniment), accumulates the
    per-track numbers in ``res``, prints median/mean/std summaries, and
    optionally dumps everything to a result json under
    outputs/musdb_test/<model><start_point>/.

    Args:
        save_wav: boolean, forward to self.split to also write estimate wavs
        save_json: boolean, save result json
    '''
    # All nested helpers close over ``res`` (the per-track result dict).
    def __fm(num):
        # Format a number with 2 decimals for printing.
        return format(num, ".2f")

    def __get_aproperate_keys():
        # Track-name keys only, i.e. everything except the ALL_* summaries.
        keys = []
        for each in list(res.keys()):
            if ("ALL" not in each):
                keys.append(each)
        return keys

    def __get_key_average(key, keys):
        util_list = [res[each][key] for each in keys]
        return np.mean(util_list)  # sum(util_list) / (len(util_list) - 1)

    def __get_key_median(key, keys):
        util_list = [res[each][key] for each in keys]
        return np.median(util_list)

    def __get_key_std(key, keys):
        util_list = [res[each][key] for each in keys]
        return np.std(util_list)

    def __roc_val(item, key: list, value: list):
        # Record the (metric key, value) pairs for one track.
        for each in zip(key, value):
            res[item][each[0]] = each[1]

    def __cal_avg_val(keys: list):
        # Fill the ALL_median/ALL_mean/ALL_std summaries for each metric
        # and print them as "median, mean, std".
        proper_keys = __get_aproperate_keys()
        for each in keys:
            res["ALL_median"][each] = 0
            res["ALL_mean"][each] = 0
            res["ALL_std"][each] = 0
            res["ALL_median"][each] = __get_key_median(each, proper_keys)
            res["ALL_mean"][each] = __get_key_average(each, proper_keys)
            res["ALL_std"][each] = __get_key_std(each, proper_keys)
            print(each, ":")
            print(__fm(res["ALL_median"][each]), ",",
                  __fm(res["ALL_mean"][each]), ",",
                  __fm(res["ALL_std"][each]))

    self.mus = musdb.DB(self.MUSDB_PATH, is_wav=True, subsets='test')
    # Path of the (possibly pre-existing) result json for this checkpoint.
    json_file_alias = self.project_root + "outputs/musdb_test/" + self.model_name + str(
        self.start_point) + "/result_" + self.model_name + str(
            self.start_point) + ".json"
    bac_keys = ["mus_sdr_bac", "mus_isr_bac", "mus_sir_bac", "mus_sar_bac"]
    voc_keys = ["mus_sdr_voc", "mus_isr_voc", "mus_sir_voc", "mus_sar_voc"]
    save_pth = self.project_root + "outputs/musdb_test/" + self.model_name + str(
        self.start_point)
    # if(os.path.exists(save_pth)):
    #     print("Already exist: ", save_pth)
    #     return
    # The "@" suffix makes this check always fail on purpose, forcing a
    # fresh ``res`` instead of resuming from an earlier json.
    if (
            os.path.exists(json_file_alias + "@")
    ):  # todo here we just do not want this program to find these json file
        res = load_json(json_file_alias)
        # print("Find:",res)
        res["ALL_median"] = {}
        res["ALL_mean"] = {}
        res["ALL_std"] = {}
    else:
        res = {}
        res["ALL_median"] = {}
        res["ALL_mean"] = {}
        res["ALL_std"] = {}
    # NOTE(review): ``pth`` is listed and sorted but never used below.
    dir_pth = self.test_pth
    pth = os.listdir(dir_pth)
    pth.sort()
    for cnt, track in enumerate(self.mus):
        # print("evaluating: ", track.name)
        res[track.name] = {}
        try:
            print("......................")
            # Separate the track into estimated and reference stems.
            background, vocal, origin_background, origin_vocal = self.split(
                track,
                save=save_wav,
                save_path=save_pth + "/",
                fname=track.name,
            )
            eval_targets = ['vocals', 'accompaniment']
            origin, estimate = {}, {}
            origin[eval_targets[0]], origin[
                eval_targets[1]] = origin_vocal, origin_background
            estimate[eval_targets[0]], estimate[
                eval_targets[1]] = vocal, background
            data = eval_mus_track(origin,
                                  estimate,
                                  output_dir=save_pth,
                                  track_name=track.name)
            print(data)
            museval_res = data.get_result()
            bac_values = [
                museval_res['accompaniment']['SDR'],
                museval_res['accompaniment']['ISR'],
                museval_res['accompaniment']['SIR'],
                museval_res['accompaniment']['SAR']
            ]
            voc_values = [
                museval_res['vocals']['SDR'], museval_res['vocals']['ISR'],
                museval_res['vocals']['SIR'], museval_res['vocals']['SAR']
            ]
            __roc_val(track.name, bac_keys, bac_values)
            __roc_val(track.name, voc_keys, voc_values)
        except Exception as e:
            # A failed track is logged and skipped; its res entry stays empty.
            print("ERROR: splitting error...")
            logging.exception(e)
    print("Result:")
    print("Median,", "Mean,", "Std")
    __cal_avg_val(bac_keys)
    __cal_avg_val(voc_keys)
    if (save_json == True):
        if (not os.path.exists(self.project_root + "outputs/musdb_test/" +
                               self.model_name + str(self.start_point))):
            os.mkdir(self.project_root + "outputs/musdb_test/" +
                     self.model_name + str(self.start_point))
        write_json(
            res, self.project_root + "outputs/musdb_test/" +
            self.model_name + str(self.start_point) + "/result_" +
            self.model_name + str(self.start_point) + ".json")
def load_stored_result(self, json_path):
    """Restore evaluation scores that were previously dumped to json.

    Args:
        json_path: path of the stored result json file.
    """
    stored_scores = load_json(json_path)
    self.scores = stored_scores
def refresh_configuration(cls, path_to_config_json):
    """Reload every Config field from a json configuration file.

    Reads the json at ``path_to_config_json``, copies the user-facing
    settings onto the Config class attributes, derives the auto-generated
    parameters (trail name, step sizes, device, ...), makes sure all output
    directories exist, and finally dumps the effective settings to
    config/json/<trail_name>.json.

    Args:
        path_to_config_json: path to the configuration json file.
    """
    conf_json = load_json(path_to_config_json)
    # Data path
    Config.MUSDB18_PATH = conf_json['PATH']['MUSDB18_PATH']
    # Model configurations
    Config.sources = conf_json['MODEL']['sources']
    Config.model_name = conf_json['MODEL'][
        'model_name']  # ["Unet-6" "MMDenseNet" "MDenseNet"]
    # Split four bands
    Config.subband = conf_json['SUBBAND']['number']
    # Validation loss decrease threshold
    Config.decrease_ratio = conf_json["VALIDATION"]['decrease_ratio']
    # Reload pre-trained model
    Config.load_model_path = conf_json['MODEL']['PRE-TRAINED'][
        'load_model_path']
    Config.start_point = conf_json['MODEL']['PRE-TRAINED']['start_point']
    # Hyper-params
    Config.epoches = conf_json["TRAIN"]['epoches']
    Config.learning_rate = conf_json["TRAIN"]['learning_rate']['initial']
    Config.batch_size = conf_json["TRAIN"]['batchsize']
    Config.accumulation_step = conf_json["TRAIN"]['accumulation_step']
    Config.gamma = conf_json["TRAIN"]['learning_rate']['gamma_decrease']
    Config.frame_length = conf_json["TRAIN"]['frame_length']
    Config.drop_rate = conf_json["TRAIN"]['dropout']
    # Training device
    Config.device_str = conf_json["TRAIN"]['device_str']
    # Loss components
    Config.loss_component = conf_json["TRAIN"]['loss']
    # Additional vocal data
    Config.additional_vocal_data = conf_json["PATH"]['additional_data'][
        "additional_vocal_path"]
    # Additional background (accompaniment) data
    Config.additional_accompaniment_data = conf_json["PATH"][
        'additional_data']["additional_accompaniments_path"]
    # Logging
    Config.every_n = conf_json["LOG"]["every_n"]
    Config.show_model_structure = conf_json["LOG"][
        "show_model_structure"] == 1
    ##########################################################################
    # Auto generated parameters
    Config.conf = {}
    Config.project_root = os.getcwd() + "/"
    Config.sample_rate = 44100
    # Ensure all output directories exist BEFORE they are listed or written.
    # BUGFIX: the original created them only at the very end, after
    # os.listdir("saved_models") and after the config json was written, and
    # used the misspelled "saved_model" (checkpoints live in "saved_models").
    find_and_build(Config.project_root, "outputs")
    find_and_build(Config.project_root, "outputs/listener")
    find_and_build(Config.project_root, "outputs/musdb_test")
    find_and_build(Config.project_root, "saved_models")
    find_and_build(Config.project_root, "config/json")
    find_and_build(Config.project_root, "evaluate/listener_todo")
    # Map the model name to the alias used inside the trail name.
    if Config.model_name == "Unet-5":
        Config.model_name_alias = "_unet_5_"
    elif Config.model_name == "Unet-6":
        Config.model_name_alias = "_unet_6_"
    elif Config.model_name == "MMDenseNet":
        Config.model_name_alias = "MMDenseNet"
    elif Config.model_name == "MDenseNet":
        Config.model_name_alias = "MDenseNet"
    Config.num_workers = Config.batch_size
    # BIG_DATA: training data extends beyond plain MUSDB18.
    Config.BIG_DATA = (len(Config.additional_vocal_data) != 0
                       or len(Config.additional_accompaniment_data) != 0)
    if Config.BIG_DATA:
        Config.step_size = int(180000 / Config.batch_size)  # Every 45 h
        Config.validation_interval = int(
            18000 / Config.batch_size)  # Every 4.5h
    else:
        Config.step_size = int(72000 / Config.batch_size)  # Every 30 h
        Config.validation_interval = int(
            3600 / Config.batch_size)  # Every 1.5 h
    Config.split_band = Config.subband != 1
    Config.use_gpu = "cuda" in str(Config.device_str)
    Config.device = torch.device(
        Config.device_str if Config.use_gpu else "cpu")
    # Build trail name: <counter>_<y>_<m>_<d>_<alias>sf<start>_<losses>...
    cur = datetime.datetime.now()
    Config.trail_name = str(cur.year) + "_" + str(cur.month) + "_" + str(
        cur.day) + "_" + Config.model_name_alias + "sf" + str(
            Config.start_point) + "_"
    # Counter = 1 + number of same-day runs that already produced a model dir.
    Config.counter = 1
    for each in os.listdir(Config.project_root + "saved_models"):
        t = str(cur.year) + "_" + str(cur.month) + "_" + str(cur.day)
        if (t in each):
            for dirName in os.listdir(Config.project_root + "saved_models/" +
                                      each):
                if ("model" in dirName):
                    Config.counter += 1
                    break
    Config.trail_name = str(Config.counter) + "_" + Config.trail_name
    for each in Config.loss_component:
        Config.trail_name += each + "_"
    # BUGFIX: str.strip returns a new string; the original discarded the
    # result, so the intended trailing-underscore strip never happened.
    Config.trail_name = Config.trail_name.strip("_")
    Config.trail_name += "_BD_" + str(Config.BIG_DATA) + "_lr" + str(Config.learning_rate).split(".")[-1] + "_" \
        + "bs" + str(Config.batch_size) + "-" + str(Config.accumulation_step) + "_" \
        + "fl" + str(Config.frame_length) + "_" \
        + "ss" + str(Config.step_size) + "_" + str(Config.gamma).split(".")[-1] \
        + "drop" + str(Config.drop_rate) \
        + "split_band" + str(Config.split_band) + "_" + str(Config.subband)
    # Snapshot of the effective configuration. BUGFIX: the original called
    # write_json BEFORE filling Config.conf, so the file was always empty.
    Config.conf['model_name'] = Config.model_name
    Config.conf['split_band'] = Config.split_band
    Config.conf['decrease_ratio'] = Config.decrease_ratio
    Config.conf['start_point'] = Config.start_point
    Config.conf['learning_rate'] = Config.learning_rate
    Config.conf['batch_size'] = Config.batch_size
    Config.conf['accumulation_step'] = Config.accumulation_step
    Config.conf['step_size'] = Config.step_size
    Config.conf['gamma'] = Config.gamma
    Config.conf['sample_rate'] = Config.sample_rate
    Config.conf['frame_length'] = Config.frame_length
    Config.conf['drop_rate'] = Config.drop_rate
    print(
        "Write config file at: ",
        Config.project_root + "config/json/" + Config.trail_name + ".json")
    write_json(
        Config.conf,
        Config.project_root + "config/json/" + Config.trail_name + ".json")
def gen_or_load_dataset(configs):
    """Build the pickled dataset for ``configs.task``, or load it if cached.

    Looks for a previously generated pickle under ``configs.save_dir``;
    otherwise converts the raw annotations with the task-specific processor,
    clips video-feature lengths to ``configs.max_pos_len``, builds the
    vocabulary/embeddings, generates train/val/test sets, and caches the
    whole bundle as a pickle.

    Args:
        configs: namespace with save_dir, task, fv, max_pos_len, suffix.

    Returns:
        dict with train/val/test sets, dictionaries, word vectors and counts
        (val_set is None and n_val is 0 for tasks without a validation split).

    Raises:
        ValueError: for an unrecognized ``configs.task``.
    """
    if not os.path.exists(configs.save_dir):
        os.makedirs(configs.save_dir)
    data_dir = os.path.join('data', 'dataset', configs.task)
    feature_dir = os.path.join('data', 'features', configs.task, configs.fv)
    # Cache file name: <task>_<fv>_<max_pos_len>[_<suffix>].pkl
    name_parts = [configs.task, configs.fv, str(configs.max_pos_len)]
    if configs.suffix is not None:
        name_parts.append(configs.suffix)
    save_path = os.path.join(configs.save_dir, '_'.join(name_parts) + '.pkl')
    if os.path.exists(save_path):
        return load_pickle(save_path)
    feat_len_path = os.path.join(feature_dir, 'feature_shapes.json')
    emb_path = os.path.join('data', 'features', 'glove.840B.300d.txt')
    # Clip every video feature length to the maximum position length.
    vfeat_lens = load_json(feat_len_path)
    for vid in vfeat_lens:
        vfeat_lens[vid] = min(configs.max_pos_len, vfeat_lens[vid])
    # Task-specific raw-annotation processors.
    processor_by_task = {
        'charades': CharadesProcessor,
        'activitynet': ActivityNetProcessor,
        'tacos': TACoSProcessor,
    }
    if configs.task not in processor_by_task:
        raise ValueError('Unknown task {}!!!'.format(configs.task))
    processor = processor_by_task[configs.task]()
    train_data, val_data, test_data = processor.convert(data_dir)
    has_val = val_data is not None
    # Vocabulary is built over every available split.
    data_list = ([train_data, val_data, test_data]
                 if has_val else [train_data, test_data])
    word_dict, char_dict, vectors = vocab_emb_gen(data_list, emb_path)
    train_set = dataset_gen(train_data, vfeat_lens, word_dict, char_dict,
                            configs.max_pos_len, 'train')
    val_set = (dataset_gen(val_data, vfeat_lens, word_dict, char_dict,
                           configs.max_pos_len, 'val') if has_val else None)
    test_set = dataset_gen(test_data, vfeat_lens, word_dict, char_dict,
                           configs.max_pos_len, 'test')
    dataset = {
        'train_set': train_set,
        'val_set': val_set,
        'test_set': test_set,
        'word_dict': word_dict,
        'char_dict': char_dict,
        'word_vector': vectors,
        'n_train': len(train_set),
        'n_val': len(val_set) if val_set is not None else 0,
        'n_test': len(test_set),
        'n_words': len(word_dict),
        'n_chars': len(char_dict),
    }
    save_pickle(dataset, save_path)
    return dataset