def get_split_val_feats(self, feat_name: str, split_val_idxs: list, feats_maker_params: dict = None, forced=False):
    """Return validation-split features for `feat_name`, generating them on demand.

    if None, use feats_maker to gen and write. then read.

    :param feat_name: key into the feats-maker / feats-data tables.
    :param split_val_idxs: indices of the validation samples within raw train data.
    :param feats_maker_params: forwarded to the feat maker's make_features().
    :param forced: if force to regenerate test features, classifiers can force
        to use augmented features for test data.
    :return: cached feature array for the validation split.
    """
    # Lazily capture the validation-sample count from the raw data DB.
    if self.split_val_num is None:
        self.split_val_num = self.raw_data_db.split_val_sample_num
    if self.split_val_feats_status_table.get(feat_name) is False or forced is True:
        # 1. clear old.
        self.split_val_feats_data_tables[feat_name] = np.array([None] * self.split_val_num)
        # 2. get raw data
        need_make_feats_rawdata = self.raw_data_db.raw_train_x_np_table[split_val_idxs]
        # 3. use feats params to make TEST feats
        make_feats_done = self.raw_feat_makers_table.get(feat_name).make_features(
            need_make_feats_rawdata, feats_maker_params
        )
        make_feats_done = np.array(make_feats_done)
        # 3. put it into val_feats_table (count must match the split size).
        assert (
            len(make_feats_done) == self.split_val_num
        ), "Error, split_val_num={}, but len split_val_feats={}".format(self.split_val_num, len(make_feats_done))
        self.split_val_feats_data_tables[feat_name] = make_feats_done
        self.split_val_feats_status_table[feat_name] = True
        info("Note: split_val_feats new updated, feat_num={}".format(feat_name))
    return self.split_val_feats_data_tables.get(feat_name)
def renew_if_multilabel(self, is_multilabel=False):
    """Record the multilabel flag; when it is set, swap in the multilabel model."""
    self.is_multilabel = is_multilabel
    if self.is_multilabel is not False:
        # Multilabel path: the sigmoid-head model replaces the active model.
        self.model = self.ml_model
    info("Note: is_multilabel={}, tr34_cls updated.".format(self.is_multilabel))
def mel_feats_transform(x_mel):
    """Collapse each mel spectrogram to a standardized [mean, std] feature row.

    For every sample, the per-frequency mean and std over time are concatenated,
    then the whole batch is z-scored with StandardScaler.
    """
    info_log = list()
    rows = []
    for idx in range(len(x_mel)):
        sample = x_mel[idx]
        mel = np.mean(sample, axis=0).reshape(-1)
        mel_std = np.std(sample, axis=0).reshape(-1)
        fea_item = np.concatenate([mel, mel_std], axis=-1)
        rows.append(fea_item)
        # Log shapes for the first sample only.
        if idx < 1:
            info_log.append("i={}, x_mel type={}, shape={}".format(
                idx, type(sample), sample.shape))
            info_log.append("i={}, mel type={}, shape={}".format(
                idx, type(mel), mel.shape))
            info_log.append("i={}, mel_std type={}, shape={}".format(
                idx, type(mel_std), mel_std.shape))
            info_log.append("i={}, fea_item type={}, shape={}".format(
                idx, type(fea_item), fea_item.shape))
    x_feas = np.asarray(rows)
    scaler = StandardScaler()
    X = scaler.fit_transform(x_feas[:, :])
    info_log.append("x_feas type={}, shape={}".format(type(x_feas), x_feas.shape))
    info_log.append("X type={}, shape={}".format(type(X), X.shape))
    info(json.dumps(info_log, indent=4))
    return X
def predict_proba_multilabel(self, test_examples: np.ndarray):
    """Predict per-class probabilities for multilabel data.

    ml_mode=1: one fitted OneVsRest model per class; stack each P(class=1) column.
    ml_mode=2: a single multilabel-capable model.
    ml_mode=3: the logistic-regression pipeline, per class.
    Any other mode logs an error and falls back to the single model.

    :param test_examples: 2-D feature matrix.
    :return: (n_samples, n_classes) probability array.
    """
    if self.ml_mode == 1:
        all_preds = []
        for cls in range(self.class_num):
            preds = self.ml_models[cls].predict_proba(test_examples)
            info("cls={}, preds shape={}, data={}".format(
                cls, preds.shape, preds))
            all_preds.append(preds[:, 1])  # probability of the positive label
        preds = np.stack(all_preds, axis=1)
    elif self.ml_mode == 2:
        preds = self.ml_model.predict_proba(test_examples)
    elif self.ml_mode == 3:
        # Bug fix: the old code also called self.ml_model and self.ml_models[cls]
        # here and discarded both results — only the pipeline prediction survived.
        # NOTE(review): the same pipeline is queried for every class, so every
        # column is identical — confirm this is intended.
        all_preds = []
        for cls in range(self.class_num):
            preds = self.logReg_pipeline.predict_proba(test_examples)
            all_preds.append(preds[:, 1])
        preds = np.stack(all_preds, axis=1)
    else:
        error("Error: wrong ml_mode={}".format(self.ml_mode))
        preds = self.ml_model.predict_proba(test_examples)
    info("multilabel, preds shape={} , data={}".format(preds.shape, preds))
    return preds
def init(self, class_num: int, init_params: dict):
    """Initialize the pretrained-CNN backbone classifier.

    :param class_num: number of output classes.
    :param init_params: expects keys n_mels, train_batch_size, predict_batch_size.
    """
    self.class_num = class_num
    self.clf_name = "cnn_pret"
    # n_mels: 64, fixed, as pretrained.
    self.n_mels = init_params.get("n_mels")
    self.model = cnn_load_pretrained_model(
        input_shape=(self.n_mels, self.n_mels, 1), n_classes=self.class_num)
    info("Backbone classifier={} is init, class_num={}, init_params={}".format(
        self.clf_name, self.class_num, init_params))
    as_timer("clf_{}_init".format(self.clf_name))
    self.train_batch_size = init_params.get("train_batch_size")
    self.predict_batch_size = init_params.get("predict_batch_size")
    self.n_iter = 0
    # Optional augmentation: frequency masking applied as a preprocessing step.
    self.img_freqmasking_datagen = ImageDataGenerator(
        preprocessing_function=DNpAugPreprocessor.frequency_masking)
def init_even_class_index_by_each(self, each_class_index_list): even_class_index = [] # per_class_下限是1. sample_per_class = max(int(self._train_y_num / self._num_classes), 1) log_info = list() for i in range(self._num_classes): class_cnt = len(each_class_index_list[i]) tmp = [] log_info.append([i, class_cnt]) # log("init even class index, class_id={}, class_cnt={}".format(i, class_cnt)) # fixme: bug, class_cnt 可能为0, 允许此类情况,即部分Label为空. if class_cnt == 0: info("Init even class index, class_id={} cn=0".format(i)) pass elif class_cnt < sample_per_class: # fixme: 此处为少量时,强行通过copy方式将类别补齐到 samplez_per_class. 应为两种模式:不补齐/补齐. tmp = each_class_index_list[i] * \ int(sample_per_class / class_cnt) tmp += random.sample(each_class_index_list[i], sample_per_class - len(tmp)) else: tmp += random.sample(each_class_index_list[i], sample_per_class) random.shuffle(tmp) even_class_index.append(tmp) info("Init even class index, class_id, class_cnt={}".format(log_info)) return even_class_index
def gen_committee(self, voting_conditions: dict):
    """Assemble the voting committee from top-ranked epoch predictions.

    for example:
        voting_conditions = {
            "val_auc": 5,
            "val_acc": 5,
            "val_loss": 5,
        }
    Each key is a ranking metric and each value is how many top entries of
    that ranking join the committee.

    :param voting_conditions: metric-name -> top-N mapping.
    :return: None; fills self.commitee_id_pool.
    """
    self.commitee_id_pool = list()
    for k, v in voting_conditions.items():
        assert k in self.COMM_KEY_LIST
        # Top-v entry ids for metric k, from the globally sorted index pools.
        condition_comit_ids = self.eval_pred_space.g_sort_idx_pool.get(
            k)[:v]
        condition_comit_values = [
            self.eval_pred_space.eval_pred_pool[i].get(k)
            for i in condition_comit_ids
        ]
        info("Note: k={}, ids={}, values={}".format(
            k, condition_comit_ids, condition_comit_values))
        # If the top-10 train accuracies are all exactly 1.0, the candidates
        # are overfitting — skip this metric's contribution entirely.
        if k == "t_acc" and self.eval_pred_space.g_sort_train_accs[:10].count(
                1) == 10:
            info("Overfitting, top10 acc = 1, remove them.")
        else:
            self.commitee_id_pool.extend(condition_comit_ids)
def decide_if_renew_testfeats(self):
    """Return True when test features must be regenerated — on the first test
    round or right after a spec-length change — resetting the status flag."""
    info("Test, tr34.test_idx={}, spec_len_status={}".format(self.test_idx, self.spec_len_status))
    renew = self.test_idx == 1 or self.spec_len_status == 2
    if renew:
        self.spec_len_status = 0
        self.imp_feat_args["mode"] = "test"
    return renew
def decide_if_renew_trainfeats(self):
    """Return True when train features must be regenerated.

    Always refreshes the spec length and forces feature mode to "train".
    Regeneration is needed exactly when a new spec length was just set
    (spec_len_status == 1), which is then promoted to status 2.
    """
    # must setup mode.
    self.try_to_update_spec_len()
    self.imp_feat_args["mode"] = "train"
    info("Train, spec_len_status={}, imp_feat_args={}".format(self.spec_len_status, self.imp_feat_args))
    if self.spec_len_status == 1:
        self.spec_len_status = 2
        return True
    # Bug fix: the old code fell through and implicitly returned None;
    # return an explicit False to match decide_if_renew_testfeats.
    return False
def init(self, class_num: int, init_params: dict):
    """Initialize the liblinear logistic-regression backbone classifier.

    :param class_num: number of output classes.
    :param init_params: unused; logged for traceability.
    """
    self.clf_name = "ml_sl_lr_liblinear"
    self.class_num = class_num
    self.model = logistic.LogisticRegression(solver="liblinear")
    info("Backbone classifier=SLLRLiblinear is init, class_num={}, init_params={}"
         .format(self.class_num, init_params))
def put_raw_train_np(self, raw_train_x_array, raw_train_y_array):
    """Copy raw train samples and labels into the np tables, then refresh the
    filled-prefix views used elsewhere."""
    put_len = len(raw_train_x_array)
    for idx in range(put_len):
        self.raw_train_x_np_table[idx] = np.array(raw_train_x_array[idx])
        self.raw_train_y_np_table[idx] = np.array(raw_train_y_array[idx])
    self.raw_train_np_filled_num = put_len
    filled = self.raw_train_np_filled_num
    self.raw_train_x_np_table_filled = self.raw_train_x_np_table[:filled]
    info("put_len={}".format(put_len))
    self.raw_train_y_np_table_filled = self.raw_train_y_np_table[:filled]
def minisamples_edaer(mini_xs: list, mini_y: np.ndarray): """ :param mini_xs: list of array :param mini_y: array of array, onehot type. :return: """ # for x x_seq_len_list = list() for x_train_sample in mini_xs: len_a_x_sample = x_train_sample.shape[0] x_seq_len_list.append(len_a_x_sample) x_word_len_array = np.array(x_seq_len_list) x_seq_len_mean = x_word_len_array.mean() x_seq_len_std = x_word_len_array.std() print("num={}, len_mean={}".format(len(mini_xs), x_seq_len_mean)) # for y. mini_num, class_num = mini_y.shape[0], mini_y.shape[1] each_class_index = [] class_val_count = 0 for i in range(class_num): # info("i={}, c={}".format(i, cur_train_y[:, i])) where_i = np.where(mini_y[:, i] == 1) # print(where_i) class_i_ids = list(where_i[0]) if len(class_i_ids) > 0: class_val_count += 1 each_class_index.append(class_i_ids) # print(each_class_index) class_cover_rate = round(class_val_count / class_num, 4) class_dis_array = [round(len(i) / mini_num, 4) for i in each_class_index] # eda: if multi-label onehot_y_sum = np.sum(mini_y) is_multilabel = False if onehot_y_sum > mini_num: is_multilabel = True info("EDA: mini_num={}, onehot_y_sum={}, is_multilabel={}".format( mini_num, onehot_y_sum, is_multilabel)) # print("class_count={}, class_dis_array={}".format(class_cover_rate, class_dis_array)) mini_eda_report = { "minis_num": len(mini_xs), "x_seqlen_mean": round(x_seq_len_mean, 4), "x_seqlen_std": round(x_seq_len_std, 4), "y_cover_rate": class_cover_rate, "y_dis_array": class_dis_array, "is_multilabel": is_multilabel } return mini_eda_report
def update_spec_len(self, train_spec_len, test_spec_len):
    """Store the new train/test spectrogram lengths plus derived waveform
    lengths (160 samples per spec frame) and mark the spec length as fresh."""
    info("Tr34, round_idx={}, and update_spec_len, spec_len_status={}".format(
        self.round_idx, self.spec_len_status))
    self.imp_feat_args = {
        "train_spec_len": train_spec_len,
        "test_spec_len": test_spec_len,
        "train_wav_len": 160 * train_spec_len,
        "test_wav_len": 160 * test_spec_len,
        "mode": "train",
    }
    self.spec_len_status = 1
    return True
def make_features(self, raw_data, feats_maker_params: dict):
    """Build standardized mel-based features for a batch of raw waveforms.

    :param raw_data: list or ndarray of 1-D waveforms.
    :param feats_maker_params: {
        "len_sample": 5,
        "sr": SAMPLING_RATE,
    }
    :return: 2-D feature matrix produced by mel_feats_transform.
    """
    if isinstance(raw_data, list):
        info("raw_data, len={}, ele_type={}".format(
            len(raw_data), type(raw_data[0])))
    elif isinstance(raw_data, np.ndarray):
        info("raw_data, shape={}, ele_type={}".format(
            raw_data.shape, type(raw_data[0])))
    else:
        pass
    # Cap every clip at MAX_AUDIO_DURATION seconds.
    raw_data = [
        sample[0:MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE] for sample in raw_data
    ]
    X = get_fixed_array(raw_data,
                        len_sample=feats_maker_params.get("len_sample"),
                        sr=feats_maker_params.get("sr"))
    info("note: exft get fix done.")
    X = self.kapre_melspectrogram_extractor.predict(X)
    info(
        "note: exft model kapre_extractor predict done, kapre predict shape={}"
        .format(X.shape))
    # basic: (147, 30, 125, 1) to (147, 125, 30, 1) to (147, 125, 30)
    # squeeze the channel axis, then swap to (batch, time, mel).
    X = np.squeeze(X)
    X = X.transpose(0, 2, 1)
    info("note: exft model transpose and squeeze done, shape={}".format(
        X.shape))
    # tranform melspectrogram features.
    X = mel_feats_transform(X)
    return X
def get_raw_test_feats(self, feat_name, feats_maker_params: dict = None, forced=False):
    """Return cached test features, generating them first when absent or forced.

    :param feat_name: key into the feats-maker / feats-data tables.
    :param feats_maker_params: forwarded to the feat maker's make_features().
    :param forced: if True, regenerate even when features already exist.
    :return: cached test feature array.
    """
    info("Test: feat_name={}. feats_make_params={}, forced={}".format(feat_name, feats_maker_params, forced))
    needs_build = self.raw_test_feats_status_table.get(feat_name) is False or forced is True
    if needs_build:
        maker = self.raw_feat_makers_table.get(feat_name)
        raw_test_feats_np = maker.make_features(
            self.raw_data_db.raw_test_x_np_table, feats_maker_params)
        self.put_raw_test_feats(feat_name, raw_test_feats_np)
        self.raw_test_feats_status_table[feat_name] = True
    return self.raw_test_feats_data_tables.get(feat_name)
def init_kapre_melspectrogram_extractor(self):
    """Build the Kapre melspectrogram extractor and optionally warm it up.

    The extractor expects mono audio of CROP_SEC * SAMPLING_RATE samples.
    Warmup pushes a small random batch through make_features so the first
    real prediction does not pay model-initialization cost.
    """
    self.kapre_melspectrogram_extractor = self.make_melspectrogram_extractor(
        (1, self.kape_params.get("CROP_SEC") * self.kape_params.get("SAMPLING_RATE")))
    if KAPRE_FMAKER_WARMUP:
        warmup_size = 10
        # Perf fix: draw each 48000-sample waveform with one vectorized call
        # instead of a 48000-iteration Python list comprehension per sample.
        warmup_x = [
            np.random.uniform(size=48000).astype(np.float32)
            for _ in range(warmup_size)
        ]
        warmup_x_mel = self.make_features(warmup_x, feats_maker_params={
            "len_sample": 5,
            "sr": 16000
        })
        info("Kpare_featmaker warmup.")
        as_timer("Kpare_featmaker_warmup")
def init(self, class_num, init_params: dict):
    """Initialize the SAG-solver logistic-regression classifier, plus a
    OneVsRest liblinear model for multilabel use.

    :param class_num: number of output classes.
    :param init_params: expects key max_iter.
    """
    self.clf_name = "sl_lr_sag"
    self.class_num = class_num
    self.max_iter = init_params.get("max_iter")
    self.model = logistic.LogisticRegression(C=1.0,
                                             max_iter=self.max_iter,
                                             solver="sag",
                                             multi_class="auto")
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))
    # Bug fix: the log message previously claimed classifier=SLLRLiblinear for
    # this SAG-based classifier; report the actual classifier name instead.
    info("Backbone classifier={} is init, class_num={}, init_params={}"
         .format(self.clf_name, self.class_num, init_params))
def get_loss_godown_rate(self, g_loss_list, tail_window_size):
    """Fraction of consecutive decreases in the tail of the loss history.

    :param g_loss_list: global loss history (list of floats).
    :param tail_window_size: how many trailing losses to inspect.
    :return: rounded ratio in [0, 1]; 0.0 when fewer than 2 losses are available.
    """
    tail_loss_list = g_loss_list[-tail_window_size:]
    loss_num = len(tail_loss_list)
    # Bug fix: with fewer than 2 points the old code divided by zero below.
    if loss_num < 2:
        info("loss_num={}, godown_num=0, loss_godown_rate=0.0".format(loss_num))
        return 0.0
    loss_godown_count_list = list()
    for i in range(1, loss_num):
        # +1 marks a decrease from the previous step, -1 otherwise.
        if tail_loss_list[i] - tail_loss_list[i - 1] < 0:
            loss_godown_count_list.append(1)
        else:
            loss_godown_count_list.append(-1)
    loss_godown_count_num = loss_godown_count_list.count(1)
    loss_godown_count_rate = round(loss_godown_count_num / (loss_num - 1), 4)
    info("loss_num={}, godown_num={}, loss_godown_rate={}".format(
        loss_num, loss_godown_count_num, loss_godown_count_rate))
    return loss_godown_count_rate
def get_fixed_array(X_list, len_sample=5, sr=SAMPLING_RATE):
    """Pad/trim every waveform to exactly len_sample*sr samples and batch them.

    NOTE: mutates X_list in place (each entry is tiled and/or truncated).

    :param X_list: list of 1-D waveforms.
    :param len_sample: target length in seconds.
    :param sr: sampling rate in Hz.
    :return: array shaped (batch, 1, len_sample*sr).
    """
    target_len = len_sample * sr
    for i in range(len(X_list)):
        if len(X_list[i]) < target_len:
            # Tile short clips until they reach the target length.
            n_repeat = np.ceil(target_len / X_list[i].shape[0]).astype(np.int32)
            X_list[i] = np.tile(X_list[i], n_repeat)
        X_list[i] = X_list[i][:target_len]
    X = np.asarray(X_list)
    info("x shape={}".format(X.shape))
    # Bug fix: removed a redundant np.stack(X) on the already-batched array
    # (it was a no-op copy) together with its duplicate log line.
    X = X[:, :, np.newaxis]
    X = X.transpose(0, 2, 1)
    return X
def init_each_class_index_by_y(self, cur_train_y):
    """Invert one-hot labels into per-class lists of sample indices, and record
    the max/min per-class sample counts on the instance."""
    cur_train_y = np.stack(cur_train_y)
    each_class_count = np.sum(np.array(cur_train_y), axis=0)
    self._max_class_num = int(np.max(each_class_count))
    self._min_class_num = int(np.min(each_class_count))
    info('Raw train data: train_num(without val) {}; '.format(
        len(cur_train_y)) +
        'class_num {} ; max_class_num {}; min_class_num {}; '.format(
            self._num_classes, self._max_class_num, self._min_class_num))
    # fixme: could be simplified, just classid_inverted_index.
    info("cur_train_y shape={}, data={}".format(cur_train_y.shape, cur_train_y))
    each_class_index = [
        list(np.where(cur_train_y[:, cls_id] == 1)[0])
        for cls_id in range(self._num_classes)
    ]
    return each_class_index
def put_epoch_eval_preds(self, epoch_item):
    """Append one epoch's evaluation record and refresh the global rankings.

    make sure that item is full of values, use default if null.
    epoch_item = {
        "model_name"
        "t_loss": loss_value,
        "t_acc": acc_value,
        "val_acc": val_acc_value,
        "val_loss": val_loss_value,
        "val_auc": val_auc_value,
        "pred_probas": pred_proba_value
    }
    :param epoch_item:
    :return: None, update EpochEvalPredPool
    """
    self.eval_pred_pool.append(epoch_item)
    # Rankings: val_nauc descending, t_acc descending, t_loss ascending.
    self.g_sort_val_nauc_idxs, self.g_sort_val_naucs = listofdict_topn_sorter(
        raw_listofdict=self.eval_pred_pool, attr_key="val_nauc")
    self.g_sort_train_loss_idxs, self.g_sort_train_losss = listofdict_topn_sorter(
        raw_listofdict=self.eval_pred_pool, attr_key="t_loss", reverse=False)
    self.g_sort_train_acc_idxs, self.g_sort_train_accs = listofdict_topn_sorter(
        raw_listofdict=self.eval_pred_pool, attr_key="t_acc")
    info("Add evel_pred, top10_train_loss={}".format(
        self.g_sort_train_losss[:10]))
    info("Add evel_pred, top10_train_acc={}".format(
        self.g_sort_train_accs[:10]))
    # Expose the sorted index lists under their metric names (used by the
    # committee-voting logic).
    self.g_sort_idx_pool["val_nauc"] = self.g_sort_val_nauc_idxs
    self.g_sort_idx_pool["t_loss"] = self.g_sort_train_loss_idxs
    self.g_sort_idx_pool["t_acc"] = self.g_sort_train_acc_idxs
def init(self, class_num: int, init_params: dict = None):
    """Set up liblinear LR backbones: one single-label model plus three
    multilabel variants (per-class OvR list, single OvR model, OvR pipeline)."""
    self.clf_name = "sl_lr_liblinear"
    self.class_num = class_num
    # Single-label model.
    self.model = logistic.LogisticRegression(solver="liblinear")
    self.ml_mode = 2
    # Multilabel mode-1: one OneVsRest+LR model per class.
    self.ml_models = [
        OneVsRestClassifier(logistic.LogisticRegression(solver="liblinear"))
        for _ in range(class_num)
    ]
    # Multilabel mode-2: a single OneVsRest+LR model.
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))
    # Multilabel mode-3: Pipeline wrapping OneVsRest+LR.
    self.logReg_pipeline = Pipeline([
        ('clf', OneVsRestClassifier(
            logistic.LogisticRegression(solver='liblinear'), n_jobs=-1)),
    ])
    info("Backbone classifier=SLLRLiblinear is init, class_num={}, init_params={}"
         .format(self.class_num, init_params))
def pre_trans_wav_update(self, wav_list, params):
    """Transform raw waveforms to magnitude spectrograms, with a process pool
    for larger batches.

    :param wav_list: list of 1-D waveforms.
    :param params: forwarded to wav_to_mag (mode, wav/spec lengths, ...).
    :return: list of magnitude arrays (empty list for empty input).
    """
    info("pre_trans_wav len={}, params={}".format(len(wav_list), params))
    if len(wav_list) == 0:
        return []
    # set=10, test for single CPU =10000
    elif len(wav_list) > NCPU * 2:
        info("note: using pool pre_trans_wav len={}".format(len(wav_list)))
        #
        with Pool(NCPU) as pool:
            mag_arr = pool.starmap(wav_to_mag, zip(wav_list, repeat(params)))
            # # mag_arr = pool.starmap(wav_to_mag, zip(np.asfortranarray(wav_list), repeat(params)))
            return mag_arr
    else:
        info("note: using no pool pre_trans_wav len={}".format(
            len(wav_list)))
        mag_arr = [wav_to_mag(wav, params) for wav in wav_list]
        info("note: using no pool pre_trans_wav done len={}".format(
            len(wav_list)))
        return mag_arr
def extract_for_one_sample(sample_tuple, extract, use_power_db=False, **kwargs):
    """Extract features for one (data, idx) pair.

    :param sample_tuple: (raw sample, original index). Renamed from `tuple`,
        which shadowed the builtin; callers pass it positionally (map/starmap).
    :param extract: feature-extraction callable applied to the raw sample.
    :param use_power_db: for melspectrogram, convert power to dB first.
    :param kwargs: forwarded to `extract`.
    :return: (time-major feature matrix, original index).
    """
    data, idx = sample_tuple
    r = extract(data, **kwargs)
    info("note: feee=librosa, extract r shape={}".format(r.shape))
    # for melspectrogram
    if use_power_db:
        r = librosa.power_to_db(r)
        info("note: feee=librosa, after power_to_db r shape={}".format(r.shape))
    r = r.transpose()
    info("note: feee=librosa, after transpose r shape={}".format(r.shape))
    return r, idx
def get_raw_train_feats(self, feat_name, raw_train_idxs, feats_maker_params: dict = None, forced=False): """ 1. check need_make_feats_idxs 2. get raw_data by make_feats_idx 3. make_feats(feats_maker, raw_data_np) 4. write_back(update_featss 5. read updated feats. :param feat_name: :param raw_train_idxs: :param feats_maker_params: :param forced: if True, re-generate and write back to feat_np_table. :return: """ # check if is None first, if is None, using feats_maker need_make_feats_idxs = list() if forced: # clear feat_name table. self.raw_train_feats_data_tables[feat_name] = np.array([None] * self.raw_train_num) need_make_feats_idxs = raw_train_idxs else: for raw_train_idx in raw_train_idxs: if self.raw_train_feats_data_tables.get(feat_name)[raw_train_idx] is None: need_make_feats_idxs.append(raw_train_idx) info( "if_forced={}, feat_name={}, need_make_feats_idx len={}, content={}".format( forced, feat_name, len(need_make_feats_idxs), need_make_feats_idxs[:5] ) ) # fixme: check length. if len(need_make_feats_idxs) > 0: # 2. get raw data need_make_feats_rawdata = self.raw_data_db.raw_train_x_np_table[need_make_feats_idxs] # 3. make_feats make_feats_done = self.raw_feat_makers_table.get(feat_name).make_features( need_make_feats_rawdata, feats_maker_params ) make_feats_done = np.array(make_feats_done) info("make_feats_done, type={}, shape={}".format(type(make_feats_done), make_feats_done.shape)) # 4. write back to feat_table for i in range(len(need_make_feats_idxs)): self.raw_train_feats_data_tables.get(feat_name)[need_make_feats_idxs[i]] = make_feats_done[i] # 5. read from updated feat_table. cur_train_feats = [self.raw_train_feats_data_tables.get(feat_name)[i].shape for i in raw_train_idxs] info("cur_train_feats, shape_list={}".format(cur_train_feats[:3])) return np.stack(self.raw_train_feats_data_tables.get(feat_name)[raw_train_idxs])
def train(self, dataset, remaining_time_budget=None):
    """Train method of domain-specific model.

    Runs one train_pipeline pass on the domain model — or, in break-condition
    mode, repeated passes until the round's validation NAUC is acceptable —
    and records per-round loss / NAUC history for ensembling.

    :param dataset: training dataset handed to the domain model.
    :param remaining_time_budget: unused here; kept for the caller interface.
    """
    logger.info("Note: speech_train_process model.py starts train")
    as_timer("train_start")
    if IF_TRAIN_BREAK_CONDITION:
        # Loop until val_nauc clears the acceptance threshold, or the round
        # reports -1 (no validation score produced).
        while True:
            self.cur_train_his_report = self.domain_model.train_pipeline(dataset)
            self.cur_cls_name = self.cur_train_his_report.get("cls_name")
            cur_val_nauc = self.cur_train_his_report["val_nauc"]
            self.ensemble_val_record_list.append([self.cur_cls_name, cur_val_nauc])
            self.ensemble_val_nauc_list.append(cur_val_nauc)
            if cur_val_nauc == -1 or cur_val_nauc > self.get_accept_nauc():
                info("Decision=Yes, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, cur_val_nauc, max(self.ensemble_val_nauc_list)))
                break
            else:
                info("Decision=No, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, cur_val_nauc, max(self.ensemble_val_nauc_list)))
    else:
        self.cur_train_his_report = self.domain_model.train_pipeline(dataset)
        self.cur_cls_name = self.cur_train_his_report.get("cls_name")
        cur_t_loss = self.cur_train_his_report.get("t_loss")
        # A missing loss is recorded as a large sentinel so ranking still works.
        if cur_t_loss is None:
            self.g_train_loss_list.append(100000)
        else:
            self.g_train_loss_list.append(cur_t_loss)
        info("train_his_report={}".format(self.cur_train_his_report))
        cur_val_nauc = self.cur_train_his_report["val_nauc"]
        self.ensemble_val_record_list.append([self.cur_cls_name, cur_val_nauc])
        self.ensemble_val_nauc_list.append(cur_val_nauc)
    as_timer("speech_model_basic_train")
def wav_to_mag(wav, params, win_length=400, hop_length=160, n_fft=512): mode = params["mode"] # info("ori_wav_len={}".format(len(wav))) wav = extend_wav(wav, params["train_wav_len"], params["test_wav_len"], mode=mode) # info("extend_wav_len={}".format(len(wav))) wav2feat_mode = 1 if wav2feat_mode == 0: # 1. original: # linear_spect = lin_spectogram_from_wav(wav, hop_length, win_length, n_fft) # mag, _ = librosa.magphase(linear_spect) # mag_T = mag.T pass elif wav2feat_mode == 1: # 2. wav2linear_spectrogram: stft+magphase linear_sft = librosa.stft(np.asfortranarray(wav), n_fft=n_fft, win_length=win_length, hop_length=hop_length) # linear spectrogram # simplify: mag_T = np.abs(linear_sft) # original: # info("linear_sft_shape={}".format(linear_sft.shape)) # linear_spect = linear_sft.T # D = np.asfortranarray(linear_spect) # mag = np.abs(D) # mag_T = mag.T # 拆开 magphase # info("linear_sft_T_array={}".format(linear_spect.shape)) # mag, _ = librosa.magphase() # mag **= 1 # info("linear_sft_mag={}".format(mag.shape)) # phase = np.exp(1.j * np.angle(D)) # info("linear_sft_phase={}".format(phase.shape)) # info("need_a_wav_map_shape={}".format(mag.shape)) pass elif wav2feat_mode == 2: # 3. do not use mag. linear_sft = librosa.stft(np.asfortranarray(wav), n_fft=n_fft, win_length=win_length, hop_length=hop_length) # linear spectrogram info("linear_sft_shape={}".format(linear_sft.shape)) mag_T = linear_sft # using kapre. if mode == "test": mag_T = load_data(mag_T, params["train_spec_len"], params["test_spec_len"], mode) return mag_T
def tr34_model_init(self, class_num):
    """Build the Thin-ResNet34 model pair — a compiled softmax single-label
    `model` plus a sigmoid multilabel `self.ml_model` — and training callbacks.

    :param class_num: number of output classes.
    :return: (compiled softmax model, list of keras callbacks).
    """
    self.tr34_cls_params["n_classes"] = class_num
    model_34 = build_tr34_model(
        net_name='resnet34s',
        input_dim=self.tr34_cls_params["dim"],
        num_class=self.tr34_cls_params["n_classes"],
        tr34_bb_config=TR34_BB_CONFIG
    )
    model = model_34
    # Load pretrained backbone weights (by name, skipping mismatched layers)
    # and freeze the leading layers; tasks below the class-count threshold
    # use the "wild" (smaller) freeze count.
    if TR34_PRETRAIN_PATH:
        if os.path.isfile(TR34_PRETRAIN_PATH):
            model.load_weights(TR34_PRETRAIN_PATH, by_name=True, skip_mismatch=True)
            if self.tr34_cls_params["n_classes"] >= self.tr34_mconfig.CLASS_NUM_THS:
                frz_layer_num = self.tr34_mconfig.INIT_BRZ_L_NUM
            else:
                frz_layer_num = self.tr34_mconfig.INIT_BRZ_L_NUM_WILD
            for layer in model.layers[: frz_layer_num]:
                layer.trainable = False
            info("Note: pretrain {} is file and loaded.".format(TR34_PRETRAIN_PATH))
        else:
            error("Error: pretrain {} is not file".format(TR34_PRETRAIN_PATH))
    pretrain_output = model.output
    weight_decay = self.tr34_mconfig.TR34_INIT_WD
    # Single-label head: softmax + categorical cross-entropy.
    y = keras.layers.Dense(
        self.tr34_cls_params["n_classes"],
        activation="softmax",
        kernel_initializer="orthogonal",
        use_bias=False,
        trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name="prediction",
    )(pretrain_output)
    model = keras.models.Model(model.input, y,
                               name="vggvox_resnet2D_{}_{}_new".format("softmax", "gvlad"))
    opt = keras.optimizers.Adam(lr=1e-3)
    model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["acc"])
    # Backup head for multi-label: sigmoid + binary cross-entropy, sharing the
    # same pretrained backbone output.
    ml_y = keras.layers.Dense(
        self.tr34_cls_params["n_classes"],
        activation="sigmoid",
        kernel_initializer="orthogonal",
        use_bias=False,
        trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name="prediction",
    )(pretrain_output)
    self.ml_model = keras.models.Model(model.input, ml_y,
                                       name="vggvox_resnet2D_{}_{}_new".format("sigmoid", "gvlad"))
    ml_opt = keras.optimizers.Adam(lr=1e-3)
    self.ml_model.compile(optimizer=ml_opt, loss="binary_crossentropy", metrics=["acc"])
    info("model compiled done.")
    if IF_TR34_MODELSUMMARY:
        model.summary()
        self.ml_model.summary()
    callbacks = list()
    if self.tr34_mconfig.ENABLE_CB_ES:
        early_stopping = EarlyStopping(monitor="val_loss", patience=15)
        callbacks.append(early_stopping)
    if self.tr34_mconfig.ENABLE_CB_LRS:
        normal_lr = LearningRateScheduler(self.step_decay)
        callbacks.append(normal_lr)
    return model, callbacks
def put_split_valid_np(self, val_sample_idxs: list):
    """Capture the validation split: store its indices, slice x/y views out of
    the raw train tables, and record the split size."""
    self.split_val_sample_idxs = val_sample_idxs
    idxs = self.split_val_sample_idxs
    self.split_val_x_np_table = self.raw_train_x_np_table[idxs]
    self.split_val_y_np_table = self.raw_train_y_np_table[idxs]
    self.split_val_sample_num = len(val_sample_idxs)
    info("put_split_val_sample_num={}".format(self.split_val_sample_num))