def train():
    """Fit the frame-level feed-forward classifier on the development data.

    The free names ``fe_fd``, ``agg_num``, ``hop``, ``fold``, ``n_hid``,
    ``act`` and ``n_out`` are resolved from the enclosing scope. The network
    flattens each ``(n_time, n_freq)`` input, applies three
    Dropout(0.1) + Dense(n_hid) pairs and ends with Dropout(0.1) followed by
    a sigmoid Dense(n_out) layer. A model snapshot is written to
    ``cfg.dev_md_fd`` every 10 epochs.
    """
    # ---- data ----
    tr_X, tr_y, _, te_X, te_y, te_na_list = pp_dev_data.GetAllData(
        fe_fd, agg_num, hop, fold)
    # unpacking also enforces that the training tensor has exactly 3 axes
    batch_num, n_time, n_freq = tr_X.shape
    print('%s %s' % (tr_X.shape, tr_y.shape))
    print('%s %s' % (te_X.shape, te_y.shape))

    # ---- model ----
    n_hidden_layers = 3
    drop_rate = 0.1
    seq = Sequential()
    seq.add(InputLayer((n_time, n_freq)))
    # flatten to 2d: (n_time, n_freq) to 1d: (n_time*n_freq)
    seq.add(Flatten())
    for _layer_idx in range(n_hidden_layers):
        seq.add(Dropout(drop_rate))
        seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(drop_rate))
    seq.add(Dense(n_out, act='sigmoid'))
    md = seq.compile()
    md.summary()

    # ---- optimizer ----
    optimizer = Adam(1e-4)

    # ---- callbacks ----
    # tr_err, te_err are frame based. To get event based err, run recognize.py
    validation = Validation(
        tr_x=tr_X, tr_y=tr_y,
        va_x=None, va_y=None,
        te_x=te_X, te_y=te_y,
        batch_size=2000,
        metrics=['binary_crossentropy'],
        call_freq=1,
        dump_path=None)

    # the snapshot folder has to exist before SaveModel is constructed
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=10)

    # ---- fit ----
    md.fit(
        x=tr_X,
        y=tr_y,
        batch_size=2000,
        n_epochs=100,
        loss_func='binary_crossentropy',
        optimizer=optimizer,
        callbacks=[validation, save_model],
        verbose=1)
def detect():
    """Run the saved model on every evaluation feature file of one scene.

    For each file in the scene's feature folder, the pickled feature matrix
    is loaded, reshaped with ``mat_2d_to_3d``, passed to ``md.predict`` and
    the thresholded result is written as a tab-separated
    ``<first 4 chars of file name>_detect.ann`` file under
    ``cfg.eva_results_fd/<type>``.

    The free names ``type``, ``md_path``, ``agg_num``, ``hop`` and ``thres``
    are resolved from the enclosing scope.

    Raises:
        ValueError: if ``type`` is neither ``'home'`` nor ``'resi'``.
    """
    # init paths
    if type == 'home':
        fe_fd = cfg.eva_fe_mel_home_fd
        id_to_lb = cfg.id_to_lb_home
    elif type == 'resi':
        fe_fd = cfg.eva_fe_mel_resi_fd
        id_to_lb = cfg.id_to_lb_resi
    else:
        # fail with a clear message instead of an UnboundLocalError later on
        raise ValueError("type must be 'home' or 'resi', got %r" % (type,))

    # load model
    md = serializations.load(md_path)

    # do recognize for each test audio, in sorted file-name order
    names = sorted(os.listdir(fe_fd))

    out_fd = cfg.eva_results_fd + '/' + type
    pp_dev_data.CreateFolder(cfg.eva_results_fd)
    pp_dev_data.CreateFolder(out_fd)

    # detect and write out for all audios
    for na in names:
        # NOTE: unpickling executes arbitrary code; only use trusted feature files.
        with open(fe_fd + '/' + na, 'rb') as fe_file:
            X = cPickle.load(fe_file)
        X = mat_2d_to_3d(X, agg_num, hop)
        y_pred = md.predict(X)
        outlist = pp_dev_data.OutMatToList(y_pred, thres, id_to_lb)

        full_na = type + '/audio/' + na[0:4] + '.wav'
        out_txt_path = out_fd + '/' + na[0:4] + '_detect.ann'
        # the context manager closes the file even if a write fails
        with open(out_txt_path, 'w') as f:
            for li in outlist:
                f.write(full_na + '\t' + str(li['event_onset']) + '\t' +
                        str(li['event_offset']) + '\t' +
                        li['event_label'] + '\n')
        print('Write out detection result to %s successfully!'
              % (out_txt_path,))
def detect_cv():
    """Detect events on the evaluation split of one cross-validation fold.

    Every feature file whose first four characters appear in the fold's
    ``*_evaluate.txt`` list is loaded, reshaped with ``mat_2d_to_3d`` and
    passed to ``md.predict``; the thresholded result is written to
    ``cfg.dev_results_fd/<name>_detect.ann``. Finally the
    reference/estimated file pairs are handed to ``pp_dev_data.PrintScore``.

    The free names ``type``, ``fold``, ``md_path``, ``agg_num``, ``hop`` and
    ``thres`` are resolved from the enclosing scope.

    Raises:
        ValueError: if ``type`` is neither ``'home'`` nor ``'resi'``.
    """
    # init paths
    if type == 'home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        id_to_lb = cfg.id_to_lb_home
        te_txt = (cfg.dev_evaluation_fd + '/home_fold' + str(fold) +
                  '_evaluate.txt')
        meta_fd = cfg.dev_meta_home_fd
    elif type == 'resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        id_to_lb = cfg.id_to_lb_resi
        te_txt = (cfg.dev_evaluation_fd + '/residential_area_fold' +
                  str(fold) + '_evaluate.txt')
        meta_fd = cfg.dev_meta_resi_fd
    else:
        # fail with a clear message instead of an UnboundLocalError later on
        raise ValueError("type must be 'home' or 'resi', got %r" % (type,))

    # load model
    md = serializations.load(md_path)

    # get wav names to be detected
    te_names = pp_dev_data.GetWavNamesFromTxt(te_txt)

    # do recognize for each test audio, in sorted file-name order
    names = sorted(os.listdir(fe_fd))

    # detect and write out to txt
    pp_dev_data.CreateFolder(cfg.dev_results_fd)
    file_list = []
    for na in names:
        if na[0:4] not in te_names:
            continue
        print(na)
        gt_file = meta_fd + '/' + na[0:4] + '.ann'
        out_file = cfg.dev_results_fd + '/' + na[0:4] + '_detect.ann'

        # NOTE: unpickling executes arbitrary code; only use trusted feature files.
        with open(fe_fd + '/' + na, 'rb') as fe_file:
            X = cPickle.load(fe_file)
        X = mat_2d_to_3d(X, agg_num, hop)
        # predictions are consumed immediately; nothing reads them afterwards,
        # so they are not accumulated across files
        y_pred = md.predict(X)
        out_list = pp_dev_data.OutMatToList(y_pred, thres, id_to_lb)
        pp_dev_data.PrintListToTxt(out_list, out_file)
        file_list.append({'reference_file': gt_file,
                          'estimated_file': out_file})

    # print results for this fold
    pp_dev_data.PrintScore(file_list, labels)
# aggregate data X3d = mat_2d_to_3d( X, agg_num, hop ) Xlist.append( X3d ) return np.concatenate( Xlist, axis=0 ) # size: n_songs*n_chunks*agg_num*n_in def GetEvaSegData( fe_fd, agg_num, hop ): te_Xlist = [] names = os.listdir( fe_fd ) te_na_list = [] # read one line for na in names: fe_path = fe_fd + '/' + na X = cPickle.load( open( fe_path, 'rb' ) ) # aggregate data X3d = mat_2d_to_3d( X, agg_num, hop ) te_Xlist.append( X3d ) te_na_list.append( na[0:-2] ) return np.array( te_Xlist ), te_na_list if __name__ == "__main__": pp_dev_data.CreateFolder( cfg.eva_fe_fd ) pp_dev_data.CreateFolder( cfg.eva_fe_mel_fd ) pp_dev_data.GetMel( cfg.eva_wav_fd, cfg.eva_fe_mel_fd, n_delete=0 )
# NOTE(review): script fragment; `agg_num` is used below but is assigned
# outside the visible lines.
hop = 15
fold = 1
n_labels = len(cfg.labels)

# load model (the snapshot named 'md10.p' in the evaluation model folder)
md = serializations.load(cfg.eva_md_fd + '/md10.p')

# prepare data
te_X = pp_eva_data.GetAllData(cfg.eva_fe_mel_fd, cfg.eva_csv_path, agg_num, hop)

# do recognize and evaluation
thres = 0.4  # thres, tune to prec=recall
n_labels = len(cfg.labels)  # re-assigns the same value computed above
pp_dev_data.CreateFolder(cfg.eva_results_fd)
txt_out_path = cfg.eva_results_fd + '/task4_results.txt'
# NOTE(review): this handle is not closed anywhere in the visible lines
fwrite = open(txt_out_path, 'w')
with open(cfg.eva_csv_path, 'rb') as f:
    reader = csv.reader(f)
    # materialise all rows while the csv file is still open
    lis = list(reader)

# process one csv row per iteration
for li in lis:
    # the second csv column is used as the audio base name
    na = li[1]
    full_na = na + '.16kHz.wav'

    # get features, tags
    fe_path = cfg.eva_fe_mel_fd + '/' + na + '.f'
    # NOTE(review): the file object passed to cPickle.load is never
    # explicitly closed.
    X = cPickle.load(open(fe_path, 'rb'))
md.summary() # validation # tr_err, te_err are frame based. To get event based err, run recognize.py validation = Validation(tr_x=tr_X, tr_y=tr_y, va_x=None, va_y=None, te_x=None, te_y=None, metrics=['binary_crossentropy'], call_freq=1, dump_path=None) # save model pp_dev_data.CreateFolder(cfg.eva_md_fd) save_model = SaveModel(dump_fd=cfg.eva_md_fd, call_freq=10) # callbacks callbacks = [validation, save_model] # optimizer optimizer = Rmsprop(1e-4) # fit model md.fit(x=tr_X, y=tr_y, batch_size=100, n_epochs=1000, loss_func='binary_crossentropy', optimizer=optimizer,
def create_folders():
    """Create the evaluation model and result folders under ``cfg.scrap_fd``."""
    # parent folders are listed before their children
    sub_dirs = (
        '/Md_eva_bob',
        '/Results_eva',
        '/Results_eva/bob_eer',
    )
    for sub_dir in sub_dirs:
        pp_dev_data.CreateFolder(cfg.scrap_fd + sub_dir)
def train_cv_model():
    """Train the feed-forward classifier on one cross-validation fold.

    The free names ``type``, ``fold``, ``agg_num``, ``hop`` and ``n_hid`` are
    resolved from the enclosing scope. Training data comes from the fold's
    ``*_train.txt`` list; targets are expanded with ``sparse_to_categorical``.
    The network is Flatten, three Dense(n_hid, relu) + Dropout(0.1) pairs and
    a sigmoid Dense(n_out) output. A snapshot is written to ``cfg.dev_md_fd``
    every 5 epochs.
    """
    # ---- scene-specific settings ----
    if type == 'home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        fold_stem = cfg.dev_evaluation_fd + '/home_fold' + str(fold)
    elif type == 'resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        fold_stem = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold)

    n_out = len(labels)
    tr_txt = fold_stem + '_train.txt'
    te_txt = fold_stem + '_evaluate.txt'  # not used further in this function

    # ---- data ----
    tr_X, tr_y = pp_dev_data.LoadAllData(fe_fd, tr_txt, lb_to_id, agg_num, hop)
    tr_y = sparse_to_categorical(tr_y, n_out)
    print(tr_X.shape)
    print(tr_y.shape)
    n_freq = tr_X.shape[2]

    # ---- model ----
    n_hidden_layers = 3
    seq = Sequential()
    seq.add(InputLayer((agg_num, n_freq)))
    seq.add(Flatten())
    for _layer_idx in range(n_hidden_layers):
        seq.add(Dense(n_hid, act='relu'))
        seq.add(Dropout(0.1))
    seq.add(Dense(n_out, 'sigmoid'))
    md = seq.combine()

    # print summary info of model
    md.summary()

    # ---- optimization method ----
    optimizer = Adam(1e-3)

    # ---- callbacks (optional) ----
    # save model every n epoch
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=5)

    # validate model every n epoch
    validation = Validation(
        tr_x=tr_X, tr_y=tr_y,
        va_x=None, va_y=None,
        te_x=None, te_y=None,
        metrics=['binary_crossentropy'],
        call_freq=1,
        dump_path=None)

    # ---- train model ----
    md.fit(
        x=tr_X,
        y=tr_y,
        batch_size=20,
        n_epochs=100,
        loss_func='binary_crossentropy',
        optimizer=optimizer,
        callbacks=[validation, save_model])
def create_folders():
    """Create the development model and result folders under ``cfg.scrap_fd``.

    The last folder is specific to the current ``fold`` (a free name resolved
    from the enclosing scope).
    """
    # parent folders are listed before their children
    static_sub_dirs = (
        '/Md_dev_bob',
        '/Results_dev',
        '/Results_dev/bob_eer',
    )
    for sub_dir in static_sub_dirs:
        pp_dev_data.CreateFolder(cfg.scrap_fd + sub_dir)

    # per-fold result folder, created last so its parents already exist
    fold_dir = cfg.scrap_fd + '/Results_dev/bob_eer/fold' + str(fold)
    pp_dev_data.CreateFolder(fold_dir)