def predict(model_path, res_path):
    """Report RMSE on the validation split of the training data, then
    predict on the test set and export the results.

    The split uses the same seed/size as training, so the validation fold
    matches the one seen during training.
    """
    features, targets = load_data(test=False)
    _, val_x, _, val_y = train_test_split(features, targets, random_state=69, test_size=0.1)
    model = load_model(model_path)
    val_pred = model.predict(val_x)
    # RMSE in original pixel units (targets are scaled by 1/48).
    rmse = np.sqrt(np.mean(np.square(val_pred - val_y))) * 48
    print('best val loss: %.8f' % rmse)
    test_x = load_data(test=True)
    # Undo the [-1, 1] normalisation: scale by 48 and shift to pixel coords.
    res = model.predict(test_x) * 48 + 48
    export(res, res_path)
def recognize(args):
    """Run a trained model on the test fold and dump its outputs.

    Loads the serialized model for the given (n_events, fold, snr)
    configuration, predicts audio-tagging probabilities and per-class
    segmentation masks on the test data, pickles both to the workspace
    "preds" directory, and prints evaluation stats.
    """
    workspace = cfg.workspace
    n_events = args.n_events
    snr = args.snr
    md_na = args.model_name
    te_fold = cfg.te_fold
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr, md_na)
    md = serializations.load(md_path)

    # Load data.
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is not defined in this function — presumably a
    # module-level global; confirm it is set before this is called.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list,
     te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(
        feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold,
        snr=snr, is_scale=is_scale)
    x = te_x
    at_gts = te_at_y
    sed_gts = te_sed_y

    # Recognize.
    [at_pds] = md.predict(x)  # (N, 16)
    observe_nodes = [md.find_layer('detect').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)  # (n_clips, n_time, n_out)
    seg_masks = np.transpose(seg_masks, (0, 2, 1))[:, :, :, np.newaxis]

    # Dump to pickle. Use context managers so the file handles are closed
    # deterministically (the originals were leaked).
    out_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr, os.path.splitext(md_na)[0])
    pp_data.create_folder(out_dir)
    out_at_path = os.path.join(out_dir, "at_probs.p")
    out_seg_masks_path = os.path.join(out_dir, "seg_masks.p")
    with open(out_at_path, 'wb') as f:
        cPickle.dump(at_pds, f, protocol=cPickle.HIGHEST_PROTOCOL)
    with open(out_seg_masks_path, 'wb') as f:
        cPickle.dump(seg_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Print stats. Collapse the frequency axis to get framewise SED probs.
    sed_pds = np.mean(seg_masks, axis=-1)           # (N, n_out, n_time)
    sed_pds = np.transpose(sed_pds, (0, 2, 1))      # (N, n_time, n_out)
    print_stats(at_pds, at_gts, sed_pds, sed_gts)
def train(epochs=100, batch_size=48):
    """Train the keypoint regression model with checkpointing, early
    stopping, TensorBoard logging and learning-rate decay on plateau.
    """
    X, y = load_data(False)
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=69, test_size=0.1)
    model = myModel(96, 96, 1, 30)
    # model = resnet(96, 96, 1, 30)
    # rmsprop = optimizers.RMSprop(decay=0.00001)
    model.compile(optimizer='rmsprop', loss='mean_squared_error', metrics=['acc'])
    # Save the best model (lowest validation loss) seen so far.
    checkpoint = keras.callbacks.ModelCheckpoint('model/best_weights.h5', monitor='val_loss', save_best_only=True)
    # Stop training when val_loss has not improved for 15 epochs.
    earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
    # Record loss and accuracy curves for TensorBoard.
    tb = keras.callbacks.TensorBoard(log_dir='model/log/')
    # Halve the learning rate when val_loss plateaus (floor at 2e-4).
    updatelr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0002)
    callbacks_list = [checkpoint, earlystop, tb, updatelr]
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), callbacks=callbacks_list)
def predict():
    """Run the saved model over the test images and write annotated copies.

    Loads 'model.ml', predicts one (x, y) point per image (coordinates
    normalised to [0, 1]), draws the point on a 10x upscaled copy written
    to data/test2/, and prints predicted vs. actual coordinates.
    """
    model = keras.models.load_model('model.ml')
    x_test, y_test = prepare_data.load_data('test')
    # Reshape to the channel layout the backend expects.
    if keras.backend.image_data_format() == 'channels_first':
        x_test = x_test.reshape(x_test.shape[0], 1, SIZE, SIZE)
    else:
        x_test = x_test.reshape(x_test.shape[0], SIZE, SIZE, 1)
    x_test = x_test / 255
    # One forward pass over the whole batch; the original appended a single
    # prediction array to a throwaway list and iterated pred[0].
    preds = model.predict(x_test)
    # enumerate replaces the original manually-incremented counter.
    for i, z in enumerate(preds):
        img = x_test[i] * 255
        # Draw the predicted point, scaled back to pixel coordinates.
        cv2.circle(img, (int(z[0] * SIZE), int(z[1] * SIZE)), 2, 100, -1)
        img = cv2.resize(img, fx=10, fy=10, dsize=None)
        cv2.imwrite('data/test2/' + str(i) + '.jpg', img)
        print('prediction: ' + str(int(z[0] * SIZE)) + ',' + str(int(z[1] * SIZE)))
        print('actual: ' + str(y_test[i] * SIZE))
def setUp(self):
    # Build the train/validation datasets (90/10 split) from the image and
    # segmentation directories, plus a batch-size-1 generator used by tests.
    self.train_data, self.validation_data = \
        prepare_data.load_data(
            self.IMAGE_DIR, self.SEG_DIR, n_class=2, train_val_rate=0.9
        )
    self.data = prepare_data.generate_data(self.IMAGE_DIR, self.SEG_DIR, 1)
def test_generate_test_dataset(self):
    # With the segmentation dir set to None, the loader should yield images
    # only, so the generator must produce empty segmentation labels.
    test_data, _ = prepare_data.load_data(self.IMAGE_DIR, None, n_class=2, train_val_rate=0.9)
    test_images = prepare_data.generate_data(*test_data, 1)
    for img, seg in test_images:
        self.assertEqual(len(seg), 0)
        break  # checking the first batch is sufficient
def get_avg_stats(args, file_name, bgn_iter, fin_iter, interval_iter):
    """Average saved prediction probabilities over several training
    iterations and compute per-class PR/ROC stats against ground truth.

    Reads "prob_%d_iters.p" pickles from workspace/probs/<file_name>/test
    for iterations [bgn_iter, fin_iter) step interval_iter, averages them,
    dumps the per-class stats to the workspace "stats" directory, and logs
    summary mAP / AUC / d-prime.
    """
    eval_hdf5_path = os.path.join(args.cpickle_dir, "eval.h5")
    workspace = args.workspace

    # Load ground truth.
    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    y = te_y

    # Average prediction probabilities of several iterations.
    # (`it` avoids shadowing the builtin `iter`; files are closed via `with`.)
    prob_dir = os.path.join(workspace, "probs", file_name, "test")
    probs = []
    for it in range(bgn_iter, fin_iter, interval_iter):
        pickle_path = os.path.join(prob_dir, "prob_%d_iters.p" % it)
        with open(pickle_path, 'rb') as f:
            probs.append(cPickle.load(f))
    avg_prob = np.mean(np.array(probs), axis=0)

    # Compute per-class stats.
    t1 = time.time()
    n_out = y.shape[1]
    stats = []
    for k in range(n_out):
        (precisions, recalls, thresholds) = metrics.precision_recall_curve(y[:, k], avg_prob[:, k])
        avg_precision = metrics.average_precision_score(y[:, k], avg_prob[:, k], average=None)
        (fpr, tpr, thresholds) = metrics.roc_curve(y[:, k], avg_prob[:, k])
        auc = metrics.roc_auc_score(y[:, k], avg_prob[:, k], average=None)
        # eer = pp_data.eer(avg_prob[:, k], y[:, k])
        skip = 1000  # subsample the curves so the dumped stats stay small
        stat = {'precisions': precisions[0::skip], 'recalls': recalls[0::skip],
                'AP': avg_precision, 'fpr': fpr[0::skip],
                'fnr': 1. - tpr[0::skip], 'auc': auc}
        stats.append(stat)
    logging.info("Callback time: %s" % (time.time() - t1,))

    # Dump stats.
    dump_path = os.path.join(workspace, "stats", pp_data.get_filename(__file__),
                             "test", "avg_%d_%d_%d.p" % (bgn_iter, fin_iter, interval_iter))
    pp_data.create_folder(os.path.dirname(dump_path))
    with open(dump_path, 'wb') as f:
        cPickle.dump(stats, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Write summary out to log.
    logging.info("bgn_iter, fin_iter, interval_iter: %d, %d, %d" % (bgn_iter, fin_iter, interval_iter))
    logging.info("mAP: %f" % np.mean([e['AP'] for e in stats]))
    auc = np.mean([e['auc'] for e in stats])
    logging.info("auc: %f" % auc)
    logging.info("d_prime: %f" % pp_data.d_prime(auc))
def main():
    """Train a ConvLSTM-style encoder/decoder on the SST dataset."""
    use_cuda = False
    batch_first = True
    n_layer = 4
    batch_size = 128
    hidden_size = [32, 32, 32, 32]
    input_size = 3
    height = 36
    width = 80
    lags = 12
    steps = 12
    channels = 1
    bias = True
    file_path = 'sst.mon.mean1850-2015.nc'
    #####
    # Renamed from `input`/`target` to avoid shadowing the builtin `input`.
    inputs, targets = prepare_data.load_data(file_path, lags, steps)
    data_generator = prepare_data.get_batches(inputs, targets, batch_size,
                                              height, width, channels, lags, steps)
    encoder1 = Encoder(n_layers=n_layer, hidden_sizes=hidden_size,
                       input_sizes=input_size, batch_size=batch_size,
                       channels=channels, height=height, width=width,
                       bias=bias, use_cuda=use_cuda)
    decoder1 = Decoder(n_layers=n_layer, hidden_sizes=hidden_size,
                       input_sizes=input_size, batch_size=batch_size,
                       height=height, width=width, use_cuda=use_cuda)
    if use_cuda:
        encoder1 = encoder1.cuda()
        # Bug fix: the CUDA copy used to be bound to a dead name
        # (attn_decoder1), so trainIters received the CPU decoder.
        decoder1 = decoder1.cuda()
    trainIters(encoder1, decoder1, 75000, data_generator,
               print_every=5000, batch_first=batch_first, use_cuda=use_cuda)
def plot_fig2():
    """Plot the log-Mel input and the model's 16 per-class segmentation
    masks for every test clip (Python 2: uses xrange / integer division).
    """
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold

    # Load data.
    snr = 20
    n_events = 3
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is not defined locally — presumably a module
    # global; confirm it is set before this runs.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale)
    x = te_x
    at_y = te_at_y
    na_list = te_na_list

    # Load model (hard-coded checkpoint path).
    md_path = os.path.join(
        workspace, "models/tmp01/n_events=3/fold=0/snr=20/md2000_iters.p")
    md = serializations.load(md_path)

    # Forward pass to collect the 'seg_masks' layer output.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)
    print(seg_masks.shape)

    # for (i1, na) in enumerate(na_list):
    #     if '00292' in na:
    #         idx = i1
    #         print(idx)

    # One figure per clip: input spectrogram at [0, 0], then one mask per
    # event class in the remaining 4x4 grid.
    for i1 in xrange(len(seg_masks)):
        print(na_list[i1])
        print(at_y[i1])
        fig, axs = plt.subplots(5, 4, sharex=True)
        axs[0, 0].matshow(x[i1].T, origin='lower', aspect='auto', cmap='jet')
        for i2 in xrange(16):
            axs[i2 / 4 + 1, i2 % 4].matshow(seg_masks[i1, i2].T, origin='lower', aspect='auto', vmin=0, vmax=1, cmap='jet')
            axs[i2 / 4 + 1, i2 % 4].set_title(events[i2])
        plt.show()
def newCNNNetwork(model_name):
    """Build and train a small CNN regressor on the training images and
    save it to 'model.ml'.

    `model_name` is only printed for logging; the save path is fixed.
    The final Dense(2, activation='linear') outputs one (x, y) coordinate
    pair per image, trained with MSE.
    """
    print(model_name)
    name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    print(name)
    tensBoard = keras.callbacks.TensorBoard(log_dir='log')
    img_rows, img_cols = SIZE, SIZE
    number_of_categories = 2
    x_train, y_train = prepare_data.load_data('train')
    # Reshape to the channel layout the backend expects.
    if keras.backend.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        #x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        #x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)
    x_train = x_train / 255  # normalise pixel values to [0, 1]
    # y_train = y_train / 128
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(64, (2, 2), input_shape=input_shape))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(0.2))
    # model.add(keras.layers.Conv2D(128, (3, 3)))
    # model.add(keras.layers.Activation('relu'))
    # model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    # model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Flatten())
    #
    # model.add(keras.layers.Dense(512, activation='relu'))
    # model.add(keras.layers.Dropout(0.4))
    model.add(keras.layers.Dense(256, activation='relu'))
    model.add(keras.layers.Dropout(0.1))
    model.add(keras.layers.Dense(number_of_categories, activation='linear'))
    opt = keras.optimizers.Adam(decay=0.000000, lr=0.0015)
    model.compile(optimizer=opt, loss='mse', metrics=['mse'])
    #model.summary()
    model.fit(x_train, y_train, epochs=40, batch_size=32, shuffle=True, validation_split=0.0, callbacks=[tensBoard])
    model.save('model.ml')
    #sess.close()
    keras.backend.clear_session()
def main():
    """Cluster the toy test set with k-means (k=3) and scatter-plot it."""
    print("\nBegin k-means clustering")
    X_test, Y_test = load_data('toy1', 'test')
    k = 3
    print("\nClustering data with k=" + str(k))
    clustering = cluster(X_test, k)
    print("\nDone. Clustering:")
    print(clustering)
    # The figure handle was unused; keep the call so a fresh figure is made.
    plt.figure()
    plt.scatter(X_test[:, 0], X_test[:, 1], marker='+')
    plt.show()
def get_stats(args, bgn_iter, fin_iter, interval):
    """Average audio-tagging probs and segmentation masks saved at several
    training iterations and print AT / SED stats against ground truth.
    """
    workspace = cfg.workspace
    te_fold = cfg.te_fold
    n_events = args.n_events
    snr = args.snr

    # Load ground truth data.
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is presumably a module-level global — confirm.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list,
     te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(
        feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold,
        snr=snr, is_scale=is_scale)
    at_gts = te_at_y
    sed_gts = te_sed_y

    # Load and average the per-iteration dumps. `it` avoids shadowing the
    # builtin `iter`; `with` closes the pickle files (they were leaked).
    preds_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__),
                             "n_events=%d" % n_events, "fold=%d" % te_fold,
                             "snr=%d" % snr)
    at_probs_list, seg_masks_list = [], []
    for it in xrange(bgn_iter, fin_iter, interval):
        at_probs_path = os.path.join(preds_dir, "md%d_iters" % it, "at_probs.p")
        with open(at_probs_path, 'rb') as f:
            at_probs_list.append(cPickle.load(f))
        seg_masks_path = os.path.join(preds_dir, "md%d_iters" % it, "seg_masks.p")
        with open(seg_masks_path, 'rb') as f:
            seg_masks_list.append(cPickle.load(f))

    at_probs = np.mean(at_probs_list, axis=0)   # (n_clips, n_classes)
    seg_masks = np.mean(seg_masks_list, axis=0)  # (n_clips, n_classes, n_time, n_freq)
    # Collapse frequency, then reorder to (n_clips, n_time, n_classes).
    sed_probs = np.mean(seg_masks, axis=-1).transpose(0, 2, 1)

    print_stats(at_probs, at_gts, sed_probs, sed_gts)
def plot_hotmap(args):
    """Visualise the 'hotmap' layer output for each test clip
    (Python 2 file: print statement, xrange, integer division).
    """
    workspace = cfg.workspace
    events = cfg.events
    md_na = args.model_name
    n_events = args.n_events
    te_fold = cfg.te_fold
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is presumably a module-level global — confirm.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, is_scale=is_scale)
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__), "n_events=%d" % n_events, md_na)
    md = serializations.load(md_path)
    x = te_x
    y = te_at_y
    # Forward pass collecting the 'hotmap' layer activations.
    observe_nodes = [md.find_layer('hotmap').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [a4] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)
    print a4.shape
    # One figure per clip: input at [0, 0], one hotmap per class below.
    for i1 in xrange(len(a4)):
        # if te_na_list[i1] == 'CR_lounge_220110_0731.s2700_chunk48':
        print(y[i1])
        # print np.mean(a4[i1], axis=(1,2))
        fig, axs = plt.subplots(5, 4, sharex=True)
        axs[0, 0].matshow(x[i1].T, origin='lower', aspect='auto')
        for i2 in xrange(16):
            axs[i2 / 4 + 1, i2 % 4].matshow(a4[i1, i2].T, origin='lower', aspect='auto', vmin=0, vmax=1)
            axs[i2 / 4 + 1, i2 % 4].set_title(events[i2])
        plt.show()
def validation(self, sess, output):
    # Visualise the model's segmentation on one validation image:
    # run the first batch through `output`, overlay argmax classes on the
    # input image.
    val_image = prepare_data.load_data(self.VALIDATION_DIR, None, n_class=2, train_val_rate=1)[0]
    data = prepare_data.generate_data(*val_image, batch_size=1)
    for Input, _ in data:
        # NOTE(review): feeding None for is_training looks suspicious —
        # confirm False wasn't intended.
        result = sess.run(output, feed_dict={
            self.X: Input, self.is_training: None
        })
        break  # only the first batch is visualised
    result = np.argmax(result[0], axis=2)
    # Index rows of a 3x3 identity to one-hot the class map, scale to 0-255.
    ident = np.identity(3, dtype=np.int8)
    result = ident[result] * 255
    plt.imshow((Input[0] * 255).astype(np.int16))
    plt.imshow(result, alpha=0.2)  # translucent prediction overlay
    plt.show()
def plot_fig4(data_type, audio_idx):
    """Produce the paper's figure-4 panels for one mixture clip:
    (1) log-Mel and linear spectrograms of the mixture,
    (2) the ground-truth ideal binary masks per event class,
    (3) upsampled predicted segmentation masks for three model runs, and
    (4) predicted vs. ground-truth framewise SED probabilities.
    Python 2 file (xrange, integer division in subplot indexing).
    """
    workspace = cfg.workspace
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    events = cfg.events
    te_fold = cfg.te_fold

    # Read audio.
    audio_path = os.path.join(
        workspace, "mixed_audio/n_events=3/%s.mixed_20db.wav" % audio_idx)
    (audio, _) = pp_data.read_audio(audio_path, fs)

    # Calculate log Mel.
    x = _calc_feat(audio)
    sp = _calc_spectrogram(audio)
    print(x.shape)

    # Plot.
    fig, axs = plt.subplots(4, 4, sharex=False)

    # Plot log Mel spectrogram. Hide all cells, then re-enable the used ones.
    for i2 in xrange(16):
        axs[i2 / 4, i2 % 4].set_visible(False)
    axs[0, 0].matshow(x.T, origin='lower', aspect='auto', cmap='jet')
    axs[0, 0].xaxis.set_ticks([0, 60, 120, 180, 239])
    axs[0, 0].xaxis.tick_bottom()
    axs[0, 0].xaxis.set_ticklabels(np.arange(0, 10.1, 2.5))
    axs[0, 0].set_xlabel("time (s)")
    # axs[0,0].xaxis.set_label_coords(1.12, -0.05)
    axs[0, 0].yaxis.set_ticks([0, 16, 32, 48, 63])
    axs[0, 0].yaxis.set_ticklabels([0, 16, 32, 48, 63])
    axs[0, 0].set_ylabel('Mel freq. bin')
    axs[0, 0].set_title("Log Mel spectrogram")
    axs[0, 0].set_visible(True)

    # Plot spectrogram.
    axs[0, 2].matshow(np.log(sp.T + 1.), origin='lower', aspect='auto', cmap='jet')
    axs[0, 2].xaxis.set_ticks([0, 60, 120, 180, 239])
    axs[0, 2].xaxis.tick_bottom()
    axs[0, 2].xaxis.set_ticklabels(np.arange(0, 10.1, 2.5))
    axs[0, 2].set_xlabel("time (s)")
    # axs[0,2].xaxis.set_label_coords(1.12, -0.05)
    axs[0, 2].yaxis.set_ticks([0, 128, 256, 384, 512])
    axs[0, 2].yaxis.set_ticklabels([0, 128, 256, 384, 512])
    axs[0, 2].set_ylabel('FFT freq. bin')
    axs[0, 2].set_title("Spectrogram")
    axs[0, 2].set_visible(True)
    # plt.tight_layout()
    plt.show()

    # Load data.
    snr = 20
    n_events = 3
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is presumably a module-level global — confirm.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale)
    if data_type == "train":
        x = tr_x
        at_y = tr_at_y
        sed_y = tr_sed_y
        na_list = tr_na_list
    elif data_type == "test":
        x = te_x
        at_y = te_at_y
        sed_y = te_sed_y
        na_list = te_na_list

    # Find the clip index matching audio_idx by substring match on the name.
    for (i1, na) in enumerate(na_list):
        if audio_idx in na:
            idx = i1
            print(idx)

    # GT mask: threshold the event/noise SNR at `db` dB per T-F bin.
    (stereo_audio, _) = pp_data.read_stereo_audio(audio_path, target_fs=fs)
    event_audio = stereo_audio[:, 0]
    noise_audio = stereo_audio[:, 1]
    mixed_audio = event_audio + noise_audio
    ham_win = np.hamming(n_window)
    mixed_cmplx_sp = pp_data.calc_sp(mixed_audio, fs, ham_win, n_window, n_overlap)
    mixed_sp = np.abs(mixed_cmplx_sp)
    event_sp = np.abs(
        pp_data.calc_sp(event_audio, fs, ham_win, n_window, n_overlap))
    noise_sp = np.abs(
        pp_data.calc_sp(noise_audio, fs, ham_win, n_window, n_overlap))
    db = -5.
    gt_mask = (np.sign(20 * np.log10(event_sp / noise_sp) - db) + 1.) / 2.  # (n_time, n_freq)

    # One panel per class: GT mask gated by the framewise SED labels.
    fig, axs = plt.subplots(4, 4, sharex=True)
    for i2 in xrange(16):
        ind_gt_mask = gt_mask * sed_y[idx, :, i2][:, None]
        axs[i2 / 4, i2 % 4].matshow(ind_gt_mask.T, origin='lower', aspect='auto', cmap='jet')
        # axs[i2/4, i2%4].set_title(events[i2])
        axs[i2 / 4, i2 % 4].xaxis.set_ticks([])
        axs[i2 / 4, i2 % 4].yaxis.set_ticks([])
        axs[i2 / 4, i2 % 4].set_xlabel('time')
        axs[i2 / 4, i2 % 4].set_ylabel('FFT freq. bin')
    plt.show()

    for filename in ["tmp01", "tmp02", "tmp03"]:
        # Plot up sampled seg masks, averaged over several checkpoints.
        preds_dir = os.path.join(workspace, "preds", filename, "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr)
        at_probs_list, seg_masks_list = [], []
        bgn_iter, fin_iter, interval = 2000, 3001, 200
        for iter in xrange(bgn_iter, fin_iter, interval):
            seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter, "seg_masks.p")
            seg_masks = cPickle.load(open(seg_masks_path, 'rb'))
            seg_masks_list.append(seg_masks)
        seg_masks = np.mean(seg_masks_list, axis=0)  # (n_clips, n_classes, n_time, n_freq)
        print(at_y[idx])

        # Upsample Mel-domain masks back to the linear frequency axis.
        melW = librosa.filters.mel(sr=fs, n_fft=cfg.n_window, n_mels=64, fmin=0., fmax=fs / 2)
        inverse_melW = get_inverse_W(melW)
        spec_masks = np.dot(seg_masks[idx], inverse_melW)  # (n_classes, n_time, 513)
        fig, axs = plt.subplots(4, 4, sharex=True)
        for i2 in xrange(16):
            axs[i2 / 4, i2 % 4].matshow(spec_masks[i2].T, origin='lower', aspect='auto', vmin=0, vmax=1, cmap='jet')
            # axs[i2/4, i2%4].set_title(events[i2])
            axs[i2 / 4, i2 % 4].xaxis.set_ticks([])
            axs[i2 / 4, i2 % 4].yaxis.set_ticks([])
            axs[i2 / 4, i2 % 4].set_xlabel('time')
            axs[i2 / 4, i2 % 4].set_ylabel('FFT freq. bin')
        fig.suptitle(filename)
        plt.show()

        # Plot SED probs: predicted (top) vs. ground truth (below).
        sed_probs = np.mean(seg_masks[idx], axis=-1)  # (n_classes, n_time)
        fig, axs = plt.subplots(4, 4, sharex=False)
        for i2 in xrange(16):
            axs[i2 / 4, i2 % 4].set_visible(False)
        axs[0, 0].matshow(sed_probs, origin='lower', aspect='auto', vmin=0, vmax=1, cmap='jet')
        # axs[0, 0].xaxis.set_ticks([0, 60, 120, 180, 239])
        # axs[0, 0].xaxis.tick_bottom()
        # axs[0, 0].xaxis.set_ticklabels(np.arange(0, 10.1, 2.5))
        axs[0, 0].xaxis.set_ticks([])
        # axs[0, 0].set_xlabel('time (s)')
        axs[0, 0].yaxis.set_ticks(xrange(len(events)))
        axs[0, 0].yaxis.set_ticklabels(events)
        for tick in axs[0, 0].yaxis.get_major_ticks():
            tick.label.set_fontsize(8)
        axs[0, 0].set_visible(True)
        axs[1, 0].matshow(sed_y[idx].T, origin='lower', aspect='auto', vmin=0, vmax=1, cmap='jet')
        # axs[1, 0].xaxis.set_ticks([])
        axs[1, 0].xaxis.set_ticks([0, 60, 120, 180, 239])
        axs[1, 0].xaxis.tick_bottom()
        axs[1, 0].xaxis.set_ticklabels(np.arange(0, 10.1, 2.5))
        axs[1, 0].set_xlabel('time (s)')
        axs[1, 0].yaxis.set_ticks(xrange(len(events)))
        axs[1, 0].yaxis.set_ticklabels(events)
        for tick in axs[1, 0].yaxis.get_major_ticks():
            tick.label.set_fontsize(8)
        axs[1, 0].set_visible(True)
        fig.suptitle(filename)
        plt.show()
def train(args):
    """Train an attention-pooled DNN audio tagger.

    Loads balanced + unbalanced training sets and the eval set from HDF5,
    builds a 3-hidden-layer DNN with sigmoid 'cla' and softmax 'att' heads
    combined by the _attention Lambda, then trains with Adam, saving the
    model and evaluating every `call_freq` iterations until 31 * call_freq.
    """
    cpickle_dir = args.cpickle_dir
    workspace = args.workspace

    # Path of hdf5 data
    bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5")
    eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2
    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    logging.info("Loading data time: %s s" % (time.time() - t1))
    # Bug fix: logging.info(tr_x1.shape, tr_x2.shape) passed a tuple as the
    # message and a stray format arg, which breaks %-formatting at log time.
    logging.info("tr_x1.shape: %s, tr_x2.shape: %s" % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape,))
    (_, n_time, n_freq) = tr_x.shape

    # Build model
    n_hid = 500
    n_out = tr_y.shape[1]
    lay_in = InputLayer(in_shape=(n_time, n_freq))
    a = Dense(n_out=n_hid, act='relu')(lay_in)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    cla = Dense(n_out=n_out, act='sigmoid', name='cla')(a)
    att = Dense(n_out=n_out, act='softmax', name='att')(a)

    # Attention
    lay_out = Lambda(_attention)([cla, att])

    # Compile model
    md = Model(in_layers=[lay_in], out_layers=[lay_out])
    md.compile()
    md.summary(is_logging=True)

    # Save model every several iterations
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)
    save_model = SaveModel(dump_fd=dump_fd, call_freq=call_freq, type='iter', is_logging=True)

    # Callbacks function
    callbacks = [save_model]

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Optimization method
    optimizer = Adam(lr=args.lr)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        if md.iter_ % call_freq == 0:
            # Stats of evaluation dataset
            t1 = time.time()
            te_err = eval(md=md, x=te_x, y=te_y,
                          out_dir=os.path.join(stat_dir, "test"),
                          out_probs_dir=os.path.join(prob_dir, "test"))
            logging.info("Evaluate test time: %s" % (time.time() - t1,))

            # Stats of training dataset (balanced part only)
            t1 = time.time()
            tr_bal_err = eval(md=md, x=tr_x1, y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1,))

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(batch_x=tr_batch_x, batch_y=tr_batch_y,
                          loss_func='binary_crossentropy',
                          optimizer=optimizer, callbacks=callbacks)

        # Stop training when maximum iteration achieves
        if md.iter_ == call_freq * 31:
            break
from datetime import datetime
import os
import tensorflow as tf
from star_rnn import STARCell
from prepare_data import load_data

# Hyper-parameters for the STAR-RNN experiment on the 'add' dataset.
BATCH_SIZE = 200
EPOCHS = 30
RNN_UNITS = 32
NUM_LAYERS = 2
DROPOUT = 0.1

# Load the 'add' dataset with sequence length 200, pre-split into
# train / validation / test.
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data('add', seq_len=200)

# Build the constant tensors on the CPU to keep them out of GPU memory.
with tf.device('cpu'):
    x_train = tf.constant(x_train, dtype=tf.float32)
    y_train = tf.constant(y_train, dtype=tf.float32)
    print("X train: ", x_train.shape, "Y train: ", y_train.shape)
    x_valid = tf.constant(x_valid, dtype=tf.float32)
    y_valid = tf.constant(y_valid, dtype=tf.float32)
    print("X val: ", x_valid.shape, "Y val: ", y_valid.shape)
    x_test = tf.constant(x_test, dtype=tf.float32)
    y_test = tf.constant(y_test, dtype=tf.float32)
    print("X test: ", x_test.shape, "Y test: ", y_test.shape)
    print()
from models import LeNet, AlexNet, VGG13, ResNet34, TestNet
from keras.models import load_model
from keras import optimizers
from prepare_data import load_data
from utils import CLASS_NUM, IMG_SIZE
import os

train_X, test_X, train_y, test_y = load_data(class_num=CLASS_NUM, img_size=IMG_SIZE)

# Train a fresh AlexNet only if no saved model exists; otherwise reload it.
if not os.path.exists('autogta.h5'):
    autogta = AlexNet(train_X[0].shape)
    autogta.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    autogta.fit(x=train_X, y=train_y, epochs=10, batch_size=16)
    autogta.save('autogta.h5')
else:
    autogta = load_model('autogta.h5')

# Evaluate on the held-out test split and report the results.
loss, accu = autogta.evaluate(x=test_X, y=test_y)
print('loss\t{}\naccuracy\t{}'.format(loss, accu))
sequence, clusters = prepare_data.new_build_location_voc( newsequence, locations) top_500 = prepare_data.pop_n(sequence, config.num_sample) total_recall_5 = 0.0 total_f1_5 = 0.0 total_recall_10 = 0.0 total_f1_10 = 0.0 total_user = len(sequence_user) config.user_size = total_user eval_config.user_size = total_user train_set = (sequence, sequence_user, sequence_time, sequence_distance) final_train_set, final_test_set, final_negative_samples, vocabulary_distances = prepare_data.load_data( train_set, locations, config.num_sample, clusters, top_500, 0.1, True) new_train_set_sequence, new_train_set_time, new_train_set_distance, mask_train_x, train_set_user = final_train_set negative_samples, negative_time_sample, negative_distance_samples = final_negative_samples config.vocab_size = locations.shape[1] config.num_steps = new_train_set_sequence[0].shape[1] if new_train_set_sequence[0].shape[0] <= 10: config.batch_size = 1 else: if new_train_set_sequence[0].shape[0] <= 50: config.batch_size = 2 else: if new_train_set_sequence[0].shape[0] <= 100: config.batch_size = 5 else:
from hat.layers.pool import MaxPool2D
from hat.callbacks import SaveModel, Validation
from hat.preprocessing import sparse_to_categorical
from hat.optimizers import Adam
import hat.backend as K
from prepare_data import load_data


# resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
def reshapeX(X):
    N = len(X)
    return X.reshape((N, 1, 28, 28))


### load & prepare data
tr_X, tr_y, va_X, va_y, te_X, te_y = load_data()
tr_X, va_X, te_X = reshapeX(tr_X), reshapeX(va_X), reshapeX(te_X)

# init params
n_in = 784
n_hid = 500
n_out = 10

# sparse label to 1 of K categorical label
tr_y = sparse_to_categorical(tr_y, n_out)
va_y = sparse_to_categorical(va_y, n_out)
te_y = sparse_to_categorical(te_y, n_out)

### Build model
act = 'relu'
seq = Sequential()
def separate(args, bgn_iter, fin_iter, interval):
    """Separate each test mixture into per-event sources plus noise using
    the model's averaged segmentation masks, write the separated wavs, and
    log segmentation stats (fvalue/auc/iou/hit/fa/tp/fn/fp) per event.
    Python 2 file (xrange, cPickle).
    """
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold
    n_events = args.n_events
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    clip_duration = cfg.clip_duration
    snr = args.snr

    # Load ground truth data.
    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    # NOTE(review): is_scale is presumably a module-level global — confirm.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale)
    at_y = te_at_y
    sed_y = te_sed_y
    na_list = te_na_list

    # Load and sum: average the masks dumped at several checkpoints.
    preds_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr)
    at_probs_list, seg_masks_list = [], []
    for iter in xrange(bgn_iter, fin_iter, interval):
        seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter, "seg_masks.p")
        seg_masks = cPickle.load(open(seg_masks_path, 'rb'))
        seg_masks_list.append(seg_masks)
    seg_masks = np.mean(seg_masks_list, axis=0)  # (n_clips, n_classes, n_time, n_freq)
    print(seg_masks.shape)

    #
    audio_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    sep_dir = os.path.join(workspace, "sep_audio", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr)
    pp_data.create_folder(sep_dir)

    ham_win = np.hamming(n_window)
    # Scale factor to undo the analysis window's energy when recovering wavs.
    recover_scaler = np.sqrt((ham_win**2).sum())
    melW = librosa.filters.mel(sr=fs, n_fft=n_window, n_mels=64, fmin=0., fmax=fs / 2)
    inverse_melW = get_inverse_W(melW)  # (64, 513)

    # Per-event accumulators for the segmentation metrics.
    seg_stats = {}
    for e in events:
        seg_stats[e] = {
            'fvalue': [], 'auc': [], 'iou': [], 'hit': [], 'fa': [],
            'tp': [], 'fn': [], 'fp': []
        }

    cnt = 0
    for (i1, na) in enumerate(na_list):
        bare_na = os.path.splitext(na)[0]
        audio_path = os.path.join(audio_dir, "%s.wav" % bare_na)
        # Channel 0 holds the event mix, channel 1 the noise.
        (stereo_audio, _) = pp_data.read_stereo_audio(audio_path, target_fs=fs)
        event_audio = stereo_audio[:, 0]
        noise_audio = stereo_audio[:, 1]
        mixed_audio = event_audio + noise_audio

        mixed_cmplx_sp = pp_data.calc_sp(mixed_audio, fs, ham_win, n_window, n_overlap)
        mixed_sp = np.abs(mixed_cmplx_sp)
        event_sp = np.abs(
            pp_data.calc_sp(event_audio, fs, ham_win, n_window, n_overlap))
        noise_sp = np.abs(
            pp_data.calc_sp(noise_audio, fs, ham_win, n_window, n_overlap))

        sm = seg_masks[i1]  # (n_classes, n_time, n_freq)
        # Upsample the Mel-domain masks to the linear-frequency STFT grid.
        sm_upsampled = np.dot(sm, inverse_melW)  # (n_classes, n_time, 513)
        print(na)

        # Write out separated events (only classes present in the clip).
        for j1 in xrange(len(events)):
            if at_y[i1][j1] == 1:
                (fvalue, auc, iou, tp, fn, fp) = fvalue_iou(sm_upsampled[j1], event_sp, noise_sp, sed_y[i1, :, j1], seg_thres, inside_only=True)
                (hit, fa) = hit_fa(sm_upsampled[j1], event_sp, noise_sp, sed_y[i1, :, j1], seg_thres, inside_only=True)
                seg_stats[events[j1]]['fvalue'].append(fvalue)
                seg_stats[events[j1]]['auc'].append(auc)
                seg_stats[events[j1]]['iou'].append(iou)
                seg_stats[events[j1]]['hit'].append(hit)
                seg_stats[events[j1]]['fa'].append(fa)
                seg_stats[events[j1]]['tp'].append(tp)
                seg_stats[events[j1]]['fn'].append(fn)
                seg_stats[events[j1]]['fp'].append(fp)

                # Mask the mixture spectrogram and invert to a waveform.
                sep_event_sp = sm_upsampled[j1] * mixed_sp
                sep_event_s = spectrogram_to_wave.recover_wav(
                    sep_event_sp, mixed_cmplx_sp, n_overlap=n_overlap,
                    winfunc=np.hamming, wav_len=int(fs * clip_duration))
                sep_event_s *= recover_scaler
                out_event_audio_path = os.path.join(
                    sep_dir, "%s.%s.wav" % (bare_na, events[j1]))
                pp_data.write_audio(out_event_audio_path, sep_event_s, fs)

        # Write out separated noise: whatever mask mass remains.
        sm_noise_upsampled = np.clip(1. - np.sum(sm_upsampled, axis=0), 0., 1.)
        sep_noise_sp = sm_noise_upsampled * mixed_sp
        sep_noise_s = spectrogram_to_wave.recover_wav(
            sep_noise_sp, mixed_cmplx_sp, n_overlap=n_overlap,
            winfunc=np.hamming, wav_len=int(fs * clip_duration))
        sep_noise_s *= recover_scaler
        out_noise_audio_path = os.path.join(sep_dir, "%s.noise.wav" % bare_na)
        pp_data.write_audio(out_noise_audio_path, sep_noise_s, fs)

        cnt += 1
        # if cnt == 2: break

    # Aggregate and log per-event means plus the overall averages.
    fvalues, aucs, ious, hits, fas, tps, fns, fps = [], [], [], [], [], [], [], []
    for e in events:
        fvalues.append(np.mean(seg_stats[e]['fvalue']))
        ious.append(np.mean(seg_stats[e]['iou']))
        aucs.append(np.mean(seg_stats[e]['auc']))
        hits.append(np.mean(seg_stats[e]['hit']))
        fas.append(np.mean(seg_stats[e]['fa']))
        tps.append(np.mean(seg_stats[e]['tp']))
        fns.append(np.mean(seg_stats[e]['fn']))
        fps.append(np.mean(seg_stats[e]['fp']))

    logging.info("%sfvalue\tauc\tiou\tHit\tFa\tHit-Fa\tTP\tFN\tFP" % ("".ljust(16)))
    logging.info(
        "%s*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f"
        % ("*Avg. of each".ljust(16), np.mean(fvalues), np.mean(aucs),
           np.mean(ious), np.mean(hits), np.mean(fas),
           np.mean(hits) - np.mean(fas), np.mean(tps), np.mean(fns),
           np.mean(fps)))
    for i1 in xrange(len(events)):
        logging.info(
            "%s%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f"
            % (events[i1].ljust(16), fvalues[i1], aucs[i1], ious[i1],
               hits[i1], fas[i1], hits[i1] - fas[i1], tps[i1], fns[i1],
               fps[i1]))
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:07:37 2020

@author: lwang
"""
import pickle
from prepare_data import load_data

#%% previously saved dict data
X_train, X_test, Y_train, Y_test = load_data()

#%% save dataset into a dic for later use
Acc_6class_UCI = {
    'X_train': X_train,
    'X_test': X_test,
    'Y_train': Y_train,
    'Y_test': Y_test,
}
# Use a context manager so the file handle is closed even if dump() raises
# (the original open/close pair leaked the handle on error).
with open("Acc_6class_UCI.pkl", "wb") as a_file:
    pickle.dump(Acc_6class_UCI, a_file)

#%% read saved UCI Acceleromter (Acc) data
# a_file = open("Acc_6class_UCI.pkl", "rb")
# data = pickle.load(a_file)
# a_file.close()
from hat.callbacks import SaveModel, Validation
from hat.preprocessing import sparse_to_categorical
from hat.optimizers import SGD, Rmsprop, Adam
import hat.backend as K
import config as cfg
import prepare_data as pp_data


# reshape image from N*1024 to N*3*32*32
def reshape_img_for_cnn(x):
    N = x.shape[0]
    return np.reshape(x, (N, 3, 32, 32))


# load data
tr_X, tr_y, te_X, te_y = pp_data.load_data()

# normalize data (scaler fitted on the training set only)
scaler = pp_data.get_scaler(tr_X)
tr_X = pp_data.transform(tr_X, scaler)
te_X = pp_data.transform(te_X, scaler)

# reshape X to shape: (n_pictures, n_fmaps=3, n_row=32, n_col=32)
tr_X = reshape_img_for_cnn(tr_X)
te_X = reshape_img_for_cnn(te_X)

# init params
n_out = 10

# sparse label to 1-of-K categorical label
tr_y = sparse_to_categorical(tr_y, n_out)
def evaluate_lenet5(learning_rate=0.01, n_epochs=20, nkerns=[20, 50], batch_size=100):
    """Train and evaluate a LeNet-style CNN with minibatch SGD and early stopping.

    NOTE(review): despite the original "MNIST" docstring, the reshape below
    is (batch_size, 1, 32, 32), i.e. 32x32 single-channel images, and the
    final classifier has n_out=2 (binary) -- confirm what load_data() returns.

    NOTE(review): nkerns=[20, 50] is a mutable default argument; harmless
    here because it is never mutated, but worth replacing with a tuple.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    # Fixed seed for reproducible weight initialization.
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    # Each split is a (shared_x, shared_y) pair of Theano shared variables.
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 32 * 32)
    # to a 4D tensor compatible with LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 32, 32))

    # First conv-pool layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28);
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14).
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 32, 32),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Second conv-pool layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10);
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5).
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5).
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # The HiddenLayer is fully-connected, so it operates on 2D matrices of
    # shape (batch_size, num_pixels); flatten(2) keeps the batch dimension
    # and collapses the rest to nkerns[1] * 5 * 5 features.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer (binary output)
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # compiled function reporting the 0-1 error on one test minibatch
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compiled function reporting the 0-1 error on one validation minibatch
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # Plain SGD: one (param, param - lr * grad) update pair per parameter,
    # built automatically instead of writing each rule by hand.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 25000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # check the network on the validation set this often (here: every epoch)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global iteration counter across epochs
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def train(self, parser):
    """Train the U-Net segmentation model with Adam and periodic checkpoints.

    Hyper-parameters (epoch count, L2 weight, batch size, train/val split
    rate, learning rate) are read from *parser*, which is produced by the
    argument-handling code in prepare_data.py.

    Parameters
    ----------
    parser : argparse.Namespace-like object
        Must provide .epoch, .l2, .batch_size, .train_rate, .learning_rate.
    """
    epoch = parser.epoch
    l2 = parser.l2
    batch_size = parser.batch_size
    train_val_rate = parser.train_rate

    # Build the network graph; loss is mean softmax cross-entropy over pixels.
    output = self.UNet(l2_reg=l2, is_training=self.is_training)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                   logits=output))
    # Run batch-norm (etc.) update ops before each optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_ops = tf.train.AdamOptimizer(
            parser.learning_rate).minimize(loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=100)

    # 2-class segmentation data, split into train and validation sets.
    all_train, all_val = prepare_data.load_data(
        self.IMAGE_DIR, self.SEGMENTED_DIR, n_class=2,
        train_val_rate=train_val_rate)

    with tf.Session() as sess:
        init.run()
        for e in range(epoch):
            # Fresh generators each epoch; validation uses one full-size batch.
            data = prepare_data.generate_data(*all_train, batch_size)
            val_data = prepare_data.generate_data(*all_val, len(all_val[0]))
            for Input, Teacher in data:
                sess.run(train_ops, feed_dict={
                    self.X: Input,
                    self.y: Teacher,
                    self.is_training: True
                })
                # NOTE(review): is_training is fed None here (and below) --
                # presumably self.is_training is a placeholder_with_default;
                # confirm, otherwise this would fail at run time.
                ls = loss.eval(feed_dict={
                    self.X: Input,
                    self.y: Teacher,
                    self.is_training: None
                })
            for val_Input, val_Teacher in val_data:
                val_loss = loss.eval(
                    feed_dict={
                        self.X: val_Input,
                        self.y: val_Teacher,
                        self.is_training: None
                    })

            # NOTE(review): ls/val_loss are only bound if the generators
            # yielded at least one batch; empty data would raise NameError.
            print(f'epoch #{e + 1}, loss = {ls}, val loss = {val_loss}')
            # Checkpoint every 100 epochs (including epoch 1).
            if e % 100 == 0:
                saver.save(sess, f"./params/model_{e + 1}epochs.ckpt")
        self.validation(sess, output)
# Derive the task name from the dataset identifier, then resolve the
# dataset to its on-disk pickle path.  (Python 2 code: print statement.)
if 'pubmed' in dataset:
    task = 'pubmed'
elif 'movie' in dataset:
    task = 'movie'
dataset = '../data/' + dataset + '.pkl.gz'

# Model hyper-parameters pulled from the CLI argument dict.
modelType = args['-model']
n_layers, dim = args['-nlayers'], args['-dim']
shared = args['-shared']
saving = args['-saving']
nmean = args['-nmean']
yidx = args['-y']  # which label column to keep for the 'movie' task
if 'dr' in args['-reg']:
    dropout = True
else:
    dropout = False

feats, labels, rel_list, rel_mask, train_ids, valid_ids, test_ids = prepare_data.load_data(dataset)
print len(train_ids), len(valid_ids), len(test_ids)

labels = labels.astype('int64')
# For the multi-label movie task, keep only the selected label column.
if task == 'movie':
    labels = labels[:, yidx : yidx+1]

def remove(y, not_ids):
    # Return a copy of y with every row indexed by not_ids masked to -1
    # (-1 marks labels that must not be used for the current split).
    new_y = numpy.copy(y)
    for ids in not_ids:
        new_y[ids] = -1
    return new_y

# NOTE(review): `type` here is the Python builtin, so this comparison is
# always False and the else-branch always runs.  Probably `task` (or a
# variable assigned outside this excerpt) was intended -- confirm before
# relying on the 'software' branch.
if type == 'software':
    train_y = remove(labels, [test_ids])
    valid_y = remove(labels, [train_ids])
else:
def train():
    """Train a DCGAN: alternate discriminator and generator updates.

    The generator maps 100-dim uniform noise to 28x28 single-channel
    images via upsampling convolutions ending in tanh; the discriminator
    is a small conv net with a sigmoid output.  (Python 2 code: print
    statements.)
    """
    # load data
    batch_size = 128
    tr_X, tr_y, va_X, va_y, te_X, te_y = pp_data.load_data()
    n_batches = int(tr_X.shape[0] / batch_size)

    # Normalize data to [-1, 1] to match the generator's tanh output range.
    tr_X = (tr_X - 0.5) * 2
    # NOTE(review): 50000 training images is hard-coded here -- assumes the
    # loader always returns exactly that many; confirm against load_data().
    tr_X = tr_X.reshape((50000, 1, 28, 28))
    print tr_X.shape

    # Generator: noise (100,) -> (128, 7, 7) -> upsample twice -> (1, 28, 28).
    a0 = InputLayer(100)
    a1 = Dense(128 * 7 * 7, act='linear')(a0)
    a1 = BN(axis=0)(a1)
    a1 = Reshape(out_shape=(128, 7, 7))(a1)
    a1 = Convolution2D(64, 5, 5, act='linear', border_mode=(2, 2))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('leaky_relu')(a1)
    a1 = UpSampling2D(size=(2, 2))(a1)
    a1 = Convolution2D(32, 5, 5, act='linear', border_mode=(2, 2))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('leaky_relu')(a1)
    a1 = UpSampling2D(size=(2, 2))(a1)
    a8 = Convolution2D(1, 5, 5, act='tanh', border_mode=(2, 2), name='a8')(a1)
    g = Model([a0], [a8])
    g.compile()
    g.summary()

    # Discriminator: (1, 28, 28) -> conv/pool x2 -> sigmoid real/fake score.
    b0 = InputLayer((1, 28, 28), name='b0')
    b1 = Convolution2D(64, 5, 5, act='relu', border_mode=(0, 0), name='b1')(b0)
    b1 = MaxPooling2D(pool_size=(2, 2))(b1)
    b1 = Convolution2D(128, 5, 5, act='relu', border_mode=(0, 0))(b1)
    b1 = MaxPooling2D(pool_size=(2, 2))(b1)
    b1 = Flatten()(b1)
    b8 = Dense(1, act='sigmoid')(b1)
    d = Model([b0], [b8])
    d.compile()
    d.summary()

    # Stacked model (discriminator on generator); the trainability toggle
    # freezes d's weights inside d_on_g so generator updates don't move them.
    d_on_g = Model()
    d.set_trainability(False)
    d_on_g.add_models([g, d])
    d.set_trainability(True)
    d_on_g.joint_models('a8', 'b0')
    d_on_g.compile()
    d_on_g.summary()

    # Separate Adam optimizers for discriminator and generator.
    opt_d = Adam(1e-4)
    opt_g = Adam(1e-4)

    # Compiled optimization functions (binary cross-entropy for both).
    f_train_d = d.get_optimization_func(target_dims=[2], loss_func='binary_crossentropy', optimizer=opt_d, clip=None)
    f_train_g = d_on_g.get_optimization_func(target_dims=[2], loss_func='binary_crossentropy', optimizer=opt_g, clip=None)

    noise = np.zeros((batch_size, 100))
    for epoch in range(100):
        print epoch
        for index in range(n_batches):
            # Concatenate generated and real images to train the discriminator.
            noise = np.random.uniform(-1, 1, (batch_size, 100))
            batch_x = tr_X[index * batch_size:(index + 1) * batch_size]
            batch_gx = g.predict(noise)
            batch_x_all = np.concatenate((batch_x, batch_gx))

            # Real image label = 1, generated image label = 0.
            batch_y_all = np.array([1] * batch_size + [0] * batch_size)
            batch_y_all = batch_y_all.reshape((batch_y_all.shape[0], 1))

            # Periodically save a tiled sample of generated images,
            # rescaled from [-1, 1] back to [0, 255].
            if index % 50 == 0:
                image = pp_data.combine_images(batch_gx)
                image = image * 127.5 + 127.5
                if not os.path.exists("img_dcgan"):
                    os.makedirs("img_dcgan")
                Image.fromarray(image.astype(
                    np.uint8)).save("img_dcgan/" + str(epoch) + "_" + str(index) + ".png")

            # Train the discriminator on the mixed batch.
            d_loss = d.train_on_batch(f_train_d, batch_x_all, batch_y_all)

            # Label fresh generated images as 1 to train the generator
            # to fool the (frozen) discriminator.
            noise = np.random.uniform(-1, 1, (batch_size, 100))
            batch_y_all = np.array([1] * batch_size)
            batch_y_all = batch_y_all.reshape((batch_y_all.shape[0], 1))

            # Train the generator through the stacked model.
            g_loss = d_on_g.train_on_batch(f_train_g, noise, batch_y_all)
            print index, "d_loss:", d_loss, "\tg_loss:", g_loss
def test_wrapper(test_agent, args):
    """Run *test_agent* on the held-out test split of args.data.

    The agent's results are written under 'models/<args.name>/'.
    """
    # load_data returns a list; positions 4 and 5 hold the test inputs
    # and labels respectively.
    loaded = load_data(args.data)
    x_test, y_test = loaded[4], loaded[5]
    model_dir = 'models/' + args.name + '/'
    test_agent.test(x_test, y_test, model_dir)
def train(args):
    """Train the audio-tagging CNN whose 'seg_masks' layer doubles as a
    per-event segmentation mask (weakly-supervised SED).

    Parameters
    ----------
    args : argparse.Namespace-like object
        Must provide .n_events (events per mixture) and .snr.
    """
    workspace = cfg.workspace
    te_fold = cfg.te_fold
    n_events = args.n_events
    snr = args.snr

    # Load log-mel features and tagging/SED targets for this fold & SNR.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio",
                            "n_events=%d" % n_events)
    # NOTE(review): is_scale is a free (module-level) name defined outside
    # this excerpt -- confirm it is set before train() is called.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list,
     te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(
        feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold,
        snr=snr, is_scale=is_scale)
    print(tr_x.shape, tr_at_y.shape)
    print(te_x.shape, te_at_y.shape)
    (_, n_time, n_freq) = tr_x.shape
    n_out = len(cfg.events)

    # Dead debug branch: flip to True to visualize the input spectrograms.
    if False:
        for e in tr_x:
            plt.matshow(e.T, origin='lower', aspect='auto')
            plt.show()

    # Build model: stack of 3x5 conv blocks (conv -> BN -> ReLU), dropout
    # every two blocks, then a 1x1 sigmoid conv producing per-event masks,
    # globally average-pooled into clip-level tag probabilities.
    lay_in = InputLayer(in_shape=(n_time, n_freq))
    a = Reshape((1, n_time, n_freq))(lay_in)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear',
               strides=(1, 1), border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Conv2D(n_outfmaps=n_out, n_row=1, n_col=1, act='sigmoid',
               border_mode=(0, 0), name='seg_masks')(a)
    a8 = Lambda(_global_avg_pooling, name='a8')(a)

    md = Model([lay_in], [a8])
    md.compile()
    md.summary(is_logging=True)

    # Callbacks: checkpoint and validate every 50 iterations.
    md_dir = os.path.join(workspace, "models",
                          pp_data.get_filename(__file__),
                          "n_events=%d" % n_events, "fold=%d" % te_fold,
                          "snr=%d" % snr)
    pp_data.create_folder(md_dir)
    save_model = SaveModel(md_dir, call_freq=50, type='iter',
                           is_logging=True)
    validation = Validation(te_x=te_x, te_y=te_at_y, batch_size=50,
                            call_freq=50, metrics=['binary_crossentropy'],
                            dump_path=None, is_logging=True)
    callbacks = [save_model, validation]

    # Hook onto the mask layer so its output can be read out during training.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)

    # Generators for training batches and (optional) evaluation.
    tr_gen = DataGenerator(batch_size=32, type='train')
    eva_gen = DataGenerator2(batch_size=32, type='test')

    # Train: stream batches until iteration 5001, logging the mean
    # training loss over each 50-iteration window.
    loss_ary = []
    t1 = time.time()
    optimizer = Adam(1e-3)
    for (batch_x, batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_at_y]):
        if md.iter_ % 50 == 0:
            logging.info("iter: %d tr_loss: %f time: %s" % (
                md.iter_, np.mean(loss_ary), time.time() - t1,))
            t1 = time.time()
            loss_ary = []
        # if md.iter_ % 200 == 0:
        #     write_out_at_sed(md, eva_gen, f_forward, te_x, te_at_y, te_sed_y, n_events, snr, te_fold)
        if md.iter_ == 5001:
            break
        loss = md.train_on_batch(batch_x, batch_y,
                                 loss_func='binary_crossentropy',
                                 optimizer=optimizer,
                                 callbacks=callbacks)
        loss_ary.append(loss)
def evaluate_separation(args):
    """Score separated audio against ground truth with SDR/SIR/SAR.

    For each test mixture, the ground-truth stereo file holds the clean
    event in channel 0 and the noise in channel 1; the corresponding
    separated event and noise files are read from the sep_audio tree and
    compared per active event.  Results are accumulated per event class
    and pickled to sep_stats/.../sep_stat.p.  (Python 2 code: xrange,
    cPickle.)

    Parameters
    ----------
    args : argparse.Namespace-like object
        Must provide .n_events and .snr.
    """
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    clip_duration = cfg.clip_duration
    n_events = args.n_events
    snr = args.snr

    # Load ground truth data (only the test-fold arrays are used below).
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio",
                            "n_events=%d" % n_events)
    # NOTE(review): is_scale is a free (module-level) name defined outside
    # this excerpt -- confirm it is set before this function is called.
    (tr_x, tr_at_y, tr_sed_y, tr_na_list,
     te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(
        feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold,
        snr=snr, is_scale=is_scale)

    at_y = te_at_y       # clip-level audio-tagging targets
    sed_y = te_sed_y     # frame-level SED targets
    na_list = te_na_list # audio file names

    audio_dir = os.path.join(workspace, "mixed_audio",
                             "n_events=%d" % n_events)

    sep_dir = os.path.join(workspace, "sep_audio",
                           pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr)

    # One sdr/sir/sar list per event class.
    sep_stats = {}
    for e in events:
        sep_stats[e] = {'sdr': [], 'sir': [], 'sar': []}

    cnt = 0
    for (i1, na) in enumerate(na_list):
        bare_na = os.path.splitext(na)[0]
        gt_audio_path = os.path.join(audio_dir, "%s.wav" % bare_na)
        (stereo_audio, _) = pp_data.read_stereo_audio(gt_audio_path,
                                                      target_fs=fs)
        gt_event_audio = stereo_audio[:, 0]
        gt_noise_audio = stereo_audio[:, 1]

        print(na)
        # Score every event class that is actually present in this clip.
        for j1 in xrange(len(events)):
            if at_y[i1][j1] == 1:
                sep_event_audio_path = os.path.join(
                    sep_dir, "%s.%s.wav" % (bare_na, events[j1]))
                (sep_event_audio, _) = pp_data.read_audio(
                    sep_event_audio_path, target_fs=fs)
                sep_noise_audio_path = os.path.join(
                    sep_dir, "%s.noise.wav" % bare_na)
                (sep_noise_audio, _) = pp_data.read_audio(
                    sep_noise_audio_path, target_fs=fs)
                ref_array = np.array((gt_event_audio, gt_noise_audio))
                est_array = np.array((sep_event_audio, sep_noise_audio))
                # inside_only=True restricts scoring to frames where the
                # event is active according to the SED ground truth.
                (sdr, sir, sar) = sdr_sir_sar(ref_array, est_array,
                                              sed_y[i1, :, j1],
                                              inside_only=True)
                print(sdr, sir, sar)
                sep_stats[events[j1]]['sdr'].append(sdr)
                sep_stats[events[j1]]['sir'].append(sir)
                sep_stats[events[j1]]['sar'].append(sar)
        cnt += 1
        # if cnt == 5: break

    print(sep_stats)

    # Persist per-event statistics for later aggregation.
    sep_stat_path = os.path.join(workspace, "sep_stats",
                                 pp_data.get_filename(__file__),
                                 "n_events=%d" % n_events,
                                 "fold=%d" % te_fold,
                                 "snr=%d" % snr,
                                 "sep_stat.p")
    pp_data.create_folder(os.path.dirname(sep_stat_path))
    cPickle.dump(sep_stats, open(sep_stat_path, 'wb'))
import sys sys.path.insert(0,'../..') import prepare_data whole_data_path = '../../../data/combined/whole.txt' # whole_data = prepare_data.load_whole_data(train_data_path, dev_data_path, test_data_path, wiki_data_path) whole_data = prepare_data.load_data(whole_data_path) # convert to lower case whole_data = prepare_data.to_lower(whole_data) # remove punctuation whole_data = prepare_data.remove_punct(whole_data) def get_vocab(data): vocab_list = [] for seq in data: for word in seq[0]: vocab_list.append(word) return vocab_list whole_vocab_list = get_vocab(whole_data) def write_to_file(file_path, vocab_list): with open(file_path, 'w', encoding='utf-8') as f: for word in vocab_list: