def main(args):
    """Train a CPC model or run inference with it, selected by ``args.pos1``.

    In 'train' mode the feature directory under ``args.train_dir`` is loaded
    and training runs for ``args.epochs`` epochs; in any other mode a resumed
    checkpoint is mandatory and inference results go to ``args.result_dir``.
    """
    if args.pos1 == 'train':
        dataset = myDataset(os.path.join(args.train_dir, 'feature'))
        # Loader uses the CPC collate function (prediction/negative sampling).
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.prediction_num,
                                                args.neg_num,
                                                args.reduce_times))
        saver = pytorch_saver(10, args.save_dir)
        # Build the model; optionally warm-start from a previous checkpoint.
        model = CPC(args.input_dim,
                    args.feat_dim,
                    reduce_times=args.reduce_times,
                    prediction_num=args.prediction_num)
        if args.resume_dir != '':
            print('loading model')
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
        model.train()
        model.cuda()
        args.log = os.path.join(args.save_dir, args.log)
        train(model, loader, saver, args.epochs, args.learning_rate, args.log)
    else:
        dataset = myDataset(os.path.join(args.test_dir, 'feature'),
                            os.path.join(args.test_dir, 'phn_align.pkl'))
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=4,
            collate_fn=myDataset.get_collate_fn(args.prediction_num,
                                                args.neg_num,
                                                args.reduce_times,
                                                train=False))
        # Inference requires a trained checkpoint; bail out otherwise.
        if args.resume_dir == '':
            print("resume should exist in inference mode", file=sys.stderr)
            sys.exit(-1)
        else:
            model = CPC(args.input_dim,
                        args.feat_dim,
                        reduce_times=args.reduce_times,
                        prediction_num=args.prediction_num)
            print('loading model')
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
            model.eval()
            model.cuda()
            inference(model, loader, args.result_dir, args.reduce_times)
def test():
    """Decode the test set with a saved CTC model and report scores.

    Loads a checkpoint either from ``args.model_path`` or from the config
    file, rebuilds the matching model class, decodes every utterance with a
    greedy or beam decoder, prints per-utterance transcripts, and finally
    prints aggregate scores and decoding time.

    NOTE(review): the printed "error rate" values are computed as
    ``(1 - errors/total) * 100``, i.e. an accuracy-style score — confirm
    whether the label or the formula is the intended one.
    """
    args = parser.parse_args()
    # Resolve checkpoint and data directory: CLI path wins over config file.
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        package = torch.load(model_path)

    # Hyper-parameters are stored inside the checkpoint package.
    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = args.decode_type

    test_dataset = myDataset(data_dir,
                             data_set='test',
                             feature_type=feature_type,
                             out_type=out_type,
                             n_feats=n_feats)

    # Rebuild the exact model class that produced the checkpoint; the CNN
    # variant needs the CNN-aware data loader.
    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size,
                             rnn_hidden_size=hidden_size,
                             rnn_layers=layers,
                             rnn_type=rnn_type,
                             bidirectional=True,
                             batch_norm=True,
                             num_class=num_class,
                             drop_out=drop_out)
        test_loader = myCNNDataLoader(test_dataset, batch_size=8,
                                      shuffle=False, num_workers=4,
                                      pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size,
                        rnn_hidden_size=hidden_size,
                        rnn_layers=layers,
                        rnn_type=rnn_type,
                        bidirectional=True,
                        batch_norm=True,
                        num_class=num_class,
                        drop_out=drop_out)
        test_loader = myDataLoader(test_dataset, batch_size=8,
                                   shuffle=False, num_workers=4,
                                   pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone,
                                space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone,
                              top_paths=40, beam_width=20,
                              blank_index=0, space_idx=-1,
                              lm_path=None, lm_alpha=0.8, lm_beta=1,
                              cutoff_prob=1.0,
                              dic=test_dataset.phone_word)

    total_wer = 0
    total_cer = 0
    t_start = time.time()
    for batch in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = batch
        # The pure-RNN model expects time-major, packed input.
        if model.name == 'CTC_RNN':
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs = model(inputs)
        probs = probs.data.cpu()
        #print(probs)
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))

        for ref, hyp in zip(labels, decoded):
            print("origin: " + ref)
            print("decoded: " + hyp)

        # Accumulate edit distances and reference lengths for this batch.
        cer = 0
        wer = 0
        for ref, hyp in zip(labels, decoded):
            cer += decoder.cer(hyp, ref)
            wer += decoder.wer(hyp, ref)
            decoder.num_word += len(ref.split())
            decoder.num_char += len(ref)
        total_cer += cer
        total_wer += wer

    CER = (1 - float(total_cer) / decoder.num_char) * 100
    WER = (1 - float(total_wer) / decoder.num_word) * 100
    print("Character error rate on test set: %.4f" % CER)
    print("Word error rate on test set: %.4f" % WER)
    time_used = (time.time() - t_start) / 60.0
    print("Time used for decoding %d sentences: %.4f minutes" %
          (len(test_dataset), time_used))
def main():
    """Train an LSTM-CTC model from a config file, with LR decay on plateau.

    Reads all data/model/training hyper-parameters from ``args.conf``, trains
    epoch by epoch, tracks CER on train and dev sets, decays the learning rate
    when the dev score plateaus, plots progress to visdom, and finally saves
    the best checkpoint and writes its path back into the config file.

    Fixes applied:
    - ``params`` dict was missing a comma between ``'seed'`` and ``'decay'``
      entries (syntax error).
    - ``drop_out`` was read from the ``'num_class'`` config key instead of
      ``'drop_out'``.
    - ``adjust_rate_count`` is now initialized before the training loop; it
      was previously read/incremented before any assignment on early epochs.
    """
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except:
        print("conf file not exists")
    logger = init_logger(os.path.join(args.log_dir, 'train_lstm_ctc.log'))

    # --- Data configuration ---
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = myDataset(data_dir, data_set='train',
                              feature_type=feature_type,
                              out_type=out_type, n_feats=n_feats)
    train_loader = myDataLoader(train_dataset, batch_size=batch_size,
                                shuffle=True, num_workers=4,
                                pin_memory=False)
    dev_dataset = myDataset(data_dir, data_set="dev",
                            feature_type=feature_type,
                            out_type=out_type, n_feats=n_feats)
    dev_loader = myDataLoader(dev_dataset, batch_size=batch_size,
                              shuffle=False, num_workers=4,
                              pin_memory=False)

    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1,
                            blank_index=0)

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    # FIX: was cf.getfloat('Model', 'num_class') — wrong key.
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CTC_RNN(rnn_input_size=rnn_input_size,
                    rnn_hidden_size=rnn_hidden_size,
                    rnn_layers=rnn_layers,
                    rnn_type=rnn_type,
                    bidirectional=bidirectional,
                    batch_norm=batch_norm,
                    num_class=num_class,
                    drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    print(model.name)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except:
        # Fall back to a device-generated seed when none is configured.
        seed = torch.cuda.initial_seed()

    # FIX: original dict was missing the comma after 'seed': seed.
    params = {'num_epoches': num_epoches, 'end_adjust_acc': end_adjust_acc,
              'seed': seed, 'decay': decay, 'learning_rate': init_lr,
              'weight_decay': weight_decay, 'batch_size': batch_size,
              'feature_type': feature_type, 'n_feats': n_feats,
              'out_type': out_type}

    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()

    print(params)

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    title = dataset + ' ' + feature_type + str(n_feats) + ' LSTM_CTC'
    opts = [dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
            dict(title=title + " CER on Train", ylabel='CER', xlabel='Epoch'),
            dict(title=title + ' CER on DEV', ylabel='DEV CER',
                 xlabel='Epoch')]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    acc_best = -100
    acc_best_true = -100
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    # FIX: initialize before use (was referenced before assignment).
    adjust_rate_count = 0
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        # Apply pending learning-rate decay requested by the plateau logic.
        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model, train_loader, loss_fn, optimizer, logger)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)

        #model_path_accept = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'.pkl'
        #model_path_reject = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'_rejected.pkl'

        # Plateau detection: clear improvement resets the counter and keeps
        # the checkpoint; a near-tie increments the counter; a regression
        # resets it.
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
            #torch.save(model.state_dict(), model_path_reject)

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        # After 10 plateau epochs: decay LR next epoch and roll back to the
        # best checkpoint; after 8 decays, stop training.
        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)
        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes"
                    % (count, acc, time_used))

        # Update the three visdom curves (loss, train CER, dev CER).
        x_axis = range(count)
        y_axis = [loss_results[0:count], training_cer_results[0:count],
                  dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X=np.array(x_axis),
                                         Y=np.array(y_axis[x]),
                                         opts=opts[x],)
            else:
                viz.line(X=np.array(x_axis), Y=np.array(y_axis[x]),
                         win=viz_window[x], update='replace',)

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    # Record the best checkpoint path back into the config file.
    cf.set('Model', 'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch'] = count
    torch.save(CTC_RNN.save_package(model, optimizer=optimizer, epoch=params,
                                    loss_results=loss_results,
                                    training_cer_results=training_cer_results,
                                    dev_cer_results=dev_cer_results),
               best_path)
def main(args):
    """Train a 1-D QACNN model or run inference, selected by ``args.pos1``.

    'train' mode builds train/dev loaders and trains with periodic dev
    evaluation; any other mode loads a checkpoint (mandatory) and writes
    predictions to ``args.test_result``.
    """
    if args.pos1 == 'train':
        train_set = myDataset(args.train_data)
        dev_set = myDataset(args.dev_data)
        # Both loaders share the same collate function (question/option
        # padding lengths).
        collate = myDataset.get_collate_fn(args.question_length,
                                           args.option_length)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=4,
            collate_fn=collate)
        dev_loader = torch.utils.data.DataLoader(
            dev_set,
            batch_size=args.batch_size,
            num_workers=4,
            collate_fn=collate)
        saver = pytorch_saver(10, args.save_dir)
        # Build the model; optionally warm-start from a checkpoint.
        model = qacnn_1d(args.question_length,
                         args.option_length,
                         args.filter_num,
                         args.filter_size,
                         args.cnn_layers,
                         args.dnn_size,
                         train_set.word_dim,
                         dropout=args.dropout)
        if args.resume_dir != '':
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
        model.train()
        model.cuda()
        args.log = os.path.join(args.save_dir, args.log)
        train(model, train_loader, dev_loader, saver, args.epochs,
              args.learning_rate, args.log)
    else:
        test_set = myDataset(args.test_data)
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=1,
            collate_fn=myDataset.get_collate_fn(args.question_length,
                                                args.option_length),
            shuffle=False,
            num_workers=2)
        # Inference requires a trained checkpoint; bail out otherwise.
        if args.resume_dir == '':
            print("resume should exist in inference mode", file=sys.stderr)
            sys.exit(-1)
        else:
            model = qacnn_1d(args.question_length,
                             args.option_length,
                             args.filter_num,
                             args.filter_size,
                             args.cnn_layers,
                             args.dnn_size,
                             test_set.word_dim,
                             dropout=args.dropout)
            model.load_state_dict(
                pytorch_saver.load_dir(args.resume_dir)['state_dict'])
            model.eval()
            model.cuda()
            inference(model, test_loader, args.test_result)
default=0.0001, metavar='N', help='learning rate for training (default: 0.001)') args = parser.parse_args() if os.path.exists('logs/' + datasetname) == False: os.makedirs('logs/' + datasetname) log_dir = 'logs/' + datasetname train_path = './dataset/iris/iris_train.data' test_path = './dataset/iris/iris_test.data' for i in range(1, repeat + 1): sdae_savepath = ("model/sdae-run-iris-%d.pt" % i) if os.path.exists("model/sdae-run-iris-%d.pt" % i) == False: print("Experiment #%d" % i) write_log("Experiment #%d" % i, log_dir) train_data = myDataset(train_path, -1) # test_data=myDataset(test_path,-1) train_loader = data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, collate_fn=train_data.collate_fn) # test_loader = data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True, # collate_fn=train_data.collate_fn) # pretrain sdae = StackedDAE(input_dim=4, z_dim=2, binary=False, encodeLayer=[8], decodeLayer=[8], activation="relu",
def main(args):
    """Train a UNet colorization GAN (generator + conv discriminator).

    Builds generator/discriminator, optionally resumes both from checkpoint
    files, sets up losses and data loaders for the selected dataset, then
    alternates train/validate per epoch, plotting curves and optionally
    saving checkpoints.

    Fix applied: model checkpoints were loaded with
    ``torch.load(model_G)`` / ``torch.load(model_D)`` — passing the model
    objects instead of the checkpoint paths ``args.model_G`` /
    ``args.model_D``, which fails at runtime when resuming.
    """
    # Initialize models
    # n_channels is input channels and n_classes is output channels
    model_G = UNet(n_channels=1, n_classes=3)
    model_D = ConvDis(in_channels=3, in_size=args.image_size)

    # Initialize start epochs for G and D
    start_epoch_G = start_epoch_D = 0

    # Start epoch for this session
    start_epoch = 0

    # Load saved models if resume training
    if args.model_G:
        print('Resume model G: %s' % args.model_G)
        # FIX: load from the checkpoint path, not the model object.
        checkpoint_G = torch.load(args.model_G)
        model_G.load_state_dict(checkpoint_G['state_dict'])
        start_epoch_G = checkpoint_G['epoch']
    if args.model_D:
        print('Resume model D: %s' % args.model_D)
        # FIX: load from the checkpoint path, not the model object.
        checkpoint_D = torch.load(args.model_D)
        model_D.load_state_dict(checkpoint_D['state_dict'])
        start_epoch_D = checkpoint_D['epoch']
    # G and D must have been saved at the same epoch to resume coherently.
    assert start_epoch_G == start_epoch_D

    # Shift models to GPU
    model_G.cuda()
    model_D.cuda()

    # Initialize optimizers
    optimizer_G = optim.Adam(model_G.parameters(), lr=args.lr_G,
                             betas=(0.5, 0.999), eps=1e-8,
                             weight_decay=args.weight_decay)
    optimizer_D = optim.Adam(model_D.parameters(), lr=args.lr_D,
                             betas=(0.5, 0.999), eps=1e-8,
                             weight_decay=args.weight_decay)

    # Load optimizers if resume training
    if args.model_G:
        optimizer_G.load_state_dict(checkpoint_G['optimizer'])
    if args.model_D:
        optimizer_D.load_state_dict(checkpoint_D['optimizer'])

    # Loss Function (globals are read by train()/validate()).
    global criterion
    criterion = nn.BCELoss()
    global L1
    L1 = nn.L1Loss()
    global FeatureLoss
    # NOTE: rebinds the global class name to an instance; done once, before
    # any other use, so this is safe but intentional-looking only here.
    FeatureLoss = FeatureLoss()

    # Dataset
    data_root = args.path
    dataset = args.dataset
    if dataset == 'unsplash':
        from data_loader import Unsplash_Dataset as myDataset
    elif dataset == 'cifar':
        from data_loader import CIFAR_Dataset as myDataset
    # elif dataset == 'bob':
    #     from load_data import Spongebob_Dataset as myDataset
    else:
        raise ValueError('dataset type not supported')

    # Define transform
    image_transform = transforms.Compose(
        [transforms.CenterCrop(args.image_size),
         transforms.ToTensor()])

    data_train = myDataset(data_root, mode='train',
                           transform=image_transform, types='raw',
                           shuffle=True)
    train_loader = data.DataLoader(data_train, batch_size=args.batch_size,
                                   shuffle=False)
    data_val = myDataset(data_root, mode='test',
                         transform=image_transform, types='raw',
                         shuffle=True)
    val_loader = data.DataLoader(data_val, batch_size=args.batch_size,
                                 shuffle=False)

    global val_bs
    val_bs = val_loader.batch_size

    # set up plotter, path, etc.
    global iteration, print_interval, plotter, plotter_basic, plot_train_result_interval
    iteration = 0
    print_interval = 5
    plot_train_result_interval = 100
    plotter = Plotter_GAN_TV()
    plotter_basic = Plotter_GAN()

    # Output directories are keyed by date and hyper-parameters.
    global img_path
    size = str(args.image_size)
    date = str(datetime.datetime.now().month) + '_' + str(
        datetime.datetime.now().day)
    img_path = '/scratch/as3ek/image_colorization/results/img/%s/GAN_%s%s_%dL1_bs%d_%s_lr_D%s_lr_G%s/' \
        % (date, args.dataset, size, args.lamb, args.batch_size, 'Adam',
           str(args.lr_D), str(args.lr_G))
    model_path = '/scratch/as3ek/image_colorization/results/model/%s/GAN_%s%s_%dL1_bs%d_%s_lr_D%s_lr_G%s/' \
        % (date, args.dataset, size, args.lamb, args.batch_size, 'Adam',
           str(args.lr_D), str(args.lr_G))
    if not os.path.exists(img_path):
        os.makedirs(img_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # NOTE(review): start_epoch is reset to 0 here, so the resumed epoch
    # numbers never shift this loop — confirm whether resuming should
    # continue from start_epoch_G instead.
    start_epoch = 0
    for epoch in range(start_epoch, args.num_epoch):
        print('Epoch {}/{}'.format(epoch, args.num_epoch - 1))
        print('-' * 20)
        # if epoch == 0:
        #     val_lerrG, val_errD = validate(val_loader, model_G, model_D, optimizer_G, optimizer_D, epoch=-1)
        # train
        train_errG, train_errD = train(train_loader, model_G, model_D,
                                       optimizer_G, optimizer_D, epoch,
                                       iteration)
        # validate
        val_lerrG, val_errD = validate(val_loader, model_G, model_D,
                                       optimizer_G, optimizer_D, epoch)

        plotter.train_update(train_errG, train_errD)
        plotter.val_update(val_lerrG, val_errD)
        plotter.draw(img_path + 'train_val.png')

        if args.save:
            print('Saving check point')
            save_checkpoint({'epoch': epoch + 1,
                             'state_dict': model_G.state_dict(),
                             'optimizer': optimizer_G.state_dict(),
                             },
                            filename=model_path+'G_epoch%d.pth.tar' \
                            % epoch)
            save_checkpoint({'epoch': epoch + 1,
                             'state_dict': model_D.state_dict(),
                             'optimizer': optimizer_D.state_dict(),
                             },
                            filename=model_path+'D_epoch%d.pth.tar' \
                            % epoch)
def main():
    """Train a (CNN-)RNN CTC model from a config file, decaying LR on a
    dev-loss plateau and saving the best checkpoint.

    Reads model/CNN/training parameters from ``args.conf``, builds
    ``CTC_Model``, trains epoch by epoch, tracks dev loss and dev accuracy,
    rolls back to the best checkpoint when the loss plateaus, plots curves to
    visdom, and finally saves the best model and records its path in the
    config file.

    Fixes applied:
    - ``long(seed)`` is Python-2-only; on Python 3 it raised NameError and
      the bare ``except`` silently replaced the configured seed with a random
      one. Now uses ``int(seed)``.
    - ``adjust_rate_count`` and the checkpoint state holders are initialized
      before the loop (previously possible NameError / UnboundLocalError).
    """
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except:
        print("conf file not exists")

    try:
        seed = cf.get('Training', 'seed')
        # FIX: was long(seed) (Python 2 only); on Python 3 that raised
        # NameError and silently discarded the configured seed.
        seed = int(seed)
    except:
        seed = torch.cuda.initial_seed()
    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed_all(seed)

    logger = init_logger(os.path.join(args.log_dir, 'train_ctc_model.log'))

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    rnn_param = {"rnn_input_size": rnn_input_size,
                 "rnn_hidden_size": rnn_hidden_size,
                 "rnn_layers": rnn_layers,
                 "rnn_type": rnn_type,
                 "bidirectional": bidirectional,
                 "batch_norm": batch_norm}
    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    add_cnn = cf.getboolean('Model', 'add_cnn')

    # Per-layer CNN parameters: [channel, kernel, stride, padding, pooling].
    cnn_param = {}
    layers = cf.getint('CNN', 'layers')
    channel = eval(cf.get('CNN', 'channel'))
    kernel_size = eval(cf.get('CNN', 'kernel_size'))
    stride = eval(cf.get('CNN', 'stride'))
    padding = eval(cf.get('CNN', 'padding'))
    pooling = eval(cf.get('CNN', 'pooling'))
    batch_norm = cf.getboolean('CNN', 'batch_norm')
    activation_function = activate_f[cf.get('CNN', 'activation_function')]
    cnn_param['batch_norm'] = batch_norm
    cnn_param['activate_function'] = activation_function
    cnn_param["layer"] = []
    for layer in range(layers):
        layer_param = [channel[layer], kernel_size[layer], stride[layer],
                       padding[layer]]
        if pooling is not None:
            layer_param.append(pooling[layer])
        else:
            layer_param.append(None)
        cnn_param["layer"].append(layer_param)

    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn,
                      cnn_param=cnn_param, num_class=num_class,
                      drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    for idx, m in enumerate(model.modules()):
        print(idx, m)
        break

    # --- Data configuration ---
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    mel = cf.getboolean('Data', 'mel')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = myDataset(data_dir, data_set='train',
                              feature_type=feature_type, out_type=out_type,
                              n_feats=n_feats, mel=mel)
    dev_dataset = myDataset(data_dir, data_set="dev",
                            feature_type=feature_type, out_type=out_type,
                            n_feats=n_feats, mel=mel)
    if add_cnn:
        train_loader = myCNNDataLoader(train_dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=4,
                                       pin_memory=False)
        dev_loader = myCNNDataLoader(dev_dataset, batch_size=batch_size,
                                     shuffle=False, num_workers=4,
                                     pin_memory=False)
    else:
        train_loader = myDataLoader(train_dataset, batch_size=batch_size,
                                    shuffle=True, num_workers=4,
                                    pin_memory=False)
        dev_loader = myDataLoader(dev_dataset, batch_size=batch_size,
                                  shuffle=False, num_workers=4,
                                  pin_memory=False)

    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1,
                            blank_index=0)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    params = {'num_epoches': num_epoches, 'end_adjust_acc': end_adjust_acc,
              'mel': mel, 'seed': seed, 'decay': decay,
              'learning_rate': init_lr, 'weight_decay': weight_decay,
              'batch_size': batch_size, 'feature_type': feature_type,
              'n_feats': n_feats, 'out_type': out_type}
    print(params)

    if USE_CUDA:
        model = model.cuda()

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    if add_cnn:
        title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    else:
        title = dataset + ' ' + feature_type + str(n_feats) + ' LSTM_CTC'
    opts = [
        dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
        dict(title=title + " Loss on Dev", ylabel='DEV Loss', xlabel='Epoch'),
        dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch')
    ]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    loss_best = 1000
    loss_best_true = 1000
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    acc_best = 0
    acc_best_true = 0
    # FIX: initialize counter and checkpoint holders before the loop so the
    # plateau logic and the final load_state_dict cannot hit unbound names.
    adjust_rate_count = 0
    model_state = copy.deepcopy(model.state_dict())
    op_state = copy.deepcopy(optimizer.state_dict())
    best_model_state = copy.deepcopy(model.state_dict())
    best_op_state = copy.deepcopy(optimizer.state_dict())
    start_time = time.time()
    loss_results = []
    dev_loss_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        # Apply pending learning-rate decay requested by the plateau logic.
        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model, train_loader, loss_fn, optimizer, logger,
                     add_cnn=add_cnn, print_every=20)
        loss_results.append(loss)
        acc, dev_loss = dev(model, dev_loader, loss_fn, decoder, logger,
                            add_cnn=add_cnn)
        print("loss on dev set is %.4f" % dev_loss)
        logger.info("loss on dev set is %.4f" % dev_loss)
        dev_loss_results.append(dev_loss)
        dev_cer_results.append(acc)

        #adjust learning rate by dev_loss
        if dev_loss < (loss_best - end_adjust_acc):
            loss_best = dev_loss
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (dev_loss < loss_best + end_adjust_acc):
            adjust_rate_count += 1
            if dev_loss < loss_best and dev_loss < loss_best_true:
                loss_best_true = dev_loss
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            # Clear regression: trigger a decay immediately.
            adjust_rate_count = 10

        # Track the best dev accuracy separately from the loss plateau logic.
        if acc > acc_best:
            acc_best = acc
            best_model_state = copy.deepcopy(model.state_dict())
            best_op_state = copy.deepcopy(optimizer.state_dict())

        '''
        #adjust learning rate by dev_acc
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            loss_best = dev_loss
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                loss_best = dev_loss
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
            #torch.save(model.state_dict(), model_path_reject)
        '''

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        # After 10 plateau epochs: decay LR next epoch and roll back to the
        # best-loss checkpoint; after 8 decays, stop training.
        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            if loss_best > loss_best_true:
                loss_best = loss_best_true
            #if acc_best < acc_best_true:
            #    acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)
        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes"
                    % (count, acc, time_used))

        # Update the three visdom curves (train loss, dev loss, dev CER).
        x_axis = range(count)
        y_axis = [loss_results[0:count], dev_loss_results[0:count],
                  dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X=np.array(x_axis),
                                         Y=np.array(y_axis[x]),
                                         opts=opts[x],)
            else:
                viz.line(X=np.array(x_axis), Y=np.array(y_axis[x]),
                         win=viz_window[x], update='replace',)

    print("End training, best cv loss is: %.4f, acc is: %.4f" %
          (loss_best, acc_best))
    logger.info("End training, best loss acc is: %.4f, acc is: %.4f" %
                (loss_best, acc_best))
    # Persist the best-accuracy checkpoint, not the last epoch.
    model.load_state_dict(best_model_state)
    optimizer.load_state_dict(best_op_state)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    cf.set('Model', 'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch'] = count
    torch.save(
        CTC_Model.save_package(model, optimizer=optimizer, epoch=params,
                               loss_results=loss_results,
                               dev_loss_results=dev_loss_results,
                               dev_cer_results=dev_cer_results),
        best_path)
def main():
    """Train a CNN-LSTM-CTC model from a config file, with LR decay on
    plateau and visdom plotting.

    Same training-loop structure as the LSTM-CTC trainer: per-epoch train,
    CER on train set, CER/acc on the held-out set, plateau-based LR decay
    with checkpoint rollback, then save the best checkpoint and record its
    path in the config file.

    Fixes applied:
    - ``drop_out`` was read from the ``'num_class'`` config key instead of
      ``'drop_out'``.
    - ``adjust_rate_count`` is now initialized before the loop (was
      referenced before assignment on early epochs).
    """
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except:
        print("conf file not exists")
    logger = init_logger(os.path.join(args.log_dir,
                                      'train_cnn_lstm_ctc.log'))

    # --- Data configuration ---
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = myDataset(data_dir, data_set='train',
                              feature_type=feature_type,
                              out_type=out_type, n_feats=n_feats)
    train_loader = myCNNDataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=4,
                                   pin_memory=False)
    # NOTE(review): the "dev" split here is loaded with data_set="test" —
    # confirm whether this is intentional.
    dev_dataset = myDataset(data_dir, data_set="test",
                            feature_type=feature_type,
                            out_type=out_type, n_feats=n_feats)
    dev_loader = myCNNDataLoader(dev_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=4,
                                 pin_memory=False)

    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1,
                            blank_index=0)

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    # FIX: was cf.getfloat('Model', 'num_class') — wrong key.
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CNN_LSTM_CTC(rnn_input_size=rnn_input_size,
                         rnn_hidden_size=rnn_hidden_size,
                         rnn_layers=rnn_layers,
                         rnn_type=rnn_type,
                         bidirectional=bidirectional,
                         batch_norm=batch_norm,
                         num_class=num_class,
                         drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    print(model.name)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except:
        # Fall back to a device-generated seed when none is configured.
        seed = torch.cuda.initial_seed()

    params = {'num_epoches': num_epoches, 'end_adjust_acc': end_adjust_acc,
              'seed': seed, 'decay': decay, 'learning_rate': init_lr,
              'weight_decay': weight_decay, 'batch_size': batch_size,
              'feature_type': feature_type, 'n_feats': n_feats,
              'out_type': out_type}

    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()

    print(params)

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom(env='863_corpus')
    title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    opts = [dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
            dict(title=title + " CER on Train", ylabel='CER', xlabel='Epoch'),
            dict(title=title + ' CER on DEV', ylabel='DEV CER',
                 xlabel='Epoch')]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    acc_best = -100
    acc_best_true = -100
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    # FIX: initialize before use (was referenced before assignment).
    adjust_rate_count = 0
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        # Apply pending learning-rate decay requested by the plateau logic.
        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model, train_loader, loss_fn, optimizer, logger,
                     print_every=20)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)

        #model_path_accept = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'.pkl'
        #model_path_reject = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'_rejected.pkl'

        # Plateau detection: clear improvement resets the counter and keeps
        # the checkpoint; a near-tie increments the counter; a regression
        # resets it.
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0
            #torch.save(model.state_dict(), model_path_reject)

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        # After 10 plateau epochs: decay LR next epoch and roll back to the
        # best checkpoint; after 8 decays, stop training.
        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)
        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes"
                    % (count, acc, time_used))

        # Update the three visdom curves (loss, train CER, dev CER).
        x_axis = range(count)
        y_axis = [loss_results[0:count], training_cer_results[0:count],
                  dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X=np.array(x_axis),
                                         Y=np.array(y_axis[x]),
                                         opts=opts[x],)
            else:
                viz.line(X=np.array(x_axis), Y=np.array(y_axis[x]),
                         win=viz_window[x], update='replace',)

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    # Record the best checkpoint path back into the config file.
    cf.set('Model', 'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch'] = count
    torch.save(
        CNN_LSTM_CTC.save_package(model, optimizer=optimizer, epoch=params,
                                  loss_results=loss_results,
                                  training_cer_results=training_cer_results,
                                  dev_cer_results=dev_cer_results),
        best_path)
def test():
    """Decode one batch from the *training* set with a saved CTC model and
    visualize intermediate features (input spectrum, CNN output, RNN input,
    per-frame class probabilities) in visdom.

    Uses hard-coded checkpoint / data paths for this experiment and stops
    after the first batch (``break`` at the bottom of the loop).
    """
    model_path = '../log/exp_cnn_lstm_ctc_spectrum201/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum_stride_1_2/exp2_82.1483/best_model_cv80.8660423723.pkl'
    package = torch.load(model_path)
    data_dir = '/home/fran/Documents/CTC_pytorch_data/data_prepare/data'

    # Model hyper-parameters travel inside the checkpoint package.
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    # Older checkpoints may lack the 'mel' flag; default to False.
    # FIX: was a bare ``except:`` that would also mask unrelated errors.
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats, mel=mel)
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param,
                      num_class=num_class, drop_out=drop_out)
    # batch_size=1 so the visualizations below show a single utterance.
    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset, batch_size=1, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset, batch_size=1, shuffle=False,
                                   num_workers=4, pin_memory=False)
    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone)

    # 48->39 phone mapping used to report results in the reduced phone set.
    # FIX: context manager — the handle was previously closed manually and
    # leaked if pickle.load raised.
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            # RNN-only model expects time-major input before packing.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs, visual = model(inputs, visualize=True)
        probs = probs.data.cpu()
        if add_cnn:
            # CNN striding changed the time resolution; rescale the
            # fractional lengths to the output frame count.
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        # Map both references and hypotheses into the 39-phone set.
        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)
        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])
        if add_cnn:
            # visual = [input spectrum, CNN output, RNN input, per-frame probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn')
            after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
            vis.heatmap(after_cnn, opts=opts)
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_before_rnn')
            before_rnn = visual[2].transpose(0, 1)[0].transpose(0, 1).data.cpu()
            vis.heatmap(before_rnn, opts=opts)
            show_prob = visual[3].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        else:
            # visual = [input spectrum, per-frame probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)
            show_prob = visual[1].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break  # visualize a single batch only
def test():
    """Decode one batch from the test set with a saved CNN_LSTM_CTC /
    CTC_RNN checkpoint and visualize its internal features (input
    spectrum, two CNN stages, RNN input, per-frame probabilities) in visdom.

    Uses hard-coded checkpoint / data paths and stops after the first batch.
    """
    model_path = '../log/exp_cnn_lstm_ctc/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum/exp3_81.7186/best_model_cv80.4941223351.pkl'
    package = torch.load(model_path)
    data_dir = '../data_prepare/data'

    # Network hyper-parameters are stored in the checkpoint package.
    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    # Older checkpoints may lack the 'mel' flag; default to False.
    # FIX: was a bare ``except:`` that would also mask unrelated errors.
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir, data_set='test', feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats, mel=mel)
    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size, rnn_hidden_size=hidden_size,
                             rnn_layers=layers, rnn_type=rnn_type, bidirectional=True,
                             batch_norm=True, num_class=num_class, drop_out=drop_out)
        # batch_size=1 so the visualizations show a single utterance.
        test_loader = myCNNDataLoader(test_dataset, batch_size=1, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size, rnn_hidden_size=hidden_size,
                        rnn_layers=layers, rnn_type=rnn_type, bidirectional=True,
                        batch_norm=True, num_class=num_class, drop_out=drop_out)
        test_loader = myDataLoader(test_dataset, batch_size=8, shuffle=False,
                                   num_workers=4, pin_memory=False)
    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone, top_paths=3, beam_width=20,
                              blank_index=0, space_idx=-1, lm_path=None,
                              dict_path=None, trie_path=None, lm_alpha=10,
                              lm_beta1=1, lm_beta2=1)

    # 48->39 phone mapping used to report results in the reduced phone set.
    # FIX: context manager — the handle was previously closed manually and
    # leaked if pickle.load raised.
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if model.name == 'CTC_RNN':
            # RNN-only model expects time-major input before packing.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs, visual = model(inputs, test=True)
        probs = probs.data.cpu()
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        # Map both references and hypotheses into the 39-phone set.
        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)
        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])
        # visual = [spectrum, after cnn1, after cnn2, before rnn, probs]
        spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
        opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
        vis.heatmap(spectrum_inputs, opts=opts)
        opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn1')
        after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
        vis.heatmap(after_cnn, opts=opts)
        opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn2')
        after_cnn2 = visual[2][0][0].transpose(0, 1).data.cpu()
        vis.heatmap(after_cnn2, opts=opts)
        opts = dict(title=labels[0], xlabel="frame", ylabel='feature_before_rnn')
        before_rnn = visual[3].transpose(0, 1)[0].transpose(0, 1).data.cpu()
        vis.heatmap(before_rnn, opts=opts)
        show_prob = visual[4].transpose(0, 1)[0].data.cpu()
        line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
        x = show_prob.size()[0]
        vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break  # visualize a single batch only
def test():
    # Evaluate a saved CTC checkpoint (CNN_LSTM_CTC or CTC_RNN) on the test
    # set, print per-utterance decodes, and report aggregate character/word
    # scores plus total decoding time.
    args = parser.parse_args()
    # Checkpoint location: explicit --model_path wins; otherwise both the
    # model file and the data directory come from the config file.
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        package = torch.load(model_path)
    # Network hyper-parameters are stored inside the checkpoint package.
    input_size = package['input_size']
    layers = package['rnn_layers']
    hidden_size = package['hidden_size']
    rnn_type = package['rnn_type']
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    model_type = package['name']
    drop_out = package['_drop_out']
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = args.decode_type
    test_dataset = myDataset(data_dir, data_set='test', feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats)
    # Rebuild the same architecture the checkpoint was trained with.
    if model_type == 'CNN_LSTM_CTC':
        model = CNN_LSTM_CTC(rnn_input_size=input_size, rnn_hidden_size=hidden_size,
                             rnn_layers=layers, rnn_type=rnn_type, bidirectional=True,
                             batch_norm=True, num_class=num_class, drop_out=drop_out)
        test_loader = myCNNDataLoader(test_dataset, batch_size=8, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        model = CTC_RNN(rnn_input_size=input_size, rnn_hidden_size=hidden_size,
                        rnn_layers=layers, rnn_type=rnn_type, bidirectional=True,
                        batch_norm=True, num_class=num_class, drop_out=drop_out)
        test_loader = myDataLoader(test_dataset, batch_size=8, shuffle=False,
                                   num_workers=4, pin_memory=False)
    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()
    # Greedy (best-path) decoding by default; otherwise beam search with a
    # fixed parameter set (no external LM: lm_path=None).
    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone, top_paths=40, beam_width=20,
                              blank_index=0, space_idx=-1, lm_path=None,
                              lm_alpha=0.8, lm_beta=1, cutoff_prob=1.0,
                              dic=test_dataset.phone_word)
    total_wer = 0
    total_cer = 0
    start = time.time()
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if model.name == 'CTC_RNN':
            # RNN-only model takes time-major input before packing.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if model.name == 'CTC_RNN':
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs = model(inputs)
        probs = probs.data.cpu()
        #print(probs)
        decoded = decoder.decode(probs, input_size_list)
        # Recover per-utterance reference label strings from the flat target
        # tensor using the decoder's helpers.
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])
        # Accumulate edit distances; the decoder also keeps running totals of
        # reference words/characters in num_word / num_char.
        cer = 0
        wer = 0
        for x in range(len(labels)):
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer
    # NOTE(review): these are 100 * (1 - errors/total), i.e. accuracy-style
    # scores, although the messages say "error rate" — confirm intended.
    CER = (1 - float(total_cer) / decoder.num_char) * 100
    WER = (1 - float(total_wer) / decoder.num_word) * 100
    print("Character error rate on test set: %.4f" % CER)
    print("Word error rate on test set: %.4f" % WER)
    end = time.time()
    time_used = (end - start) / 60.0
    print("Time used for decoding %d sentences: %.4f minutes" % (len(test_dataset), time_used))
def main():
    """Train and evaluate a fusion ("mix") model on top of two pretrained
    GRU models' outputs, then append the resulting scores to
    models/results.csv.
    """
    save_path = os.path.join("models", "SoM_Mix_1")
    save_description = "mix: 1-2"

    train_datasets = []
    dev_datasets = []
    test_datasets = []

    def _collect(feature_set):
        # Load one modality's dataset and stash its train/dev/test splits.
        loader = DatasetLoader()
        loader.dataset = feature_set
        loader.loadDataset()
        train_datasets.append(loader.trainDataset)
        dev_datasets.append(loader.devDataset)
        test_datasets.append(loader.testDataset)

    _collect(Datasets().audio_features_mfcc_functionals)  # dataset 1
    _collect(Datasets().visual_features_functionals)      # dataset 2

    # Pretrained single-modality models whose outputs get fused.
    models = [
        os.path.join("models", "SoM_GRU_1", "best"),
        os.path.join("models", "SoM_GRU_2", "best"),
    ]

    # the paths to where the fused features would be (or already are)
    train_csv = os.path.join(save_path, "trainData.csv")
    dev_csv = os.path.join(save_path, "devData.csv")
    test_csv = os.path.join(save_path, "testData.csv")

    # comment out the next three lines if already got the CSV files of fused feats for train
    modelsOutToCSVs(models, train_datasets, train_csv)
    modelsOutToCSVs(models, dev_datasets, dev_csv)
    modelsOutToCSVs(models, test_datasets, test_csv)

    fused_train = myDataset(address=train_csv, tars=[1, 2])
    fused_dev = myDataset(address=dev_csv, tars=[1, 2])
    fused_test = myDataset(address=test_csv, tars=[1, 2])

    # The target for which the model will get trained: the difference of the
    # two loaded target columns. Depends on how it is loaded from the dataset!
    tarsFunc = lambda tars: tars[:, 0] - tars[:, 1]

    feat_size = fused_train.shape()[-1]
    net = fullyConnected(feat_size, 1, hiddenSize=32)
    wrapper = ModelWrapper([net], tabuList=[], device='cuda:0')

    # comment out the next lines if you just want to test
    wrapper.train(fused_train, epochs=2500, firstEpoch=1, savePath=save_path,
                  evalDataset=fused_dev,
                  csvPath=os.path.join(save_path, "trainLog.csv"),
                  computeLossFor=len(fused_train),
                  computeLossForEval=len(fused_dev),
                  tolerance=5, tarsFunc=tarsFunc, plusTar=-1)

    # Reload the best checkpoint and score dev/test with both target signs.
    wrapper.load_model(os.path.join(save_path, "best"))
    _, eval_loss = wrapper.testCompute(fused_dev, verbose=True,
                                       computeLossFor=len(fused_dev),
                                       tarsFunc=tarsFunc, plusTar=-1)
    _, eval_loss2 = wrapper.testCompute(fused_dev, verbose=True,
                                        computeLossFor=len(fused_dev),
                                        tarsFunc=tarsFunc, plusTar=1)
    _, test_loss = wrapper.testCompute(fused_test, verbose=True,
                                       computeLossFor=len(fused_test),
                                       tarsFunc=tarsFunc, plusTar=-1)
    _, test_loss2 = wrapper.testCompute(fused_test, verbose=True,
                                        computeLossFor=len(fused_test),
                                        tarsFunc=tarsFunc, plusTar=1)

    # CCC columns are reported as 1 - loss.
    writeLineToCSV(
        os.path.join("models", "results.csv"),
        ["savePath", "saveDescription", "evalLoss", "evalLoss2", "evalCCC",
         "evalCCC2", "testLoss", "testLoss2", "testCCC", "testCCC2"],
        [save_path, save_description, eval_loss, eval_loss2, 1 - eval_loss,
         1 - eval_loss2, test_loss, test_loss2, 1 - test_loss, 1 - test_loss2],
    )
os.makedirs('logs/'+datasetname) writer = SummaryWriter(log_dir='logs/'+datasetname) log_dir='logs/'+datasetname train_path = './dataset/iris/iris_train.data' # test_path = '/DATACENTER1/xiao.peng/DCN_keras-master/dataset/RCV1/Processed/data-0.pkl' # all_path='./dataset/wine/wine.data' for i in range(1, repeat+1): # sdae_savepath = ("model/sdae-dcn-run-"+datasetname+"-%d.pt" % i) #best pretrain sdae_savepath="D:\code\dec-pytorch\model\sdae-run-iris-1.pt" if os.path.exists(sdae_savepath)==False: print("Experiment #%d" % i) write_log("Experiment #%d" % i,log_dir) train_data=myDataset(train_path,-1, '.data') # test_data=myDataset(test_path,-1, '.pkl') train_loader = data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, collate_fn=train_data.collate_fn,num_workers=4) # test_loader = data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True, # collate_fn=train_data.collate_fn,num_workers=4) # pretrain sdae = StackedDAE(input_dim=4, z_dim=2, binary=False, encodeLayer=[8], decodeLayer=[8], activation="relu", dropout=0,log_dir=log_dir) sdae.cuda() # print(sdae) sdae.pretrain(train_loader, lr=args.sdae_pre_lr, batch_size=batch_size, num_epochs=20, corrupt=0.2, loss_type="mse")
def test():
    """Decode one batch from the *training* set with a saved CTC model and
    visualize intermediate features (input spectrum, CNN output, RNN input,
    per-frame class probabilities) in visdom.

    Uses hard-coded checkpoint / data paths for this experiment and stops
    after the first batch (``break`` at the bottom of the loop).
    """
    model_path = '../log/exp_cnn_lstm_ctc_spectrum201/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum_stride_1_2/exp2_82.1483/best_model_cv80.8660423723.pkl'
    package = torch.load(model_path)
    data_dir = '../data_prepare/data'

    # Model hyper-parameters travel inside the checkpoint package.
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    # Older checkpoints may lack the 'mel' flag; default to False.
    # FIX: was a bare ``except:`` that would also mask unrelated errors.
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats, mel=mel)
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param,
                      num_class=num_class, drop_out=drop_out)
    # batch_size=1 so the visualizations below show a single utterance.
    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset, batch_size=1, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset, batch_size=1, shuffle=False,
                                   num_workers=4, pin_memory=False)
    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone)

    # 48->39 phone mapping used to report results in the reduced phone set.
    # FIX: context manager — the handle was previously closed manually and
    # leaked if pickle.load raised.
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = []
    for i in range(49):
        legend.append(test_dataset.int2phone[i])

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            # RNN-only model expects time-major input before packing.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs, visual = model(inputs, visualize=True)
        probs = probs.data.cpu()
        if add_cnn:
            # CNN striding changed the time resolution; rescale the
            # fractional lengths to the output frame count.
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        # Map both references and hypotheses into the 39-phone set.
        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)
        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])
        if add_cnn:
            # visual = [input spectrum, CNN output, RNN input, per-frame probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn')
            after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
            vis.heatmap(after_cnn, opts=opts)
            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_before_rnn')
            before_rnn = visual[2].transpose(0, 1)[0].transpose(0, 1).data.cpu()
            vis.heatmap(before_rnn, opts=opts)
            show_prob = visual[3].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        else:
            # visual = [input spectrum, per-frame probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)
            show_prob = visual[1].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability", legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        break  # visualize a single batch only
def test():
    """Evaluate a saved CTC_Model checkpoint on the data set selected by
    --data_set, print per-utterance decodes, and report aggregate
    character/word scores plus total decoding time.

    Checkpoint location comes from --model_path, or from the config file
    (which also supplies beam-search parameters for the BeamDecoder path).
    """
    args = parser.parse_args()
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../data_prepare/data'
    else:
        cf = ConfigParser.ConfigParser()
        cf.read(args.conf)
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        beam_width = cf.getint('Decode', 'beam_width')
        lm_alpha = cf.getfloat('Decode', 'lm_alpha')
        package = torch.load(model_path)

    # Network hyper-parameters are stored inside the checkpoint package.
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    # Older checkpoints may lack the 'mel' flag; default to False.
    # FIX: was a bare ``except:`` that would also mask unrelated errors.
    try:
        mel = package['epoch']['mel']
    except KeyError:
        mel = False
    #weight_decay = package['epoch']['weight_decay']
    #print(weight_decay)
    decoder_type = args.decode_type

    test_dataset = myDataset(data_dir, data_set=args.data_set, feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats, mel=mel)
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param,
                      num_class=num_class, drop_out=drop_out)
    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset, batch_size=8, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset, batch_size=8, shuffle=False,
                                   num_workers=4, pin_memory=False)
    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone, beam_width=beam_width,
                              blank_index=0, space_idx=-1, lm_path=args.lm_path,
                              lm_alpha=lm_alpha)

    # Optional 48->39 phone mapping for reporting in the reduced phone set.
    if args.map_48_39 is not None:
        import pickle
        # FIX: context manager — the handle was previously closed manually
        # and leaked if pickle.load raised.
        with open(args.map_48_39, 'rb') as f:
            map_dict = pickle.load(f)
        print(map_dict)

    total_wer = 0
    total_cer = 0
    start = time.time()
    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            # RNN-only model expects time-major input before packing.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)
        probs = model(inputs)
        if add_cnn:
            # CNN striding changed the time resolution; rescale the
            # fractional lengths to the output frame count.
            # FIX: was ``input_sizes_list`` on both sides — an undefined
            # name (NameError) that also left input_size_list unscaled;
            # the sibling visualization routines use input_size_list.
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]
        probs = probs.data.cpu()
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))
        if args.map_48_39 is not None:
            # Map both references and hypotheses into the 39-phone set.
            for x in range(len(labels)):
                label = labels[x].strip().split(' ')
                for i in range(len(label)):
                    label[i] = map_dict[label[i]]
                labels[x] = ' '.join(label)
                decode = decoded[x].strip().split(' ')
                for i in range(len(decode)):
                    decode[i] = map_dict[decode[i]]
                decoded[x] = ' '.join(decode)
        for x in range(len(labels)):
            print("origin : " + labels[x])
            print("decoded: " + decoded[x])
        # Accumulate edit distances; the decoder keeps running totals of
        # reference words/characters in num_word / num_char.
        cer = 0
        wer = 0
        for x in range(len(labels)):
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer
    # NOTE(review): these are 100 * (1 - errors/total), i.e. accuracy-style
    # scores, although the messages say "error rate" — confirm intended.
    CER = (1 - float(total_cer) / decoder.num_char) * 100
    WER = (1 - float(total_wer) / decoder.num_word) * 100
    print("Character error rate on test set: %.4f" % CER)
    print("Word error rate on test set: %.4f" % WER)
    end = time.time()
    time_used = (end - start) / 60.0
    print("time used for decode %d sentences: %.4f" % (len(test_dataset), time_used))