def test():
    """Decode one batch with a trained CNN-LSTM-CTC model and visualize it.

    Loads a hard-coded checkpoint, rebuilds the model from the packaged
    hyper-parameters, greedy-decodes a single batch of the 'train' set,
    maps the 48-phone output down to the 39-phone evaluation set, and
    pushes spectrogram / feature-map / probability plots to visdom.

    Relies on module-level names: torch, nn, Variable, np, visdom,
    USE_CUDA, myDataset, myDataLoader, myCNNDataLoader, CTC_Model,
    GreedyDecoder, BeamDecoder.
    """
    model_path = '../log/exp_cnn_lstm_ctc_spectrum201/exp_cnn3*41_3*21_4lstm_ctc_Melspectrum_stride_1_2/exp2_82.1483/best_model_cv80.8660423723.pkl'
    package = torch.load(model_path)
    data_dir = '../data_prepare/data'

    # Model hyper-parameters are stored inside the checkpoint package.
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:
        # Older checkpoints predate the 'mel' flag; default to False.
        mel = False

    decoder_type = 'Greedy'

    test_dataset = myDataset(data_dir, data_set='train', feature_type=feature_type,
                             out_type=out_type, n_feats=n_feats, mel=mel)
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param,
                      num_class=num_class, drop_out=drop_out)

    # The CNN front-end needs a loader that pads to image-like batches.
    if add_cnn:
        test_loader = myCNNDataLoader(test_dataset, batch_size=1, shuffle=False,
                                      num_workers=4, pin_memory=False)
    else:
        test_loader = myDataLoader(test_dataset, batch_size=1, shuffle=False,
                                   num_workers=4, pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2phone, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2phone)

    # 48-phone -> 39-phone mapping used for TIMIT-style evaluation.
    import pickle
    with open('../decode_map_48-39/map_dict.pkl', 'rb') as f:
        map_dict = pickle.load(f)
    print(map_dict)

    vis = visdom.Visdom(env='fan')
    legend = [test_dataset.int2phone[i] for i in range(49)]

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            # RNN-only path expects time-major input.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)

        probs, visual = model(inputs, visualize=True)
        probs = probs.data.cpu()

        if add_cnn:
            # CNN strides shrink the time axis: input_size_list holds
            # fractional lengths that are rescaled to output frames here.
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]

        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))

        # Collapse both references and hypotheses to the 39-phone set.
        for x in range(len(labels)):
            label = labels[x].strip().split(' ')
            for i in range(len(label)):
                label[i] = map_dict[label[i]]
            labels[x] = ' '.join(label)
            decode = decoded[x].strip().split(' ')
            for i in range(len(decode)):
                decode[i] = map_dict[decode[i]]
            decoded[x] = ' '.join(decode)

        for x in range(len(labels)):
            print("origin: " + labels[x])
            print("decoded: " + decoded[x])

        if add_cnn:
            # visual = [input spectrum, post-CNN features, pre-RNN features, output probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)

            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_after_cnn')
            after_cnn = visual[1][0][0].transpose(0, 1).data.cpu()
            vis.heatmap(after_cnn, opts=opts)

            opts = dict(title=labels[0], xlabel="frame", ylabel='feature_before_rnn')
            before_rnn = visual[2].transpose(0, 1)[0].transpose(0, 1).data.cpu()
            vis.heatmap(before_rnn, opts=opts)

            show_prob = visual[3].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability",
                             legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)
        else:
            # visual = [input spectrum, output probs]
            spectrum_inputs = visual[0][0][0].transpose(0, 1).data.cpu()
            opts = dict(title=labels[0], xlabel="frame", ylabel='spectrum')
            vis.heatmap(spectrum_inputs, opts=opts)

            show_prob = visual[1].transpose(0, 1)[0].data.cpu()
            line_opts = dict(title=decoded[0], xlabel="frame", ylabel="probability",
                             legend=legend)
            x = show_prob.size()[0]
            vis.line(show_prob.numpy(), X=np.array(range(x)), opts=line_opts)

        # Only visualize the first batch.
        break
def test():
    """Evaluate a trained CTC model: decode a whole data set, print CER/WER.

    Reads the checkpoint either from --model_path or from the [Model]
    section of the config file named by --conf, decodes every batch with
    a greedy or beam decoder, optionally maps 48 phones down to 39 via
    --map_48_39, and reports character/word error rates plus wall time.

    Relies on module-level names: parser (argparse), torch, nn, Variable,
    time, configparser, SpeechDataset, SpeechDataLoader,
    SpeechCNNDataLoader, CTC_Model, GreedyDecoder, BeamDecoder.

    NOTE(review): this shadows any earlier function also named `test` in
    the same module — confirm only one definition is meant to be live.
    """
    args = parser.parse_args()

    # BUG FIX: the config is needed unconditionally below (USE_CUDA, the
    # [Decode] section), but was only parsed when --model_path was absent,
    # so the --model_path branch crashed with UnboundLocalError on `cf`.
    # Parse it up front in both cases.
    cf = configparser.ConfigParser()
    cf.read(args.conf)
    if args.model_path is not None:
        package = torch.load(args.model_path)
        data_dir = '../../../CTC_pytorch_data/data_prepare/data'
    else:
        model_path = cf.get('Model', 'model_file')
        data_dir = cf.get('Data', 'data_dir')
        package = torch.load(model_path)

    # Model hyper-parameters are stored inside the checkpoint package.
    rnn_param = package["rnn_param"]
    add_cnn = package["add_cnn"]
    cnn_param = package["cnn_param"]
    num_class = package["num_class"]
    feature_type = package['epoch']['feature_type']
    n_feats = package['epoch']['n_feats']
    out_type = package['epoch']['out_type']
    drop_out = package['_drop_out']
    try:
        mel = package['epoch']['mel']
    except KeyError:
        # Older checkpoints predate the 'mel' flag; default to False.
        mel = False

    USE_CUDA = cf.getboolean('Training', 'use_cuda')
    beam_width = cf.getint('Decode', 'beam_width')
    lm_alpha = cf.getfloat('Decode', 'lm_alpha')
    decoder_type = cf.get('Decode', 'decode_type')
    data_set = cf.get('Decode', 'eval_dataset')

    test_dataset = SpeechDataset(data_dir, data_set=data_set, feature_type=feature_type,
                                 out_type=out_type, n_feats=n_feats, mel=mel)
    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param,
                      num_class=num_class, drop_out=drop_out)

    # The CNN front-end needs a loader that pads to image-like batches.
    if add_cnn:
        test_loader = SpeechCNNDataLoader(test_dataset, batch_size=8, shuffle=False,
                                          num_workers=4, pin_memory=False)
    else:
        test_loader = SpeechDataLoader(test_dataset, batch_size=8, shuffle=False,
                                       num_workers=4, pin_memory=False)

    model.load_state_dict(package['state_dict'])
    model.eval()
    if USE_CUDA:
        model = model.cuda()

    if decoder_type == 'Greedy':
        decoder = GreedyDecoder(test_dataset.int2class, space_idx=-1, blank_index=0)
    else:
        decoder = BeamDecoder(test_dataset.int2class, beam_width=beam_width,
                              blank_index=0, space_idx=-1,
                              lm_path=args.lm_path, lm_alpha=lm_alpha)

    # Optional 48-phone -> 39-phone mapping (TIMIT-style scoring).
    if args.map_48_39 is not None:
        import pickle
        with open(args.map_48_39, 'rb') as f:
            map_dict = pickle.load(f)
        print(map_dict)

    total_wer = 0
    total_cer = 0
    start = time.time()

    for data in test_loader:
        inputs, target, input_sizes, input_size_list, target_sizes = data
        if not add_cnn:
            # RNN-only path expects time-major input.
            inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        if not add_cnn:
            inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_size_list)

        probs = model(inputs)

        if add_cnn:
            # CNN strides shrink the time axis: input_size_list holds
            # fractional lengths that are rescaled to output frames here.
            max_length = probs.size(0)
            input_size_list = [int(x * max_length) for x in input_size_list]

        probs = probs.data.cpu()
        decoded = decoder.decode(probs, input_size_list)
        targets = decoder._unflatten_targets(target, target_sizes)
        labels = decoder._process_strings(decoder._convert_to_strings(targets))

        if args.map_48_39 is not None:
            # Collapse both references and hypotheses to the 39-phone set.
            for x in range(len(labels)):
                label = labels[x].strip().split(' ')
                for i in range(len(label)):
                    label[i] = map_dict[label[i]]
                labels[x] = ' '.join(label)
                decode = decoded[x].strip().split(' ')
                for i in range(len(decode)):
                    decode[i] = map_dict[decode[i]]
                decoded[x] = ' '.join(decode)

        for x in range(len(labels)):
            print("origin : " + labels[x])
            print("decoded: " + decoded[x])

        # Accumulate edit distances; the decoder tracks reference sizes.
        cer = 0
        wer = 0
        for x in range(len(labels)):
            cer += decoder.cer(decoded[x], labels[x])
            wer += decoder.wer(decoded[x], labels[x])
            decoder.num_word += len(labels[x].split())
            decoder.num_char += len(labels[x])
        total_cer += cer
        total_wer += wer

    CER = (float(total_cer) / decoder.num_char) * 100
    WER = (float(total_wer) / decoder.num_word) * 100
    print("Character error rate on %s set: %.4f" % (data_set, CER))
    print("Word error rate on %s set: %.4f" % (data_set, WER))

    end = time.time()
    time_used = (end - start) / 60.0
    print("time used for decode %d sentences: %.4f minutes." % (len(test_dataset), time_used))