def main():
    args = parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    t0 = time.time()

    if not os.path.exists(args.save_directory):
        os.makedirs(args.save_directory)
    LOG_PATH = os.path.join(args.save_directory, 'log')
    # Create/truncate the run log.
    with open(LOG_PATH, 'w+') as ouf:
        pass

    print("Loading File Paths")
    train_paths, dev_paths, test_paths = load_paths()
    train_paths = train_paths[:args.max_train]
    dev_paths = dev_paths[:args.max_dev]
    test_paths = test_paths[:args.max_test]
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Loading Y Data")
    test_paths = test_paths[:args.max_data]
    train_ys = load_y_data('train')  # 1-dim np array of strings
    dev_ys = load_y_data('dev')
    test_ys = load_y_data('test')
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Building Charset")
    charset = build_charset(np.concatenate((train_ys, dev_ys, test_ys), axis=0))
    charmap = make_charmap(charset)  # {string: int}
    charcount = len(charset)
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Mapping Characters")
    testchars = map_characters(test_ys, charmap)

    print("Building Loader")
    test_loader = make_loader(test_paths, testchars, args, shuffle=False,
                              batch_size=args.batch_size)

    if 'transcript' in args.test_mode or 'perp' in args.test_mode:
        print("Building Model")
        model = Seq2SeqModel(args, vocab_size=charcount)
        CKPT_PATH = os.path.join(args.save_directory, 'model.ckpt')
        print('ckpt : ' + CKPT_PATH)
        if args.cuda:
            model.load_state_dict(torch.load(CKPT_PATH))
        else:
            # Map a GPU-saved checkpoint onto CPU tensors before loading.
            gpu_dict = torch.load(CKPT_PATH,
                                  map_location=lambda storage, loc: storage)
            cpu_model_dict = {}
            for key, val in gpu_dict.items():
                cpu_model_dict[key] = val.cpu()
            model.load_state_dict(cpu_model_dict)
        print("Loaded Checkpoint")
        if args.cuda:
            model = model.cuda()
        model.eval()

    TRANSCRIPT_LOG_PATH = os.path.join(args.save_directory, 'transcript_log.txt')
    CSV_PATH = os.path.join(args.save_directory, 'submission.csv')

    if 'transcript' in args.test_mode:
        print('generating transcripts')
        with open(TRANSCRIPT_LOG_PATH, 'w+') as ouf:
            pass
        if not os.path.exists(CSV_PATH):
            transcripts = write_transcripts(
                path=CSV_PATH, args=args, model=model, loader=test_loader,
                charset=charset, log_path=TRANSCRIPT_LOG_PATH)
        else:
            # Reuse transcripts from an existing submission CSV.
            transcripts = []
            with open(CSV_PATH, 'r') as csvfile:
                raw_csv = csv.reader(csvfile)
                for row in raw_csv:
                    with open(TRANSCRIPT_LOG_PATH, 'a') as ouf:
                        ouf.write('%s\n' % row[1])
                    transcripts.append(row[1])
        t1 = time.time()
        print("Finished Writing Transcripts")
        print('%.2f Seconds' % (t1 - t0))

    if 'cer' in args.test_mode:
        print('calculating cer values')
        CER_LOG_PATH = os.path.join(args.save_directory, 'cer_log.txt')
        with open(CER_LOG_PATH, 'w+') as ouf:
            pass
        transcripts = []
        with open(CSV_PATH, 'r') as csvfile:
            raw_csv = csv.reader(csvfile)
            for row in raw_csv:
                transcripts.append(row[1])
        transcripts = [l.strip() for l in transcripts]
        CER_PATH = os.path.join(args.save_directory, 'test_cer.npy')
        DIST_PATH = os.path.join(args.save_directory, 'test_dist.npy')
        norm_dists, dists = cer_from_transcripts(transcripts, test_ys, CER_LOG_PATH)
        np.save(CER_PATH, norm_dists)
        np.save(DIST_PATH, dists)

    if 'perp' in args.test_mode:
        print('calculating perp values')
        PERP_LOG_PATH = os.path.join(args.save_directory, 'perp_log.txt')
        with open(PERP_LOG_PATH, 'w+') as ouf:
            pass
        PERP_PATH = os.path.join(args.save_directory, 'test_perp.npy')
        all_perps = perplexities_from_x(model, test_loader)
        np.save(PERP_PATH, all_perps)
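
# The script's entry point is not shown in this section; a minimal sketch of
# the usual invocation pattern, assuming main() is meant to run when the file
# is executed directly:
if __name__ == '__main__':
    main()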
import sys
import time
from collections import Counter

from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report

from data_processing.processing import *
from data_processing.raw_data_processing import samples
from models.cnns import cnn1d, cnn2d, cnn2d_2, cnn2d_wavelets
from models.recurrent import lstm1d, lstm1d_2
from models.predefined import inception1d, resnet1d
from baseline import parse_args, get_based_parameters


if __name__ == '__main__':
    args = parse_args()
    data, path, numclasses, model_name, saved_model_path, prefix = get_based_parameters()

    signal_1d_file = args.data_1d_file  # e.g. 'cpsc2018/cpsc_1145_25.pkl'

    # Use PTB-XL data as unknown classes for the CPSC 2018 dataset.
    unknown_file = 'ptb_xl_data/ptb_xl_75_25_cwt.pkl'
    unknown_classes = 75
    unknown_num = unknown_classes * samples

    # load model
    model = load_model(saved_model_path)

    x_train, x_test, x_valid, y_train, y_test, y_valid = load_train_test_data(
        path, prefix, data)

    # prepare data
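
    # --- Hypothetical sketch, not part of the original script ---------------
    # The data preparation and evaluation that follow "prepare data" are not
    # shown in this section. As an illustration only, a closed-set evaluation
    # of the loaded model using the sklearn metrics imported above might look
    # like this, assuming model.predict returns class probabilities and
    # y_test holds integer class labels (use np.argmax(y_test, axis=1) first
    # if it is one-hot encoded):
    #
    #     import numpy as np
    #     y_pred = np.argmax(model.predict(x_test), axis=1)
    #     print(confusion_matrix(y_test, y_pred))
    #     print(classification_report(y_test, y_pred))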