def train(args):
    """Train an LSTMClassifier character-tagging model and evaluate it.

    Loads train/dev/test splits via ``TrainLoader(args.data_dir)``, trains with
    SGD for ``args.num_epochs`` epochs, saves the model plus its vocabularies to
    ``args.save_to``, and finally runs the test-set evaluation.

    Args:
        args: parsed CLI namespace; fields used here: gpu, seed, data_dir,
            char_dim, hidden_dim, learning_rate, weight_decay, batch_size,
            num_epochs, save_to.

    Raises:
        RuntimeError: if ``args.gpu`` is set but CUDA is not available.
    """
    print(args)
    # Was `assert not args.gpu or (args.gpu and torch.cuda.is_available())`:
    # asserts are stripped under `python -O`, and the `args.gpu and` term was
    # redundant. Raise an explicit error instead.
    if args.gpu and not torch.cuda.is_available():
        raise RuntimeError('--gpu requested but CUDA is not available')

    random.seed(args.seed)

    data_loader = TrainLoader(args.data_dir)
    train_data = data_loader.train_data
    dev_data = data_loader.dev_data
    test_data = data_loader.test_data
    char_vocab = data_loader.token2id
    tag_vocab = data_loader.tag2id
    char_vocab_size = len(char_vocab)

    print('Training samples:', len(train_data))
    print('Valid samples:', len(dev_data))
    print('Test samples:', len(test_data))
    print(char_vocab)
    print(tag_vocab)

    model = LSTMClassifier(char_vocab_size, args.char_dim, args.hidden_dim,
                           len(tag_vocab), args.gpu)
    if args.gpu:
        model = model.cuda()

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                          weight_decay=args.weight_decay)
    model = train_model(model, optimizer, train_data, dev_data, char_vocab,
                        tag_vocab, args.batch_size, args.num_epochs, args.gpu)

    save_model(model, {'chars': char_vocab, 'tags': tag_vocab}, args.save_to)
    evaluate_test_set(model, test_data, char_vocab, tag_vocab, args.gpu)
def test(args):
    """Evaluate one or all saved checkpoints on the test split.

    Builds the test ``DataLoader`` once, then either evaluates every epoch
    checkpoint ``model_0.tar .. model_{num_epochs-1}.tar`` (``args.test_all``)
    or only ``model_{args.test_id}.tar``. Accuracy and F1 are printed per
    checkpoint.

    Args:
        args: parsed CLI namespace; fields used: data_dir, subset_par,
            subjects_list, batch_size, test_all, num_epochs, test_id,
            save_path, hidden_dim.
    """
    dataset_test = FirmaData_select_subjects(
        args.data_dir, 30, args.subset_par[0], args.subset_par[1],
        args.subset_par[2], args.subjects_list, subset='test',
        pre_process=False)
    dat_loader_test = DataLoader(dataset_test, batch_size=args.batch_size,
                                 shuffle=True)
    # Feature dimension of one sample window; drives the LSTM input size.
    feature_dim = dataset_test[0][0].shape[1]

    def _eval_checkpoint(loadid):
        # Load `model_<loadid>.tar`, rebuild the classifier, and print metrics.
        # (Was duplicated verbatim in both branches below.)
        saved_model = os.path.join(args.save_path,
                                   'model_' + str(loadid) + '.tar')
        checkpoint = torch.load(saved_model)
        model = LSTMClassifier(feature_dim, args.hidden_dim, output_size=3)
        model.cuda()
        model.load_state_dict(checkpoint['model_state_dict'])
        acc, f1, _ = evaluate_test_set(model, dat_loader_test)
        print('model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(
            loadid, acc, f1))

    if args.test_all:
        for loadid in range(args.num_epochs):
            _eval_checkpoint(loadid)
    else:
        _eval_checkpoint(args.test_id)
def train(args):
    """Grid-train an LSTMClassifier across scenarios, week spans, window sizes
    and subject lists, evaluating each run's best checkpoint on its test split.

    For every (scenario, n_weeks, window_size, subjects_list) combination this
    builds train/val/test loaders, trains with Adam, reloads the best-epoch
    checkpoint returned by ``train_model`` and appends accuracy/F1 to
    ``log.txt`` in the working directory.

    Args:
        args: parsed CLI namespace; fields used: seed, data_dir, subset_par,
            batch_size, hidden_dim, learning_rate, num_epochs, save_path.
    """
    subject_lists = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]]
    window_sizes = [1, 5, 15, 30, 60]
    scns = ['shared_data_1', 'shared_data_2']
    n_weeks = ['1_weeks', '2_weeks', '3_weeks']
    random.seed(args.seed)

    # Context manager replaces open()/close(): the original leaked the file
    # handle (and buffered lines) if any training run raised mid-grid.
    with open('log.txt', 'w+') as logfile:
        for scn in scns:
            for n_week in n_weeks:
                data_dir = os.path.join(args.data_dir, scn, n_week)
                for window_size in window_sizes:
                    for subjects_list in subject_lists:
                        dataset_train = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='train', pre_process=False)
                        dataset_val = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='val', pre_process=False)
                        dataset_test = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='test', pre_process=False)
                        dat_loader_train = DataLoader(
                            dataset_train, batch_size=args.batch_size,
                            shuffle=True)
                        dat_loader_val = DataLoader(
                            dataset_val, batch_size=args.batch_size,
                            shuffle=True)
                        dat_loader_test = DataLoader(
                            dataset_test, batch_size=args.batch_size,
                            shuffle=True)

                        # Output size = one class per subject in this run.
                        model = LSTMClassifier(
                            dataset_train[0][0].shape[1], args.hidden_dim,
                            output_size=len(subjects_list))
                        model.cuda()
                        optimizer = optim.Adam(model.parameters(),
                                               lr=args.learning_rate)
                        save_pa = os.path.join(args.save_path, scn, n_week,
                                               str(subjects_list),
                                               str(window_size))
                        # train_model returns (model, best-epoch id); reload
                        # that epoch's checkpoint for the test evaluation.
                        _, test_id = train_model(model, optimizer,
                                                 dat_loader_train,
                                                 dat_loader_val,
                                                 args.num_epochs, save_pa)
                        saved_model = os.path.join(
                            save_pa, 'model_' + str(test_id) + '.tar')
                        checkpoint = torch.load(saved_model)
                        model.load_state_dict(checkpoint['model_state_dict'])
                        acc, f1, _ = evaluate_test_set(model, dat_loader_test)
                        logfile.write(
                            scn + ' ' + n_week + ' ' + 'subjects: '
                            + str(subjects_list)
                            + 'window_size {} model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(
                                window_size, test_id, acc, f1) + "\n")
                        # Flush per run so a crash still leaves partial results.
                        logfile.flush()
print('Done !') ### Load data print('Loading Data ... ', end='') d_test = TextDataset(word2idx, fp_test, train=False) test_loader = DataLoader(d_test, batch_size=batch_size, shuffle=False) print('Done !') ### Load model print('Loading Model ... ', end='') model = LSTMClassifier(embedding_dim, hidden_dim, num_layers, batch_size) model.cuda() model.load_state_dict(torch.load(fp_model)) print('Done !') ### Predict print('Predict ... ', end='') pred = predict(model, test_loader) print('Done !') ### Write print('Write ... ', end='') df_pred = pd.DataFrame()