import pickle


def dict_to_pickle(word_dict, py_dict, num_sum, path):
    """Serialize the three lookup structures to the paths given in `path`."""
    # The files must be opened for binary *writing* ('wb', not 'rb'), and the
    # `with` blocks close them automatically, so no explicit close() is needed.
    with open(path['word_path'], 'wb') as outfile:
        pickle.dump(word_dict, outfile)
    with open(path['py_path'], 'wb') as outfile:
        pickle.dump(py_dict, outfile)
    with open(path['num_path'], 'wb') as outfile:
        pickle.dump(num_sum, outfile)
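# A minimal usage sketch, assuming `path` is a dict with the three keys
# referenced above. The file names and example values here are hypothetical;
# the round trip through pickle.load just verifies the write succeeded.
#
#     paths = {'word_path': 'word_dict.pkl',
#              'py_path': 'py_dict.pkl',
#              'num_path': 'num_sum.pkl'}
#     dict_to_pickle({'hello': 3}, {'ni hao': 1}, 4, paths)
#     with open(paths['word_path'], 'rb') as infile:
#         print(pickle.load(infile))  # -> {'hello': 3}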
import pickle

from sklearn.model_selection import train_test_split

# FileHandler, randomly_pick_from_sdf, optimize_rf, train_rf, and test_rf
# are project-local helpers defined elsewhere in this repository.


def main(sa):
    sln_filename = sa[0]
    sdf_filename = sa[1]
    sln_fps = FileHandler.SlnFile(sln_filename).get_fingerprint_list()
    sdf_fps = randomly_pick_from_sdf(sdf_filename, 400)
    pain_train, pain_test = train_test_split(sln_fps, test_size=0.2,
                                             random_state=24)
    control_train, control_test = train_test_split(sdf_fps, test_size=0.2,
                                                   random_state=24)
    optimize_rf(pain_train, control_train)
    rf = train_rf(pain_train, control_train, n_est=300, rand_state=1986)
    with open("rf_n300.p", "wb") as f:
        pickle.dump(rf, f)
    test_rf(pain_test, control_test, rf)
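# Once rf_n300.p is written, the trained model can be reloaded without
# retraining. A minimal sketch, assuming train_rf returns a scikit-learn
# style classifier with a predict() method (not confirmed by this snippet
# alone); `some_fps` is a hypothetical stand-in for fingerprints shaped
# like the training data.
#
#     with open("rf_n300.p", "rb") as f:
#         rf = pickle.load(f)
#     predictions = rf.predict(some_fps)  # one label per fingerprint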
import os
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# load_training, ModuleIDBasedRNN, tunable_parameters, input_target,
# `modules`, and `device` are defined elsewhere in this module.


def train(train_mode):
    train_data = load_training()
    print("Total events:", len(train_data))
    print("Total truth particles:", sum(y.shape[0] for x, y in train_data))

    model = ModuleIDBasedRNN(input_dim=modules + 1,
                             hidden_dim=20,
                             output_dim=modules + 1,
                             batch_size=1,
                             device=device)
    model.to(device)
    print("total parameters:", tunable_parameters(model))

    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    loss_file_name = os.path.join('output', 'loss', 'ModuleRNN.pkg.gz')

    # Use the function argument instead of a hard-coded flag.
    if train_mode:
        nepochs = 50
        all_losses = []
        for epoch in tqdm(range(nepochs)):
            # Go through half of the training data.
            loss_evt = []
            for ievt in tqdm(range(len(train_data) // 2)):
                event, truth = train_data[ievt]
                total_loss = 0
                for pID in truth.values:
                    # Module IDs (uIDs) hit by this truth particle, in order.
                    hits = event[event['particle_id'] == pID]['uID'].values
                    input_, target_ = input_target(hits)
                    input_ = input_.to(device)
                    target_ = target_.to(device)

                    model.hidden = model.init_hidden()
                    model.zero_grad()
                    output = model(input_)
                    loss = criterion(output, target_)
                    loss.backward()
                    optimizer.step()
                    # Normalize by sequence length.
                    total_loss += loss.item() / input_.size(1)
                loss_evt.append(total_loss / truth.shape[0])
            all_losses.append(loss_evt)
            torch.save(model.state_dict(),
                       os.path.join('output', 'model',
                                    'RNNModule_' + str(epoch)))
            with open(os.path.join('output', 'loss',
                                   'RNNModule_' + str(epoch)), 'wb') as fp:
                pickle.dump(loss_evt, fp)
        with open(loss_file_name, 'wb') as fp:
            pickle.dump(all_losses, fp)
    else:
        # In testing mode: read back the recorded losses.
        with open(loss_file_name, 'rb') as fp:
            all_losses = pickle.load(fp)
        print(all_losses)
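# `input_target` is defined elsewhere in this repo; the sketch below is one
# plausible implementation consistent with how it is used above (a batch-first
# one-hot input of width modules + 1, and next-step targets for NLLLoss, so
# input_.size(1) is the sequence length). This is an assumption, not the
# repository's actual helper.
def input_target_sketch(hits):
    seq = torch.as_tensor(hits, dtype=torch.long)
    n = seq.size(0) - 1
    # Input: (batch=1, seq_len=n, modules + 1), one-hot encoding of each
    # visited module except the last.
    input_ = torch.zeros(1, n, modules + 1)
    input_[0, torch.arange(n), seq[:-1]] = 1.0
    # Target: the module ID that follows each input step.
    target_ = seq[1:]
    return input_, target_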
    val_loss = evaluate(val_data)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
          'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                     val_loss, math.exp(val_loss)))
    print('-' * 89)
    # Anneal the learning rate when validation loss stops improving.
    if prev_val_loss and val_loss > prev_val_loss:
        lr /= 4
    prev_val_loss = val_loss

# Run on test data and save the model.
test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)

if args.save != '':
    with open(args.save, 'wb') as f:
        torch.save(model, f)

# Save the input embeddings (move to CPU before converting to numpy).
embeddings = model.encoder
embeddings_to_plot = embeddings.weight.data.cpu().numpy()
if args.saveembed != '':
    with open(args.saveembed, 'wb') as f_2:
        pickle.dump(embeddings_to_plot, f_2)
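# The pickled array can later be reloaded for analysis. A minimal sketch,
# assuming a word-to-index mapping `word2idx` from the corpus dictionary is
# available and 'embeddings.p' stands in for the args.saveembed path; it finds
# the k nearest neighbours of a word by cosine similarity over the embedding
# rows.
#
#     import pickle
#     import numpy as np
#
#     with open('embeddings.p', 'rb') as f:
#         emb = pickle.load(f)  # shape: (vocab_size, emb_dim)
#
#     def nearest(word, k=5):
#         v = emb[word2idx[word]]
#         sims = emb @ v / (np.linalg.norm(emb, axis=1)
#                           * np.linalg.norm(v) + 1e-12)
#         return np.argsort(-sims)[1:k + 1]  # skip the word itself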