def main(args):
    """ Main entry point of the app """
    # Convert two sets into two branches, with one set in both and one set
    # in only one (used for this file)
    params = params_2
    params_sv = params_3

    from data import H5Data
    files = glob.glob(train_path + "/newdata_*.h5")
    files_val = files[:5]    # take first 5 for validation
    files_train = files[5:]  # take the rest for training

    label = 'new'
    outdir = args.outdir
    vv_branch = args.vv_branch
    os.system('mkdir -p %s' % outdir)

    batch_size = 128
    data_train = H5Data(batch_size=batch_size,
                        cache=None,
                        preloading=0,
                        features_name='training_subgroup',
                        labels_name='target_subgroup',
                        spectators_name='spectator_subgroup')
    data_train.set_file_names(files_train)
    data_val = H5Data(batch_size=batch_size,
                      cache=None,
                      preloading=0,
                      features_name='training_subgroup',
                      labels_name='target_subgroup',
                      spectators_name='spectator_subgroup')
    data_val.set_file_names(files_val)

    n_val = data_val.count_data()
    n_train = data_train.count_data()
    print("val data:", n_val)
    print("train data:", n_train)

    # Mass-binning constants are needed in the training loop as well, so
    # define them outside the reweighting block
    NBINS = 8    # number of bins for the loss function
    MMAX = 200.  # max mass value
    MMIN = 40.   # min mass value
    binWidth = (MMAX - MMIN) / NBINS

    reweightQCD = args.reweightQCD
    if reweightQCD:
        print("Calculating QCD Reweight")
        # hist[0] is signal, hist[1] is QCD
        hist = [np.zeros(NBINS) for i in range(2)]
        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, NBINS + 1),
                               right=False) - 1
            for truth, mass in zip(target, spec):
                hist[int(truth[0])][mass] += 1
        QCD_weights = [hist[0][n] / hist[1][n] for n in range(NBINS)]

    from gnn import GraphNet
    gnn = GraphNet(N, n_targets, len(params), args.hidden, N_sv,
                   len(params_sv), vv_branch=int(vv_branch),
                   De=args.De, Do=args.Do)
    # preload best model
    #gnn.load_state_dict(torch.load('out/gnn_new_best.pth'))

    n_epochs = 200

    def custom_loss(out, target, weights):
        # per-sample cross entropy, rescaled by the per-event mass weights
        loss_funct = nn.CrossEntropyLoss(reduction='none')
        loss = torch.mul(loss_funct(out, target).double(),
                         torch.from_numpy(np.array(weights)).double().cuda())
        return torch.mean(loss)

    optimizer = optim.Adam(gnn.parameters(), lr=0.0001)

    loss_vals_training = np.zeros(n_epochs)
    loss_std_training = np.zeros(n_epochs)
    loss_vals_validation = np.zeros(n_epochs)
    loss_std_validation = np.zeros(n_epochs)
    acc_vals_training = np.zeros(n_epochs)
    acc_vals_validation = np.zeros(n_epochs)
    acc_std_training = np.zeros(n_epochs)
    acc_std_validation = np.zeros(n_epochs)
    final_epoch = 0
    l_val_best = 99999

    from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score
    softmax = torch.nn.Softmax(dim=1)

    for m in range(n_epochs):
        print("Epoch %s\n" % m)
        #torch.cuda.empty_cache()
        final_epoch = m
        lst = []
        loss_val = []
        loss_training = []
        correct = []

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, NBINS + 1),
                               right=False) - 1
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()
            targetv_pivot = torch.from_numpy(spec).long().cuda()
            # signal events get weight 1; QCD events get the per-bin ratio
            # (fall back to 1 when reweighting is disabled)
            weights = []
            for truth, mass in zip(target, spec):
                if truth[0] == 0 or not reweightQCD:
                    weights.append(1)
                else:
                    weights.append(QCD_weights[mass])

            optimizer.zero_grad()
            out = gnn(trainingv, trainingv_sv)
            l = custom_loss(out, targetv, weights)
            loss_training.append(l.item())
            l.backward()
            optimizer.step()
            loss_string = "Loss: %s" % "{0:.5f}".format(l.item())
            del trainingv, trainingv_sv, targetv

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_val.generate_data(),
                                             total=n_val / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, NBINS + 1),
                               right=False) - 1
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()
            targetv_pivot = torch.from_numpy(spec).long().cuda()
            weights = []
            for truth, mass in zip(target, spec):
                if truth[0] == 0 or not reweightQCD:
                    weights.append(1)
                else:
                    weights.append(QCD_weights[mass])

            out = gnn(trainingv, trainingv_sv)
            lst.append(softmax(out).cpu().data.numpy())
            l_val = custom_loss(out, targetv, weights)
            loss_val.append(l_val.item())
            targetv_cpu = targetv.cpu().data.numpy()
            correct.append(target)
            del trainingv, trainingv_sv, targetv

        l_val = np.mean(np.array(loss_val))
        predicted = np.concatenate(lst)  #(torch.FloatTensor(np.concatenate(lst))).to(device)
        print('\nValidation Loss: ', l_val)
        l_training = np.mean(np.array(loss_training))
        print('Training Loss: ', l_training)
        val_targetv = np.concatenate(correct)  #torch.FloatTensor(np.array(correct)).cuda()

        torch.save(gnn.state_dict(), '%s/gnn_%s_last.pth' % (outdir, label))
        if l_val < l_val_best:
            print("new best model")
            l_val_best = l_val
            torch.save(gnn.state_dict(), '%s/gnn_%s_best.pth' % (outdir, label))

        print(val_targetv.shape, predicted.shape)
        print(val_targetv, predicted)
        acc_vals_validation[m] = accuracy_score(val_targetv[:, 0],
                                                predicted[:, 0] > 0.5)
        print("Validation Accuracy: ", acc_vals_validation[m])
        loss_vals_training[m] = l_training
        loss_vals_validation[m] = l_val
        loss_std_validation[m] = np.std(np.array(loss_val))
        loss_std_training[m] = np.std(np.array(loss_training))
        if m > 5 and all(loss_vals_validation[max(0, m - 5):m] > min(
                np.append(loss_vals_validation[0:max(0, m - 5)], 200))):
            print('Early Stopping...')
            print(loss_vals_training, '\n', np.diff(loss_vals_training))
            break
        print()

    acc_vals_validation = acc_vals_validation[:final_epoch]
    loss_vals_training = loss_vals_training[:final_epoch]
    loss_vals_validation = loss_vals_validation[:final_epoch]
    loss_std_validation = loss_std_validation[:final_epoch]
    loss_std_training = loss_std_training[:final_epoch]
    np.save('%s/acc_vals_validation_%s.npy' % (outdir, label), acc_vals_validation)
    np.save('%s/loss_vals_training_%s.npy' % (outdir, label), loss_vals_training)
    np.save('%s/loss_vals_validation_%s.npy' % (outdir, label), loss_vals_validation)
    np.save('%s/loss_std_validation_%s.npy' % (outdir, label), loss_std_validation)
    np.save('%s/loss_std_training_%s.npy' % (outdir, label), loss_std_training)
    'sv_d3dsig', 'sv_costhetasvpv'
]

# In[ ]:

from data import H5Data

files = []
for i in range(52):
    files.append("/nfshome/emoreno/IN/data/opendata/train/data_" + str(i))

data = H5Data(batch_size=100000,
              cache=None,
              preloading=0,
              features_name='training_subgroup',
              labels_name='target_subgroup')
data.set_file_names(files)

# In[ ]:

test = test_2
params = params_2
test_sv = test_3
params_sv = params_3
N = test.shape[2]

# In[ ]:
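# --- Illustration (not part of the notebook above) ---
# A minimal sketch of how batches from an H5Data generator are consumed
# elsewhere in this repo: generate_data() yields (sub_X, sub_Y, sub_Z) lists
# of arrays, from which particle features, secondary-vertex features, and
# one-hot targets are picked out by index. This assumes the same subgroup
# layout as the training scripts in this file.
def _sketch_inspect_one_batch(h5_data):
    for sub_X, sub_Y, sub_Z in h5_data.generate_data():
        training = sub_X[2]      # particle-level features
        training_sv = sub_X[3]   # secondary-vertex features
        target = sub_Y[0]        # one-hot class labels
        print(training.shape, training_sv.shape, target.shape)
        break  # only look at the first batch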
def main(args):
    """ Main entry point of the app """
    # Convert two sets into two branches, with one set in both and one set
    # in only one (used for this file)
    params = params_2
    params_sv = params_3

    from data import H5Data
    files = glob.glob(train_path + "/newdata_*.h5")
    files_val = files[:5]    # take first 5 for validation
    files_train = files[5:]  # take the rest for training

    label = 'new'
    outdir = args.outdir
    os.system('mkdir -p %s' % outdir)

    batch_size = 1024
    data_train = H5Data(batch_size=batch_size,
                        cache=None,
                        preloading=0,
                        features_name='training_subgroup',
                        labels_name='target_subgroup',
                        spectators_name='spectator_subgroup')
    data_train.set_file_names(files_train)
    data_val = H5Data(batch_size=batch_size,
                      cache=None,
                      preloading=0,
                      features_name='training_subgroup',
                      labels_name='target_subgroup',
                      spectators_name='spectator_subgroup')
    data_val.set_file_names(files_val)

    n_val = data_val.count_data()
    n_train = data_train.count_data()
    print("val data:", n_val)
    print("train data:", n_train)

    from ddb import model_DeepDoubleXReference
    keras_model = model_DeepDoubleXReference(
        inputs=[Input(shape=(N, len(params))),
                Input(shape=(N_sv, len(params_sv)))],
        num_classes=n_targets,
        scale_hidden=2,
        hlf_input=None,
        datasets=['cpf', 'sv'])
    keras_model.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['acc'])
    keras_model.summary()

    early_stopping = EarlyStopping(monitor='val_loss', patience=20)
    model_checkpoint = ModelCheckpoint('%s/keras_model_best.h5' % outdir,
                                       monitor='val_loss',
                                       save_best_only=True)
    callbacks = [early_stopping, model_checkpoint]

    keras_model.fit_generator(
        data_train.inf_generate_data_keras(),
        validation_data=data_val.inf_generate_data_keras(),
        epochs=200,
        steps_per_epoch=int(np.ceil(n_train / batch_size)),
        validation_steps=int(np.ceil(n_val / batch_size)),
        callbacks=callbacks)
def main(args):
    """ Main entry point of the app """
    # Convert two sets into two branches, with one set in both and one set
    # in only one (used for this file)
    params = params_2
    params_sv = params_3

    from data import H5Data
    files = glob.glob(train_path + "/newdata_*.h5")
    files_val = files[:5]    # take first 5 for validation
    files_train = files[5:]  # take the rest for training

    label = 'new'
    outdir = args.outdir
    vv_branch = args.vv_branch
    os.system('mkdir -p %s' % outdir)

    batch_size = 128
    data_train = H5Data(batch_size=batch_size,
                        cache=None,
                        preloading=0,
                        features_name='training_subgroup',
                        labels_name='target_subgroup',
                        spectators_name='spectator_subgroup')
    data_train.set_file_names(files_train)
    data_val = H5Data(batch_size=batch_size,
                      cache=None,
                      preloading=0,
                      features_name='training_subgroup',
                      labels_name='target_subgroup',
                      spectators_name='spectator_subgroup')
    data_val.set_file_names(files_val)

    n_val = data_val.count_data()
    n_train = data_train.count_data()
    print("val data:", n_val)
    print("train data:", n_train)

    from gnn import GraphNetAdv, Rx
    gnn = GraphNetAdv(N, n_targets, len(params), args.hidden, N_sv,
                      len(params_sv), vv_branch=int(vv_branch),
                      De=args.De, Do=args.Do)
    DfR = Rx(Do=args.Do, hidden=64, nbins=args.nbins)
    # preload best model
    gnn.load_state_dict(torch.load('%s/gnn_new_best.pth' % args.preload))

    n_epochs = 100
    n_epochs_pretrain = 5

    loss = nn.CrossEntropyLoss(reduction='mean')
    #optimizer = optim.SGD(gnn.parameters(), momentum=0, lr = 0.00001)
    #opt_DfR = optim.SGD(DfR.parameters(), momentum=0, lr = 0.0001)
    optimizer = optim.Adam(gnn.parameters(), lr=0.00001)
    opt_DfR = optim.Adam(DfR.parameters(), lr=0.0001)

    loss_vals_training = np.zeros(n_epochs)
    loss_std_training = np.zeros(n_epochs)
    loss_vals_validation = np.zeros(n_epochs)
    loss_std_validation = np.zeros(n_epochs)
    loss_G_vals_training = np.zeros(n_epochs)
    loss_G_std_training = np.zeros(n_epochs)
    loss_G_vals_validation = np.zeros(n_epochs)
    loss_G_std_validation = np.zeros(n_epochs)
    loss_R_vals_training = np.zeros(n_epochs)
    loss_R_std_training = np.zeros(n_epochs)
    loss_R_vals_validation = np.zeros(n_epochs)
    loss_R_std_validation = np.zeros(n_epochs)
    acc_vals_training = np.zeros(n_epochs)
    acc_vals_validation = np.zeros(n_epochs)
    acc_std_training = np.zeros(n_epochs)
    acc_std_validation = np.zeros(n_epochs)
    final_epoch = 0
    l_val_best = 99999

    from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score
    softmax = torch.nn.Softmax(dim=1)

    for m in range(n_epochs_pretrain):
        print("Pretrain epoch %s\n" % m)
        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, args.nbins + 1),
                               right=False) - 1
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()
            targetv_pivot = torch.from_numpy(spec).long().cuda()

            # Pretrain adversary
            gnn.eval()
            DfR.train()
            optimizer.zero_grad()
            opt_DfR.zero_grad()
            out = gnn(trainingv, trainingv_sv)
            mask = targetv.le(0.5)  # get QCD background
            masked_out = torch.masked_select(out[1].transpose(0, 1),
                                             mask).view(args.Do, -1).transpose(0, 1)
            out_DfR = DfR(masked_out)
            masked_targetv_pivot = torch.masked_select(targetv_pivot, mask)
            l_DfR = loss(out_DfR, masked_targetv_pivot)
            l_DfR.backward()
            opt_DfR.step()
            loss_string = "Loss: %s" % "{0:.5f}".format(l_DfR.item())
            del trainingv, trainingv_sv, targetv, targetv_pivot

    for m in range(n_epochs):
        print("Epoch %s\n" % m)
        #torch.cuda.empty_cache()
        final_epoch = m
        lst = []
        loss_val = []
        loss_G_val = []
        loss_R_val = []
        loss_training = []
        loss_G_training = []
        loss_R_training = []
        correct = []

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, args.nbins + 1),
                               right=False) - 1
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()
            targetv_pivot = torch.from_numpy(spec).long().cuda()

            # Train classifier
            gnn.train()
            DfR.eval()
            optimizer.zero_grad()
            opt_DfR.zero_grad()
            out = gnn(trainingv, trainingv_sv)
            mask = targetv.le(0.5)  # get QCD background
            masked_out = torch.masked_select(out[1].transpose(0, 1),
                                             mask).view(args.Do, -1).transpose(0, 1)
            out_DfR = DfR(masked_out)
            masked_targetv_pivot = torch.masked_select(targetv_pivot, mask)
            l = loss(out[0], targetv)
            l_DfR = loss(out_DfR, masked_targetv_pivot)
            l_total = l - args.lam * l_DfR
            l_total.backward()
            optimizer.step()

            # Train adversary
            gnn.eval()
            DfR.train()
            optimizer.zero_grad()
            opt_DfR.zero_grad()
            out = gnn(trainingv, trainingv_sv)
            mask = targetv.le(0.5)  # get QCD background
            masked_out = torch.masked_select(out[1].transpose(0, 1),
                                             mask).view(args.Do, -1).transpose(0, 1)
            out_DfR = DfR(masked_out)
            l_DfR = loss(out_DfR, masked_targetv_pivot)
            l_DfR.backward()
            opt_DfR.step()

            # Record losses after both updates
            gnn.eval()
            DfR.eval()
            out = gnn(trainingv, trainingv_sv)
            mask = targetv.le(0.5)  # get QCD background
            masked_out = torch.masked_select(out[1].transpose(0, 1),
                                             mask).view(args.Do, -1).transpose(0, 1)
            out_DfR = DfR(masked_out)
            l = loss(out[0], targetv)
            l_DfR = loss(out_DfR, masked_targetv_pivot)
            l_total = l - args.lam * l_DfR
            loss_training.append(l_total.item())
            loss_G_training.append(l.item())
            loss_R_training.append(l_DfR.item())
            loss_string = "Loss: %s" % "{0:.5f}".format(l_total.item())
            del trainingv, trainingv_sv, targetv, targetv_pivot

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_val.generate_data(),
                                             total=n_val / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = np.digitize(sub_Z[0][:, 0, 2],
                               bins=np.linspace(MMIN, MMAX, args.nbins + 1),
                               right=False) - 1
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()
            targetv_pivot = torch.from_numpy(spec).long().cuda()

            gnn.eval()
            DfR.eval()
            out = gnn(trainingv, trainingv_sv)
            mask = targetv.le(0.5)  # get QCD background
            masked_out = torch.masked_select(out[1].transpose(0, 1),
                                             mask).view(args.Do, -1).transpose(0, 1)
            out_DfR = DfR(masked_out)
            masked_targetv_pivot = torch.masked_select(targetv_pivot, mask)
            l_val = loss(out[0], targetv)
            l_DfR_val = loss(out_DfR, masked_targetv_pivot)
            l_total_val = l_val - args.lam * l_DfR_val

            targetv_cpu = targetv.cpu().data.numpy()
            lst.append(softmax(out[0]).cpu().data.numpy())
            correct.append(target)
            loss_val.append(l_total_val.item())
            loss_G_val.append(l_val.item())
            loss_R_val.append(l_DfR_val.item())
            del trainingv, trainingv_sv, targetv, targetv_pivot

        l_val = np.mean(np.array(loss_val))
        print('\nValidation Loss: ', l_val)
        l_training = np.mean(np.array(loss_training))
        print('Training Loss: ', l_training)
        predicted = np.concatenate(lst)  #(torch.FloatTensor(np.concatenate(lst))).to(device)
        val_targetv = np.concatenate(correct)  #torch.FloatTensor(np.array(correct)).cuda()

        torch.save(gnn.state_dict(), '%s/gnn_%s_last.pth' % (outdir, label))
        if l_val < l_val_best:
            print("new best model")
            l_val_best = l_val
            torch.save(gnn.state_dict(), '%s/gnn_%s_best.pth' % (outdir, label))

        print(val_targetv.shape, predicted.shape)
        print(val_targetv, predicted)
        acc_vals_validation[m] = accuracy_score(val_targetv[:, 0],
                                                predicted[:, 0] > 0.5)
        print("Validation Accuracy: ", acc_vals_validation[m])
        loss_vals_training[m] = l_training
        loss_vals_validation[m] = l_val
        loss_G_vals_training[m] = np.mean(np.array(loss_G_training))
        loss_G_vals_validation[m] = np.mean(np.array(loss_G_val))
        loss_R_vals_training[m] = np.mean(np.array(loss_R_training))
        loss_R_vals_validation[m] = np.mean(np.array(loss_R_val))
        loss_std_training[m] = np.std(np.array(loss_training))
        loss_std_validation[m] = np.std(np.array(loss_val))
        loss_G_std_training[m] = np.std(np.array(loss_G_training))
        loss_G_std_validation[m] = np.std(np.array(loss_G_val))
        loss_R_std_training[m] = np.std(np.array(loss_R_training))
        loss_R_std_validation[m] = np.std(np.array(loss_R_val))
        if m > 5 and all(loss_vals_validation[max(0, m - 5):m] > min(
                np.append(loss_vals_validation[0:max(0, m - 5)], 200))):
            print('Early Stopping...')
            print(loss_vals_training, '\n', np.diff(loss_vals_training))
            #break
        print()

    acc_vals_validation = acc_vals_validation[:final_epoch]
    loss_vals_training = loss_vals_training[:final_epoch]
    loss_vals_validation = loss_vals_validation[:final_epoch]
    loss_G_vals_training = loss_G_vals_training[:final_epoch]
    loss_G_vals_validation = loss_G_vals_validation[:final_epoch]
    loss_R_vals_training = loss_R_vals_training[:final_epoch]
    loss_R_vals_validation = loss_R_vals_validation[:final_epoch]
    loss_std_validation = loss_std_validation[:final_epoch]
    loss_std_training = loss_std_training[:final_epoch]
    loss_G_std_validation = loss_G_std_validation[:final_epoch]
    loss_G_std_training = loss_G_std_training[:final_epoch]
    loss_R_std_validation = loss_R_std_validation[:final_epoch]
    loss_R_std_training = loss_R_std_training[:final_epoch]
    np.save('%s/acc_vals_validation_%s.npy' % (outdir, label), acc_vals_validation)
    np.save('%s/loss_vals_training_%s.npy' % (outdir, label), loss_vals_training)
    np.save('%s/loss_vals_validation_%s.npy' % (outdir, label), loss_vals_validation)
    np.save('%s/loss_G_vals_training_%s.npy' % (outdir, label), loss_G_vals_training)
    np.save('%s/loss_G_vals_validation_%s.npy' % (outdir, label), loss_G_vals_validation)
    np.save('%s/loss_R_vals_training_%s.npy' % (outdir, label), loss_R_vals_training)
    np.save('%s/loss_R_vals_validation_%s.npy' % (outdir, label), loss_R_vals_validation)
    np.save('%s/loss_std_validation_%s.npy' % (outdir, label), loss_std_validation)
    np.save('%s/loss_std_training_%s.npy' % (outdir, label), loss_std_training)
    np.save('%s/loss_G_std_validation_%s.npy' % (outdir, label), loss_G_std_validation)
    np.save('%s/loss_G_std_training_%s.npy' % (outdir, label), loss_G_std_training)
    np.save('%s/loss_R_std_validation_%s.npy' % (outdir, label), loss_R_std_validation)
    np.save('%s/loss_R_std_training_%s.npy' % (outdir, label), loss_R_std_training)
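# --- Illustration (not part of the training script above) ---
# A minimal, self-contained sketch of the adversarial scheme used in main():
# the classifier is updated on L_clf - lam * L_adv (so it is penalized when
# the adversary can predict the mass bin from its internal features), then
# the adversary alone is updated on L_adv. Toy modules returning
# (class logits, hidden features) stand in for GraphNetAdv and Rx; all names,
# shapes, and the lam default here are hypothetical.
def _sketch_adversarial_step(x, y_class, y_massbin, clf, adv,
                             opt_clf, opt_adv, lam=10.0):
    import torch.nn as nn
    ce = nn.CrossEntropyLoss()
    # 1) classifier step: minimize classification loss minus lam * adversary loss
    clf.train(); adv.eval()
    opt_clf.zero_grad(); opt_adv.zero_grad()
    logits, hidden = clf(x)
    l_total = ce(logits, y_class) - lam * ce(adv(hidden), y_massbin)
    l_total.backward()
    opt_clf.step()
    # 2) adversary step: minimize adversary loss on freshly computed features
    clf.eval(); adv.train()
    opt_clf.zero_grad(); opt_adv.zero_grad()
    _, hidden = clf(x)
    ce(adv(hidden), y_massbin).backward()
    opt_adv.step()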
N = 100          # number of particles
n_targets = 5    # number of classes
n_features = 4   # number of features per particle
save_path = 'models/8/'
best_path = save_path + '/best/'
batch_size = 256
n_epochs = 100

files = glob.glob(train_path + "/jetImage*_{}p*.h5".format(N))
num_files = len(files)
files_val = files[:int(num_files * 0.2)]     # take first 20% for validation
files_train = files[int(num_files * 0.2):]   # take the rest for training
files_trial = files[int(num_files * 0.2):int(num_files * 0.3)]

data_train = H5Data(batch_size=batch_size,
                    cache=None,
                    preloading=0,
                    features_name='jetConstituentList',
                    labels_name='jets',
                    spectators_name=None)
data_val = H5Data(batch_size=batch_size,
                  cache=None,
                  preloading=0,
                  features_name='jetConstituentList',
                  labels_name='jets',
                  spectators_name=None)

# Define loss function
def loss(model, x, y):
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    y_ = model(x)
    return cce(y_true=y, y_pred=y_)
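# --- Illustration (not part of the script above) ---
# A minimal sketch showing how the loss function above can be evaluated on a
# toy Keras model with random data; the toy model and its shapes are
# hypothetical, while N, n_features, n_targets, and batch_size come from the
# constants defined above.
def _sketch_loss_usage():
    import numpy as np
    import tensorflow as tf
    toy_model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(N, n_features)),
        tf.keras.layers.Dense(n_targets)   # raw logits (from_logits=True above)
    ])
    x = np.random.rand(batch_size, N, n_features).astype(np.float32)
    y = tf.one_hot(np.random.randint(n_targets, size=batch_size), n_targets)
    return loss(toy_model, x, y)           # scalar categorical cross entropy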
def main(args):
    """ Main entry point of the app """
    # Convert two sets into two branches, with one set in both and one set
    # in only one (used for this file)
    params = params_2
    params_sv = params_3

    from data import H5Data
    files = glob.glob(train_path + "/newdata_*.h5")
    files_val = files[:5]    # take first 5 for validation
    files_train = files[5:]  # take the rest for training

    label = 'new'
    outdir = args.outdir
    vv_branch = args.vv_branch
    os.system('mkdir -p %s' % outdir)

    batch_size = 128
    data_train = H5Data(batch_size=batch_size,
                        cache=None,
                        preloading=0,
                        features_name='training_subgroup',
                        labels_name='target_subgroup',
                        spectators_name='spectator_subgroup')
    data_train.set_file_names(files_train)
    data_val = H5Data(batch_size=batch_size,
                      cache=None,
                      preloading=0,
                      features_name='training_subgroup',
                      labels_name='target_subgroup',
                      spectators_name='spectator_subgroup')
    data_val.set_file_names(files_val)

    n_val = data_val.count_data()
    n_train = data_train.count_data()
    print("val data:", n_val)
    print("train data:", n_train)

    from gnn import GraphNet
    gnn = GraphNet(N, n_targets, len(params), args.hidden, N_sv,
                   len(params_sv), vv_branch=int(vv_branch),
                   De=args.De, Do=args.Do)
    # preload best model
    #gnn.load_state_dict(torch.load('out/gnn_new_best.pth'))

    n_epochs = 200
    loss = nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.Adam(gnn.parameters(), lr=0.0001)

    loss_vals_training = np.zeros(n_epochs)
    loss_std_training = np.zeros(n_epochs)
    loss_vals_validation = np.zeros(n_epochs)
    loss_std_validation = np.zeros(n_epochs)
    acc_vals_training = np.zeros(n_epochs)
    acc_vals_validation = np.zeros(n_epochs)
    acc_std_training = np.zeros(n_epochs)
    acc_std_validation = np.zeros(n_epochs)
    final_epoch = 0
    l_val_best = 99999

    from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score
    softmax = torch.nn.Softmax(dim=1)

    for m in range(n_epochs):
        print("Epoch %s\n" % m)
        #torch.cuda.empty_cache()
        final_epoch = m
        lst = []
        loss_val = []
        loss_training = []
        correct = []

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = sub_Z[0]
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()

            optimizer.zero_grad()
            out = gnn(trainingv, trainingv_sv)
            l = loss(out, targetv)
            loss_training.append(l.item())
            l.backward()
            optimizer.step()
            loss_string = "Loss: %s" % "{0:.5f}".format(l.item())
            del trainingv, trainingv_sv, targetv

        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_val.generate_data(),
                                             total=n_val / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]
            spec = sub_Z[0]
            trainingv = torch.FloatTensor(training).cuda()
            trainingv_sv = torch.FloatTensor(training_sv).cuda()
            targetv = torch.from_numpy(np.argmax(target, axis=1)).long().cuda()

            out = gnn(trainingv, trainingv_sv)
            lst.append(softmax(out).cpu().data.numpy())
            l_val = loss(out, targetv)
            loss_val.append(l_val.item())
            targetv_cpu = targetv.cpu().data.numpy()
            correct.append(target)
            del trainingv, trainingv_sv, targetv

        l_val = np.mean(np.array(loss_val))
        predicted = np.concatenate(lst)  #(torch.FloatTensor(np.concatenate(lst))).to(device)
        print('\nValidation Loss: ', l_val)
        l_training = np.mean(np.array(loss_training))
        print('Training Loss: ', l_training)
        val_targetv = np.concatenate(correct)  #torch.FloatTensor(np.array(correct)).cuda()

        torch.save(gnn.state_dict(), '%s/gnn_%s_last.pth' % (outdir, label))
        if l_val < l_val_best:
            print("new best model")
            l_val_best = l_val
            torch.save(gnn.state_dict(), '%s/gnn_%s_best.pth' % (outdir, label))

        print(val_targetv.shape, predicted.shape)
        print(val_targetv, predicted)
        acc_vals_validation[m] = accuracy_score(val_targetv[:, 0],
                                                predicted[:, 0] > 0.5)
        print("Validation Accuracy: ", acc_vals_validation[m])
        loss_vals_training[m] = l_training
        loss_vals_validation[m] = l_val
        loss_std_validation[m] = np.std(np.array(loss_val))
        loss_std_training[m] = np.std(np.array(loss_training))
        if m > 5 and all(loss_vals_validation[max(0, m - 5):m] > min(
                np.append(loss_vals_validation[0:max(0, m - 5)], 200))):
            print('Early Stopping...')
            print(loss_vals_training, '\n', np.diff(loss_vals_training))
            break
        print()

    acc_vals_validation = acc_vals_validation[:final_epoch]
    loss_vals_training = loss_vals_training[:final_epoch]
    loss_vals_validation = loss_vals_validation[:final_epoch]
    loss_std_validation = loss_std_validation[:final_epoch]
    loss_std_training = loss_std_training[:final_epoch]
    np.save('%s/acc_vals_validation_%s.npy' % (outdir, label), acc_vals_validation)
    np.save('%s/loss_vals_training_%s.npy' % (outdir, label), loss_vals_training)
    np.save('%s/loss_vals_validation_%s.npy' % (outdir, label), loss_vals_validation)
    np.save('%s/loss_std_validation_%s.npy' % (outdir, label), loss_std_validation)
    np.save('%s/loss_std_training_%s.npy' % (outdir, label), loss_std_training)
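# --- Illustration (not part of the training script above) ---
# A minimal sketch of the early-stopping rule used in the training loops in
# this file: training stops once the last five validation losses are all
# worse than the best loss seen before that window (the appended 200 only
# guards the min() while the early history is empty). toy_losses is a
# hypothetical NumPy array of per-epoch validation losses.
def _sketch_early_stop(toy_losses, m):
    import numpy as np
    recent = toy_losses[max(0, m - 5):m]
    best_before = min(np.append(toy_losses[0:max(0, m - 5)], 200))
    return m > 5 and all(recent > best_before)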
def main(args):
    """ Main entry point of the app """
    # Convert two sets into two branches, with one set in both and one set
    # in only one (used for this file)
    params = params_2
    params_sv = params_3

    from data import H5Data
    files = glob.glob(train_path + "/newdata_*.h5")
    files_val = files[4:5]    # take a single file for validation
    files_train = files[5:6]  # take a single file for training

    label = 'new'
    outdir = args.outdir
    vv_branch = args.vv_branch
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

    batch_size = 256
    data_train = H5Data(batch_size=batch_size,
                        cache=None,
                        preloading=0,
                        features_name='training_subgroup',
                        labels_name='target_subgroup',
                        spectators_name='spectator_subgroup')
    data_train.set_file_names(files_train)
    data_val = H5Data(batch_size=batch_size,
                      cache=None,
                      preloading=0,
                      features_name='training_subgroup',
                      labels_name='target_subgroup',
                      spectators_name='spectator_subgroup')
    data_val.set_file_names(files_val)

    n_val = data_val.count_data()
    n_train = data_train.count_data()
    print("val data:", n_val)
    print("train data:", n_train)

    net_args = (N, n_targets, len(params), args.hidden, N_sv, len(params_sv))
    net_kwargs = {"vv_branch": int(vv_branch), "De": args.De, "Do": args.Do}
    gnn = InteractionModel(*net_args, **net_kwargs)
    gnn.compile(optimizer='adam')
    print("Model compiled")

    #### Start training ####
    n_epochs = 1

    # Keep results for plotting
    train_loss_results = []
    train_accuracy_results = []
    val_loss_results = []
    val_accuracy_results = []

    # Log directory for TensorBoard
    current_time = datetime.datetime.now().strftime("%Y%m%d")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
    pathlib.Path(train_log_dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(test_log_dir).mkdir(parents=True, exist_ok=True)
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # Define the loss function, gradient helper, and optimizer once, outside
    # the batch loop; re-creating the Adam optimizer on every batch would
    # discard its internal state after each step
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    def loss(model, x1, x2, y):
        y_ = model([x1, x2])
        return cce(y_true=y, y_pred=y_)

    def grad(model, input_par, input_sv, targets):
        with tf.GradientTape() as tape:
            loss_value = loss(model, input_par, input_sv, targets)
        return loss_value, tape.gradient(loss_value, model.trainable_variables)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    for epoch in range(n_epochs):
        # Tools to keep track of the metrics
        epoch_loss_avg = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
        epoch_accuracy = tf.keras.metrics.CategoricalAccuracy('train_accuracy')
        val_epoch_loss_avg = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
        val_epoch_accuracy = tf.keras.metrics.CategoricalAccuracy('test_accuracy')

        # Training
        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_train.generate_data(),
                                             total=n_train / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]

            # Compute loss and gradients, then update the weights
            loss_value, grads = grad(gnn, training, training_sv, target)
            optimizer.apply_gradients(zip(grads, gnn.trainable_variables))

            # Track progress
            epoch_loss_avg(loss_value)  # add current batch loss
            # Compare predicted label to actual label
            epoch_accuracy(target, tf.nn.softmax(gnn([training, training_sv])))

        # Validation
        for sub_X, sub_Y, sub_Z in tqdm.tqdm(data_val.generate_data(),
                                             total=n_val / batch_size):
            training = sub_X[2]
            training_sv = sub_X[3]
            target = sub_Y[0]

            # Compute the loss
            loss_value = loss(gnn, training, training_sv, target)
            # Track progress
            val_epoch_loss_avg(loss_value)
            val_epoch_accuracy(target, tf.nn.softmax(gnn([training, training_sv])))

        # End epoch
        train_loss_results.append(epoch_loss_avg.result())
        train_accuracy_results.append(epoch_accuracy.result())
        val_loss_results.append(val_epoch_loss_avg.result())
        val_accuracy_results.append(val_epoch_accuracy.result())

        # Logs for TensorBoard
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', epoch_loss_avg.result(), step=epoch)
            tf.summary.scalar('accuracy', epoch_accuracy.result(), step=epoch)
        with test_summary_writer.as_default():
            tf.summary.scalar('loss', val_epoch_loss_avg.result(), step=epoch)
            tf.summary.scalar('accuracy', val_epoch_accuracy.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch + 1,
                              epoch_loss_avg.result(),
                              epoch_accuracy.result() * 100,
                              val_epoch_loss_avg.result(),
                              val_epoch_accuracy.result() * 100))

        # Reset metrics every epoch
        epoch_loss_avg.reset_states()
        val_epoch_loss_avg.reset_states()
        epoch_accuracy.reset_states()
        val_epoch_accuracy.reset_states()

    # Save the model after training
    save_path = 'models/2/'
    pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
    tf.saved_model.save(gnn, save_path)
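# --- Illustration (not part of the training script above) ---
# A minimal sketch of reloading the exported model for inference, using the
# standard tf.saved_model.load API on the save_path written above.
def _sketch_reload(save_path='models/2/'):
    import tensorflow as tf
    loaded = tf.saved_model.load(save_path)
    # call like the original model, e.g. loaded([x1, x2])
    return loaded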