def training_from_flag(flags):
    """Train a Forward network and archive the run.

    1. read data  2. build network  3. train  4. record flags
    :param flags: the training flags read from command line or parameter.py
    :return: None
    """
    # Optionally hide every GPU from CUDA.
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # Load the train / test splits.
    loader_train, loader_test = data_reader.read_data(flags)
    # When inputs are normalized, the geometry boundary becomes the unit box.
    if flags.normalize_input:
        flags.geoboundary_norm = [-1, 1, -1, 1]
    print("Boundary is set at:", flags.geoboundary)
    print("Making network now")
    ntwk = Network(Forward, flags, loader_train, loader_test)
    print("Start training now...")
    ntwk.train()
    # House keeping: pickle the flags object and store the best validation
    # error alongside the checkpoint.
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
def predict_from_model(pre_trained_model, Xpred_file, shrink_factor=1, save_name=''):
    """Run prediction with a pre-trained Backprop model on *Xpred_file*.

    1. retrieve the flags  2. get data  3. initialize network  4. predict
    :param pre_trained_model: the folder to retrieve the model (with or
        without a leading "models/" prefix)
    :param Xpred_file: the file of X values to predict on
    :param shrink_factor: forwarded to Network.predict and baked into the
        save prefix
    :param save_name: prefix prepended to the output file names
    :return: None
    """
    # Retrieve the flag object
    print("This is doing the prediction for file", Xpred_file)
    print("Retrieving flag object for parameters")
    # BUGFIX: `eval_model` was previously assigned only inside the if-branch,
    # raising NameError for model paths without the "models/" prefix.
    eval_model = pre_trained_model
    if pre_trained_model.startswith("models"):
        eval_model = pre_trained_model[7:]
        print("after removing prefix models/, now model_dir is:", eval_model)
    flags = load_flags(pre_trained_model)  # Get the pre-trained model
    flags.eval_model = eval_model  # Reset the eval mode
    # The loaders are useless in prediction but keep the Network API uniform.
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")
    ntwk = Network(Backprop, flags, train_loader, test_loader,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    # Evaluation process
    print("Start eval now:")
    pred_file, truth_file = ntwk.predict(
        Xpred_file,
        save_prefix=save_name + 'shrink_factor' + str(shrink_factor),
        shrink_factor=shrink_factor)
def evaluate_from_model(model_dir):
    """Evaluate a trained Forward model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :return: None
    """
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    # Data loaders (evaluation consumes the test split).
    loader_train, loader_test = data_reader.read_data(flags)
    print("Making network now")
    ntwk = Network(Forward, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()
    # Visualize the prediction-error distribution.
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
def get_dataset_words(filename_list, field):
    """Return the set of distinct tokens found in *field* across all files."""
    vocabulary = set()
    for fname in tqdm(filename_list):
        dataset = data_reader.read_data(fname)
        for text in dataset[field]:
            vocabulary.update(data_reader.tokenize(text, None))
    return vocabulary
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """Evaluate a trained Backprop model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, generate all different inference trial results
    :param eval_data_all: if True, put all data in evaluation data
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.backprop_step = eval_flags.backprop_step
    # Per-dataset test ratio (only listed datasets are overridden).
    ratio_by_set = {'ballistics': 0.001, 'sine_wave': 0.005,
                    'robotic_arm': 0.2, 'sine_test_1d': 0.05}
    if flags.data_set in ratio_by_set:
        flags.test_ratio = ratio_by_set[flags.data_set]
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    flags.lr = 0.05
    flags.eval_batch_size = eval_flags.eval_batch_size
    flags.train_step = eval_flags.train_step
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(Backprop, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        pred_file, truth_file = ntwk.evaluate(
            save_dir='/work/sr365/multi_eval/Backprop/' + flags.data_set,
            save_all=True)
    else:
        pred_file, truth_file = ntwk.evaluate()
    # Visualize the prediction-error distribution.
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
def get_features_for_prediction(features, i, use_pca=False):
    """Assemble concatenated train/test feature matrices for final prediction.

    For every descriptor in *features* the per-feature embeddings are loaded
    (splits "final_train<i>" / "final_test<i>") and stacked along the last
    axis.  The unlabeled dev set receives dummy "others" labels so that
    `get_feature` can process it.

    :param features: list of feature descriptors, e.g. "glove-twitter"
    :param i: fold / random-state index used to name the splits
    :param use_pca: if True, PCA-reduce each feature block before stacking
    :return: (X_train, y_train, X_val, y_val, X_test, ind, X_text); the
        validation entries remain empty lists (their computation is disabled)
    """
    X_train, y_train, X_test, X_val, y_val = [], [], [], [], []
    for item in features:
        ## distinguish twitter glove and common glove
        ## distinguish deepmoji sum and avg
        feature, ty, mode = featureAnalysis(item)
        # Each embedding family carries its own dimensionality.
        if feature == "glove" and ty == "twitter":
            constant.emb_dim = 200
        elif feature == "emoji":  # BUGFIX: was the invalid `elif: feature=="emoji":`
            pass  # keep whatever emb_dim is currently configured
        else:
            constant.emb_dim = 300
        print(feature)
        ## prepare data for feature-10 folders
        vocab = generate_vocab(include_test=True)
        train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i,
                                           dev_with_label=False, include_test=True)
        ## Add labels to dev_no_lab for getting features
        ind = dev_no_lab[0]
        X_text = dev_no_lab[1]
        # BUGFIX: the comprehension variable shadowed the parameter `i`; use `_`.
        labels = ["others" for _ in range(len(ind))]
        dev = (ind, X_text, labels)
        ## feature_list: glove emoji elmo bert deepmoji emo2vec
        ## if you want twitter glove or common glove use ty='twitter' and ty='common'
        print(ty)
        Xi_train, yi_train = get_feature(train, vocab, feature_list=[feature], mode=[mode],
                                         split="final_train" + str(i), ty=[ty])  ## [29010,3,emb_size] 3 is number of sentence
        Xi_test, _ = get_feature(dev, vocab, feature_list=[feature], mode=[mode],
                                 split="final_test" + str(i), ty=[ty])  ## [2755,3,emb_size]
        if use_pca:
            # BUGFIX: the old call `pca(Xi_train, Xi_val, Xi_test)` referenced
            # the undefined Xi_val (its assignment is disabled); reduce the two
            # live matrices instead, matching get_multi_features.
            Xi_train, Xi_test, _ = pca(Xi_train, Xi_test)
        if X_train == []:
            X_train = Xi_train
            y_train = yi_train
            X_test = Xi_test
        else:
            X_train = np.concatenate((X_train, Xi_train), axis=2)
            X_test = np.concatenate((X_test, Xi_test), axis=2)
    return X_train, y_train, X_val, y_val, X_test, ind, X_text
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """Evaluate a trained INN model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, run the repeated multi-evaluation instead
    :param eval_data_all: if True, put all data in evaluation data
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    # These datasets all evaluate on a 10% hold-out.
    if flags.data_set in ('ballistics', 'sine_wave', 'robotic_arm'):
        flags.test_ratio = 0.1
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(flags,
                                                      eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(INN, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print(ntwk.ckpt_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()
    # Plot the error distribution (meta_material has no simulator output).
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
    # For the gaussian mixture, additionally scatter-plot the generated points.
    if flags.data_set == 'gaussian_mixture':
        Xpred = helper_functions.get_Xpred(path='data/', name=flags.eval_model)
        Ypred = helper_functions.get_Ypred(path='data/', name=flags.eval_model)
        generate_Gaussian.plotData(
            Xpred, Ypred,
            save_dir='data/' + flags.eval_model.replace('/', '_') + 'generation plot.png',
            eval_mode=True)
def get_single_feature_for_svm(feature, ty, i):
    """Load one feature for an SVM run and PCA-reduce the train/valid matrices."""
    ## prepare data for feature-10 folders
    vocab = generate_vocab()
    train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i,
                                       dev_with_label=constant.dev_with_label,
                                       include_test=constant.include_test)
    ## feature_list: glove emoji elmo bert deepmoji emo2vec
    ## if you want twitter glove or common glove use ty='twitter' and ty='common'
    X_train, y_train = get_feature(train, vocab, feature_list=[feature],
                                   mode=['sum'], split="train", ty=ty)  ## [29010,3,emb_size] 3 is number of sentence
    X_test, y_test = get_feature(val, vocab, feature_list=[feature],
                                 mode=['sum'], split="valid", ty=ty)  ## [1150,3,emb_size]
    # Joint PCA so train and valid share the same projection.
    X_train_reduced, X_test_reduced, _ = pca(X_train, X_test)
    return X_train_reduced, y_train, X_test_reduced, y_test
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """Evaluate a trained MDN model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, run the repeated multi-evaluation instead
    :param eval_data_all: if True, put all data in evaluation data
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    # An absolute path is loaded directly; otherwise look under "models/".
    if model_dir.startswith('/'):
        flags = helper_functions.load_flags(model_dir)
    else:
        flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.test_ratio = get_test_ratio_helper(flags)
    # 2020.10.10 only, delete afterward
    flags.test_ratio *= 2
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(MDN, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print(model_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()
    # Plot the error distribution (meta_material has no simulator output).
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
def predict_from_model(pre_trained_model, Xpred_file, no_plot=True):
    """Predict with a pre-trained Backprop model on *Xpred_file*.

    1. retrieve the flags  2. get data  3. initialize network  4. predict
    :param pre_trained_model: the folder to retrieve the model
    :param Xpred_file: the prediction file position
    :param no_plot: if True, skip plotting (used by multi_eval)
    :return: (pred_file, truth_file, flags)
    """
    print("This is doing the prediction for file", Xpred_file)
    print("Retrieving flag object for parameters")
    # Note: only the printed name is stripped; flags keep the original path.
    if pre_trained_model.startswith("models"):
        eval_model = pre_trained_model[7:]
        print("after removing prefix models/, now model_dir is:", eval_model)
    flags = load_flags(pre_trained_model)  # the pre-trained model's flags
    flags.eval_model = pre_trained_model  # switch the flags into eval mode
    flags.test_ratio = 0.1  # useless number
    # The loaders are useless in prediction but keep the Network API uniform.
    loader_train, loader_test = data_reader.read_data(flags)
    print("Making network now")
    ntwk = Network(Backprop, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if not no_plot:
        # Save the prediction and plot the error distribution.
        pred_file, truth_file = ntwk.predict(Xpred_file, no_save=False)
        flags.eval_model = pred_file.replace('.', '_')  # To make the plot name different
        plotMSELossDistrib(pred_file, truth_file, flags)
    else:
        pred_file, truth_file = ntwk.predict(Xpred_file, no_save=True)
    print("Evaluation finished")
    return pred_file, truth_file, flags
def evaluate_forward_model(dirx, n_samples, invs=False):
    """Report average MSE / MRE / RSE of a GA forward model over the test set.

    :param dirx: directory holding the saved model and its flags
    :param n_samples: number of test samples to average over (loop breaks
        after this many; averages divide by n_samples regardless)
    :param invs: if True, evaluate the inverse direction: feed spectra in,
        then run the dataset simulator on the predicted geometry to obtain
        the predicted spectrum for comparison
    :return: None (results are printed)
    """
    print("DIRECTORY: ", dirx)
    flags = load_flags(dirx)
    flags.batch_size = 1  # evaluate one sample at a time
    train_loader, test_loader = data_reader.read_data(flags)
    GEN = GA(flags, train_loader, test_loader, inference_mode=True, saved_model=dirx)
    GEN.model.eval()  # disable dropout/batchnorm updates
    avg_mse, avg_mre, avg_rse = 0, 0, 0
    # Loader yields (geometry g, spectrum s) pairs.
    for i, (g, s) in enumerate(test_loader):
        if invs:
            # Inverse mode: swap so the model consumes the spectrum.
            z = s
            s = g
            g = z
        g = g.cuda()
        s = s.cuda()
        ps = GEN.model(g)  # model prediction for input g
        if invs:
            # Swap back, then simulate the predicted geometry to get the
            # predicted spectrum that is compared against the true one.
            pg = ps
            z = g
            g = s
            s = z
            ps = simulator(flags.data_set, pg.cpu().data.numpy())
            ps = torch.from_numpy(ps).cuda()
        mse = torch.nn.functional.mse_loss(s, ps)
        # Relative squared error: ||s - ps|| / ||s||.
        rse = torch.sqrt(torch.sum(torch.pow(s - ps, 2))) / torch.sqrt(
            torch.sum(torch.pow(s, 2)))
        # Mean relative error (NOTE(review): divides by s element-wise —
        # presumably s is never zero here; confirm for new datasets).
        mre = torch.mean(torch.abs(torch.div(s - ps, s)))
        avg_mse += mse.item()
        avg_rse += rse.item()
        avg_mre += mre.item()
        if i == (n_samples - 1):
            print('BROKE at sample {}'.format(i))
            break
    avg_mse /= n_samples
    avg_mre /= n_samples
    avg_rse /= n_samples
    print("\nMSE:\t{}\nMRE:\t{}\nRSE:\t{}".format(avg_mse, avg_mre, avg_rse))
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, test_ratio=None):
    """Evaluate a trained hybrid cINN + NA model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, save all trials to the multi-eval directory
    :param eval_data_all: if True, put all data in evaluation data
    :param test_ratio: optional override for the test ratio (inference-time sweep)
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.batch_size = 1
    flags.backprop_step = 300
    flags.eval_batch_size = 2048
    if test_ratio is None:
        flags.test_ratio = get_test_ratio_helper(flags)
    else:
        # Sweep the test ratio with respect to inference time,
        # keeping the batch size large enough.
        flags.test_ratio = test_ratio
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(flags,
                                                      eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(make_cINN_and_NA, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    # Report both sub-model parameter counts.
    for model in (ntwk.model_cINN, ntwk.model_NA):
        pytorch_total_params = sum(p.numel() for p in model.parameters()
                                   if p.requires_grad)
        print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        pred_file, truth_file = ntwk.evaluate(
            save_dir='/work/sr365/NIPS_multi_eval_backup/multi_eval/hybrid_cINN_NA_0bp/' + flags.data_set,
            save_all=True)
    else:
        pred_file, truth_file = ntwk.evaluate()
    # Plot the error distribution (meta_material has no simulator output).
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
def get_multi_features(features, i, emb_dim, use_pca=False):
    """Stack several embedding features into train/test matrices.

    The last two entries of *features* use the custom dimensions given in
    *emb_dim*; every other feature defaults to 300 dims.

    :param features: list of feature descriptors, e.g. "glove-twitter"
    :param i: fold / random-state index used to name the splits
    :param emb_dim: pair of dimensions for features[-2] and features[-1]
    :param use_pca: if True, PCA-reduce each feature block before stacking
    :return: (X_train, y_train, X_test, y_test)
    """
    X_train, y_train, X_test, y_test = [], [], [], []
    for item in features:
        ## distinguish twitter glove and common glove
        ## distinguish deepmoji sum and avg
        feature, ty, mode = featureAnalysis(item)
        if item == features[-2]:
            constant.emb_dim = emb_dim[0]
        elif item == features[-1]:
            constant.emb_dim = emb_dim[1]
        else:
            constant.emb_dim = 300
        print(feature)
        ## prepare data for feature-10 folders
        vocab = generate_vocab()
        train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i,
                                           dev_with_label=constant.dev_with_label,
                                           include_test=constant.include_test)
        ## feature_list: glove emoji elmo bert deepmoji emo2vec
        ## if you want twitter glove or common glove use ty='twitter' and ty='common'
        split_train = "merged_train" + str(i) if constant.include_test else "train" + str(i)
        split_val = "merged_val" + str(i) if constant.include_test else "valid" + str(i)
        print("Loading split", split_train)
        Xi_train, yi_train = get_feature(train, vocab, feature_list=[feature],
                                         mode=[mode], split=split_train, ty=ty)  ## [29010,3,emb_size] 3 is number of sentence
        Xi_test, yi_test = get_feature(val, vocab, feature_list=[feature],
                                       mode=[mode], split=split_val, ty=ty)  ## [1150,3,emb_size]
        if use_pca:
            Xi_train, Xi_test, _ = pca(Xi_train, Xi_test)
        if feature == "bert":
            # BERT features carry a singleton axis that must be dropped.
            Xi_train = np.squeeze(Xi_train, axis=2)
            Xi_test = np.squeeze(Xi_test, axis=2)
        if X_train == []:
            # First feature: start the accumulators.
            X_train, y_train = Xi_train, yi_train
            X_test, y_test = Xi_test, yi_test
        else:
            # Subsequent features: stack along the embedding axis.
            X_train = np.concatenate((X_train, Xi_train), axis=2)
            X_test = np.concatenate((X_test, Xi_test), axis=2)
    return X_train, y_train, X_test, y_test
def training_from_flag(flags):
    """Train a VAE network and archive the run.

    1. read data  2. build network  3. train  4. record flags
    :param flags: the training flags read from command line or parameter.py
    :return: None
    """
    # Load the train / test splits.
    loader_train, loader_test = data_reader.read_data(flags)
    print("Making network now")
    ntwk = Network(VAE, flags, loader_train, loader_test)
    print("Start training now...")
    ntwk.train()
    # House keeping: pickle the flags object and store the best validation
    # error alongside the checkpoint.
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
def evaluate_from_model(model_dir):
    """Evaluate a trained Forward/Backward (Tandem) model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :return: None
    """
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    # This reader takes the unpacked flag values rather than the flags object.
    loader_train, loader_test = data_reader.read_data(
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        batch_size=flags.batch_size,
        normalize_input=flags.normalize_input,
        data_dir=flags.data_dir)
    print("Making network now")
    ntwk = Network(Forward, Backward, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()
    # Visualize the prediction-error distribution.
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False,
                        save_misc=False, MSE_Simulator=False, save_Simulator_Ypred=True):
    """Evaluate a trained NA model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, generate all different inference trial results
    :param eval_data_all: if True, put all data in evaluation data
    :param save_misc: forwarded to Network.evaluate
    :param MSE_Simulator: forwarded to Network.evaluate
    :param save_Simulator_Ypred: forwarded to Network.evaluate; forced off
        for the meta_material dataset (no simulator there)
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.backprop_step = eval_flags.backprop_step
    flags.test_ratio = get_test_ratio_helper(flags)
    if flags.data_set == 'meta_material':
        save_Simulator_Ypred = False
        print("this is MM dataset, setting the save_Simulator_Ypred to False")
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    # Dataset-specific backprop learning rate.
    flags.lr = 0.01 if flags.data_set == 'chen' else 0.5
    flags.train_step = eval_flags.train_step
    print(flags)
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(NA, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        pred_file, truth_file = ntwk.evaluate(
            save_dir='../multi_eval/NA/' + flags.data_set, save_all=True,
            save_misc=save_misc, MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = ntwk.evaluate(
            save_misc=save_misc, MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    # Plot the error distribution at several quantiles.
    makePlots(pred_file, truth_file, flags,
              quantiles=[0.05, 0.25, 0.5, 0.75, 0.95])
    print("Evaluation finished")
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False,
                        save_misc=False, MSE_Simulator=False,
                        save_Simulator_Ypred=True, init_lr=0.5, BDY_strength=1):
    """Evaluate a trained Forward model (multi-eval capable).

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, generate all different inference trial results
    :param eval_data_all: if True, put all data in evaluation data (note:
        this is currently forced to True below regardless of the argument)
    :param save_misc: forwarded to Network.evaluate
    :param MSE_Simulator: forwarded to Network.evaluate
    :param save_Simulator_Ypred: forwarded to Network.evaluate
    :param init_lr: initial learning rate for the inference optimization
    :param BDY_strength: boundary-loss strength
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.test_ratio = get_test_ratio_helper(flags)
    # Dataset-specific evaluation batch sizes.
    if flags.data_set == 'Peurifoy':
        flags.eval_batch_size = 10000
    elif flags.data_set == 'Chen':
        flags.eval_batch_size = 10000
    elif flags.data_set == 'Yang' or flags.data_set == 'Yang_sim':
        flags.eval_batch_size = 2000
    flags.batch_size = flags.eval_batch_size
    flags.lr = init_lr
    flags.BDY_strength = BDY_strength
    # NOTE(review): the per-dataset eval_batch_size above is overridden here.
    flags.eval_batch_size = eval_flags.eval_batch_size
    flags.train_step = eval_flags.train_step
    # delete after usage: 02.07 for vilidating that ball and sine is convex problem
    # Use a very small eval batch size and expected to see that meta and robo
    # getting much worse performance and the ball and sine getting nearly
    # identical one: flags.eval_batch_size = 2
    print(flags)
    flags.batch_size = 500
    # Data loaders — evaluation currently always uses all the data.
    eval_data_all = True
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("LENGTH: ", len(loader_test))
    print("Making network now")
    ntwk = Network(Forward, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if multi_flag:
        dest_dir = '/home/sr365/MM_bench_multi_eval/NA/'
        # dest_dir = '/data/users/ben/multi_eval/NA_lr' + str(init_lr) + 'bdy_' + str(BDY_strength)+'/'
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        dest_dir += flags.data_set
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        pred_file, truth_file = ntwk.evaluate(
            save_dir=dest_dir, save_all=True, save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = ntwk.evaluate(
            save_dir='data/' + flags.data_set, save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    # Visualize the prediction-error distribution.
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
from torch.utils import data
import numpy as np
import utils.data_reader as data_reader
import utils.glove_pruner as glove_pruner
from tqdm import tqdm
import torch
from torch.autograd import Variable

# Paths and column names (change DATA_PATH to "../data" to run locally).
DATA_PATH = "data/"
TRAINING_SET_FILE = DATA_PATH + "beer-ratings/train.csv"
WORD_TO_EMBEDDING_FILE = DATA_PATH + "glove_prunned.txt"
REVIEWS_FIELD = "review/text"

# Loaded once at import time and shared by every Database instance.
TRAIN_SET = data_reader.read_data(TRAINING_SET_FILE)
WORD_TO_EMBEDDING = glove_pruner.load_words(WORD_TO_EMBEDDING_FILE)


class Database(object):
    """70/20/10 train/dev/test split view over the raw training set."""

    TOKENS_FIELD = "processed/tokens"
    EMBEDDINGS_FIELD = "processed/embeddings"

    def __init__(self):
        total = len(TRAIN_SET[REVIEWS_FIELD])
        train_end = int(0.7 * total)
        dev_end = int(0.9 * total)
        self.train_set = BeerReviewsDataset(TRAIN_SET, start=0, end=train_end)
        self.dev_set = BeerReviewsDataset(TRAIN_SET, start=train_end, end=dev_end)
        self.test_set = BeerReviewsDataset(TRAIN_SET, start=dev_end, end=total)
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False,
                        save_misc=False, MSE_Simulator=False,
                        save_Simulator_Ypred=False, preset_flag=None,
                        init_lr=0.5, BDY_strength=1):
    """Evaluate a trained Genetic-Algorithm (GA) model.

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, generate all different inference trial results
    :param eval_data_all: if True, put all data in evaluation data (note:
        this is currently forced to True below regardless of the argument)
    :param save_misc: forwarded to GA.evaluate
    :param MSE_Simulator: forwarded to GA.evaluate
    :param save_Simulator_Ypred: forwarded to GA.evaluate; forced off for
        the Yang_sim dataset (no simulator there)
    :param preset_flag: if given, replaces the loaded flags entirely
    :param init_lr: unused here, kept for interface symmetry
    :param BDY_strength: unused here, kept for interface symmetry
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.generations = eval_flags.generations
    flags.test_ratio = get_test_ratio_helper(flags)
    if flags.data_set == 'Yang_sim':
        save_Simulator_Ypred = False
        print("this is MM dataset, setting the save_Simulator_Ypred to False")
    # A preset flag object overrides everything configured above.
    flags = preset_flag if preset_flag else flags
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    print(flags)
    # Evaluation currently always uses all the data.
    eval_data_all = True
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")
    Genetic_Algorithm = GA(flags, loader_train, loader_test,
                           inference_mode=True, saved_model=flags.eval_model)
    print("Start eval now:")
    dname = flags.save_to
    if multi_flag:
        # Create the per-dataset destination folder on demand.
        dest_dir = './temp-dat/' + dname + '/'
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        dest_dir += flags.data_set
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        pred_file, truth_file = Genetic_Algorithm.evaluate(
            save_dir=dest_dir, save_all=True, save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = Genetic_Algorithm.evaluate(
            save_misc=save_misc, save_dir=dname,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    # Plotting is currently disabled:
    # plotMSELossDistrib(pred_file, truth_file, flags, save_dir=dname)
    print("Evaluation finished")
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False,
                        modulized_flag=False):
    """Evaluate a trained INN model (supports modulized multi-evaluation).

    1. retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: the folder to retrieve the model
    :param multi_flag: if True, run the repeated multi-evaluation
    :param eval_data_all: if True, put all data in evaluation data
    :param modulized_flag: if True, run the modulized multi-evaluation
    :return: None
    """
    print("Retrieving flag object for parameters")
    # Normalize the model name: drop an optional "models/" prefix.
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # switch the flags into eval mode
    flags.test_ratio = get_test_ratio_helper(flags)
    # Data loaders.
    loader_train, loader_test = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")
    ntwk = Network(INN, flags, loader_train, loader_test,
                   inference_mode=True, saved_model=flags.eval_model)
    print(ntwk.ckpt_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    print("Start eval now:")
    if modulized_flag:
        ntwk.evaluate_modulized_multi_time()
    elif multi_flag:
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()
    single_run = not multi_flag and not modulized_flag
    if flags.data_set != 'Yang_sim' and single_run:
        # meta-material does not have simulator, hence no Ypred given
        MSE = plotMSELossDistrib(pred_file, truth_file, flags)
        # Record this MSE back into the model folder.
        flags.best_validation_loss = MSE
        helper_functions.save_flags(flags, os.path.join("models", model_dir))
    elif flags.data_set == 'Yang_sim' and single_run:
        # Remember where we are so we can come back after the NA prediction.
        cwd = os.getcwd()
        abs_path_Xpred = os.path.abspath(pred_file.replace('Ypred', 'Xpred'))
        # Change to the NA directory to run the ensemble predictor.
        os.chdir('../NA/')
        MSE = predict.ensemble_predict_master('../Data/Yang_sim/state_dicts/',
                                              abs_path_Xpred, no_plot=False)
        # Record this MSE back into the model folder.
        flags.best_validation_loss = MSE
        os.chdir(cwd)
        helper_functions.save_flags(flags, os.path.join("models", model_dir))
    print("Evaluation finished")
from utils import data_reader from utils.helper_functions import save_flags, load_flags, simulator sets = ["Peurifoy", "Chen", "Yang_sim"] folder = 'loglog-scatter-vline-under-20-mse' if not os.path.exists('one-to-many/' + folder): os.mkdir('one-to-many/' + folder) for dset in sets: flags = flag_reader.read_flag() flags.data_set = dset flags.model_name = flags.data_set.lower() flags.eval_model = flags.model_name train_loader, test_loader = data_reader.read_data(flags, eval_data_all=True) geo = None spect = None for g, s in test_loader: if geo is None: geo = g.data.numpy() spect = s.data.numpy() else: geo = np.vstack((geo, g.data.numpy())) spect = np.vstack((spect, s.data.numpy())) geo = torch.from_numpy(np.array(geo)).cuda() spect = torch.from_numpy(np.array(spect)).cuda() for i in range(len(geo)):
feature = item[:(item.find('-') - 1)] else: ty = 'common' feature = item ## compute Micro F1 score for each feature for j in range(1, 10): c = j / 1000 model = get_classifier(ty='LR', c=c) microF1s = 0 for i in range(constant.num_split): ## prepare data for feature-10 folders vocab = generate_vocab() train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i) ## feature_list: glove emoji elmo bert deepmoji emo2vec ## if you want twitter glove or common glove use ty='twitter' and ty='common' X_train, y_train = get_feature( train, vocab, feature_list=[feature], mode=['sum'], split="train", ty=ty) ## [29010,3,emb_size] 3 is number of sentence X_test, y_test = get_feature(val, vocab, feature_list=[feature], mode=['sum'], split="valid", ty=ty) ## [1150,3,emb_size]
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, save_misc=False,
                        MSE_Simulator=False, save_Simulator_Ypred=True, init_lr=0.01,
                        lr_decay=0.9, BDY_strength=1, save_dir='data/', noise_level=0,
                        md_coeff=0, md_start=None, md_end=None, md_radius=None,
                        eval_batch_size=None):
    """
    Evaluating interface for the NA (neural-adjoint) model.
    1. Retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: The folder to retrieve the model from (with or without a "models/" prefix)
    :param multi_flag: switch to generate all the different inference trial results
    :param eval_data_all: switch to put all data in the evaluation set
    :param save_misc / MSE_Simulator / save_Simulator_Ypred / noise_level: forwarded to ntwk.evaluate
    :param init_lr / lr_decay / BDY_strength / md_* / eval_batch_size: backprop-eval
        hyperparameters written onto the loaded flags object before evaluation
    :param save_dir: output directory (joined under a fixed multi-eval root when multi_flag)
    :return: MSE of the evaluation, or None for 'Yang' datasets (early return, no simulator Ypred)
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip a leading "models/" so the name may be given with or without the folder.
    # (Check the full "models/" prefix so a name like "models_v2" is not mangled.)
    if model_dir.startswith("models/"):
        model_dir = model_dir[len("models/"):]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir                        # Reset the eval mode
    flags.test_ratio = get_test_ratio_helper(flags)
    flags.backprop_step = eval_flags.backprop_step      # NOTE(review): overwritten below (= 300)
    if flags.data_set is not None:
        # NOTE(review): this was probably meant to test `== 'Yang_sim'` (see the old
        # inline comment); as written it fires for every dataset, so the message below
        # is misleading for non-Yang data — confirm the intended condition.
        save_Simulator_Ypred = False
        print("this is Yang sim dataset, setting the save_Simulator_Ypred to False")
    flags.batch_size = 1                                # For backprop eval mode, batchsize is always 1
    flags.BDY_strength = BDY_strength
    flags.train_step = eval_flags.train_step
    flags.backprop_step = 300

    # MD Loss: new version. NOTE(review): md_coeff defaults to 0 (not None), so this
    # first branch normally runs and writes 0 — confirm that is intended.
    if md_coeff is not None:
        flags.md_coeff = md_coeff
    if md_start is not None:
        flags.md_start = md_start
    if md_end is not None:
        flags.md_end = md_end
    if md_radius is not None:
        flags.md_radius = md_radius

    ############################# Things that are changing #########################
    flags.lr = init_lr
    flags.lr_decay_rate = lr_decay
    flags.eval_batch_size = 2048 if eval_batch_size is None else eval_batch_size
    flags.optim = 'Adam'
    ###############################################################################
    print(flags)

    # Get the data
    train_loader, test_loader = data_reader.read_data(flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network
    ntwk = Network(NA, flags, train_loader, test_loader,
                   inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters() if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Multi-eval results go under a fixed machine-local root, per dataset.
        dest_dir = os.path.join('/home/sr365/MDNA_temp/', save_dir)
        dest_dir = os.path.join(dest_dir, flags.data_set)
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)
        pred_file, truth_file = ntwk.evaluate(save_dir=dest_dir, save_all=True,
                                              save_misc=save_misc,
                                              MSE_Simulator=MSE_Simulator,
                                              save_Simulator_Ypred=save_Simulator_Ypred,
                                              noise_level=noise_level)
    else:
        # Create the directory if it does not exist
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        pred_file, truth_file = ntwk.evaluate(save_dir=save_dir, save_misc=save_misc,
                                              MSE_Simulator=MSE_Simulator,
                                              save_Simulator_Ypred=save_Simulator_Ypred,
                                              noise_level=noise_level)

    if 'Yang' in flags.data_set:
        # Yang datasets: no MSE distribution plotted here, nothing to return
        return
    # Plot the MSE distribution
    MSE = plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
    return MSE
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, save_misc=False,
                        MSE_Simulator=False, save_Simulator_Ypred=False):
    """
    Evaluating interface for the Backprop model.
    1. Retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: The folder to retrieve the model from (with or without a "models/" prefix)
    :param multi_flag: switch to generate all the different inference trial results
    :param eval_data_all: switch to put all data in the evaluation set
    :param save_misc / MSE_Simulator / save_Simulator_Ypred: forwarded to ntwk.evaluate
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip a leading "models/" so the name may be given with or without the folder.
    # (Check the full "models/" prefix so a name like "models_v2" is not mangled.)
    if model_dir.startswith("models/"):
        model_dir = model_dir[len("models/"):]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir                        # Reset the eval mode
    flags.backprop_step = eval_flags.backprop_step

    # Per-dataset test ratios (the inline counts are the resulting evaluation-set sizes)
    if flags.data_set == 'ballistics':
        flags.test_ratio = 0.0078                       # 12800 in total
    elif flags.data_set == 'sine_wave':
        flags.test_ratio = 0.001                        # 8000 in total
    elif flags.data_set == 'robotic_arm':
        flags.test_ratio = 0.1                          # 10000 in total
    else:
        flags.test_ratio = 0.0051062 / 2

    flags.batch_size = 1                                # For backprop eval mode, batchsize is always 1
    flags.lr = 1e-2
    if flags.data_set == 'ballistics':
        flags.lr = 1                                    # ballistics needs a much larger backprop lr
    flags.train_step = eval_flags.train_step

    # NOTE(review): range(4000, 5000, 2000) yields only [4000], so this "sweep"
    # evaluates a single eval_batch_size (3 repeat trials via j). Widen the range
    # or shrink the step if more batch sizes are meant to be swept.
    for i in range(4000, 5000, 2000):
        for j in range(3):
            flags.eval_batch_size = i
            # Get the data
            train_loader, test_loader = data_reader.read_data(flags,
                                                              eval_data_all=eval_data_all)
            print("Making network now")
            # Make Network
            ntwk = Network(Backprop, flags, train_loader, test_loader,
                           inference_mode=True, saved_model=flags.eval_model)
            print("number of trainable parameters is :")
            pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                                       if p.requires_grad)
            print(pytorch_total_params)
            # Evaluation process
            print("Start eval now:")
            if multi_flag:
                pred_file, truth_file = ntwk.evaluate(
                    save_dir='D:/Yang_MM_Absorber_ML/NA/' + flags.data_set, save_all=True,
                    save_misc=save_misc, MSE_Simulator=MSE_Simulator,
                    save_Simulator_Ypred=save_Simulator_Ypred)
            else:
                # One output folder per (batch size, trial) pair
                pred_file, truth_file = ntwk.evaluate(
                    save_dir='D:/Yang_MM_Absorber_ML/Backprop/data/' + str(i) + '/' + str(j + 1),
                    save_misc=save_misc, MSE_Simulator=MSE_Simulator,
                    save_Simulator_Ypred=save_Simulator_Ypred)
            # Plot the MSE distribution
            plotMSELossDistrib(
                pred_file, truth_file, flags,
                save_dir='D:/Yang_MM_Absorber_ML/Backprop/data/' + str(i) + '/' + str(j + 1))
    print("Evaluation finished")