示例#1
0
def training_from_flag(flags):
    """
    Training interface: read the data, build the network, train it, and record the flags.
    :param flags: the training flags read from command line or parameter.py
    :return: None
    """
    # Hide all GPUs from CUDA when CPU-only training is requested
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Load the train / test data loaders
    train_loader, test_loader = data_reader.read_data(flags)

    # When inputs are normalized, the geometry boundary is reset to the unit box
    if flags.normalize_input:
        flags.geoboundary_norm = [-1, 1, -1, 1]

    print("Boundary is set at:", flags.geoboundary)
    print("Making network now")

    # Build the network wrapper around the Forward model
    ntwk = Network(Forward, flags, train_loader, test_loader)

    # Run the training loop
    print("Start training now...")
    ntwk.train()

    # House-keeping: pickle the flags and the best validation error into the checkpoint folder
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
示例#2
0
def predict_from_model(pre_trained_model, Xpred_file, shrink_factor=1, save_name=''):
    """
    Predicting interface. 1. Retrieve the flags 2. get data 3. initialize network 4. predict
    :param pre_trained_model: The folder to retrieve the pre-trained model from
    :param Xpred_file: The file of inputs to run the prediction on
    :param shrink_factor: Forwarded to Network.predict and encoded into the save prefix
    :param save_name: Prefix prepended to the saved prediction file name
    :return: None
    """
    # Retrieve the flag object
    print("This is doing the prediction for file", Xpred_file)
    print("Retrieving flag object for parameters")
    # Bug fix: eval_model was previously only assigned inside the if-branch,
    # raising NameError below whenever the path did not start with "models".
    eval_model = pre_trained_model
    if pre_trained_model.startswith("models"):
        eval_model = pre_trained_model[7:]
        print("after removing prefix models/, now model_dir is:", eval_model)

    flags = load_flags(pre_trained_model)                       # Get the pre-trained model
    flags.eval_model = eval_model                    # Reset the eval model

    # Get the data, this part is useless in prediction but just for simplicity
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")

    # Make Network (inference mode restores the saved checkpoint)
    ntwk = Network(Backprop, flags, train_loader, test_loader, inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters() if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    pred_file, truth_file = ntwk.predict(Xpred_file, save_prefix=save_name + 'shrink_factor' + str(shrink_factor), shrink_factor=shrink_factor)
示例#3
0
def evaluate_from_model(model_dir):
    """
    Evaluating interface: retrieve the flags, read data, build the network, evaluate.
    :param model_dir: The folder to retrieve the model
    :return: None
    """
    # Strip an optional "models/" prefix so the path join below does not double it
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Point the flags at the model being evaluated

    # Load the train / test data loaders
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")

    # Build the network in inference mode from the saved checkpoint
    ntwk = Network(Forward, flags, train_loader, test_loader,
                   inference_mode=True, saved_model=flags.eval_model)

    # Run the evaluation
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()

    # Plot the distribution of MSE across predictions
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#4
0
def get_dataset_words(filename_list, field):
    """Collect the set of unique tokens appearing in *field* across all given files."""
    vocabulary = set()
    for fname in tqdm(filename_list):
        data = data_reader.read_data(fname)
        # Tokenize every entry of the requested field and merge into the running set
        for entry in data[field]:
            vocabulary |= set(data_reader.tokenize(entry, None))
    return vocabulary
示例#5
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    # NOTE(review): eval_flags is assumed to be a module-level flag object — confirm import
    flags.backprop_step = eval_flags.backprop_step
    # Per-dataset test split size used for evaluation
    if flags.data_set == 'ballistics':
        flags.test_ratio = 0.001
    elif flags.data_set == 'sine_wave':
        flags.test_ratio = 0.005
    elif flags.data_set == 'robotic_arm':
        flags.test_ratio = 0.2
    elif flags.data_set == 'sine_test_1d':
        flags.test_ratio = 0.05
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    flags.lr = 0.05  # NOTE(review): presumably the lr for the backprop inference steps — confirm
    flags.eval_batch_size = eval_flags.eval_batch_size
    flags.train_step = eval_flags.train_step

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network (inference mode restores the saved checkpoint)
    ntwk = Network(Backprop,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Save every inference trial under the shared multi-eval directory
        pred_file, truth_file = ntwk.evaluate(
            save_dir='/work/sr365/multi_eval/Backprop/' + flags.data_set,
            save_all=True)
    else:
        pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#6
0
def get_features_for_prediction(features, i, use_pca=False):
    """
    Build concatenated feature matrices for final train/test prediction.

    For every feature descriptor in *features*, extract the feature for the
    train split and the (label-less) dev split, then concatenate them along
    the embedding axis.

    :param features: list of feature descriptor strings, parsed by featureAnalysis
    :param i: fold / random-state index selecting the data split
    :param use_pca: if True, PCA-reduce each feature before concatenation
    :return: (X_train, y_train, X_val, y_val, X_test, ind, X_text) where X_val /
             y_val stay empty lists (validation extraction is disabled) and
             ind / X_text come from the last processed feature's dev split
    """
    X_train, y_train, X_test, X_val, y_val = [], [], [], [], []

    for item in features:
        # Distinguish twitter glove vs common glove and deepmoji sum vs avg
        feature, ty, mode = featureAnalysis(item)

        # Set the embedding dimension expected by the downstream extractor.
        # Bug fix: the original `elif: feature=="emoji":` was a syntax error.
        if feature == "glove" and ty == "twitter":
            constant.emb_dim = 200
        elif feature == "emoji":
            # emoji embeddings keep whatever dimension is currently configured
            pass
        else:
            constant.emb_dim = 300

        print(feature)
        ## prepare data for feature-10 folders
        vocab = generate_vocab(include_test=True)
        train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i, dev_with_label=False, include_test=True)
        ## Add labels to dev_no_lab for getting features
        ind = dev_no_lab[0]
        X_text = dev_no_lab[1]
        # Dev set has no labels; fill with the "others" placeholder class
        labels = ["others"] * len(ind)
        dev = (ind, X_text, labels)

        ## feature_list: glove emoji elmo bert deepmoji emo2vec
        ## if you want twitter glove or common glove use  ty='twitter' and ty='common'
        print(ty)
        Xi_train, yi_train = get_feature(train, vocab, feature_list=[feature], mode=[mode], split="final_train"+str(i), ty=[ty])  ## [29010,3,emb_size] 3 is number of sentence
        Xi_test, _ = get_feature(dev, vocab, feature_list=[feature], mode=[mode], split="final_test"+str(i), ty=[ty])  ## [2755,3,emb_size]

        if use_pca:
            # Bug fix: the original passed the never-computed Xi_val to pca().
            # Follow the 2-argument pca(train, test) convention used elsewhere
            # in this project and ignore the third return value.
            Xi_train, Xi_test, _ = pca(Xi_train, Xi_test)

        # Robustness fix: the original `X_train == []` comparison misbehaves
        # once X_train is an ndarray; test emptiness by length instead.
        if len(X_train) == 0:
            # First feature: initialize the accumulated matrices
            X_train = Xi_train
            y_train = yi_train
            X_test = Xi_test
        else:
            # Subsequent features: concatenate along the embedding axis
            X_train = np.concatenate((X_train, Xi_train), axis=2)
            X_test = np.concatenate((X_test, Xi_test), axis=2)
    return X_train, y_train, X_val, y_val, X_test, ind, X_text
示例#7
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: If True, run the multiple-time evaluation path instead of a single pass
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir                    # Reset the eval mode

    # Set up the test_ratio (per-dataset evaluation split size)
    if flags.data_set == 'ballistics':
        flags.test_ratio = 0.1
    elif flags.data_set == 'sine_wave':
        flags.test_ratio = 0.1
    elif flags.data_set == 'robotic_arm':
        flags.test_ratio = 0.1

    # Get the data
    train_loader, test_loader = data_reader.read_data(flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network (inference mode restores the saved INN checkpoint)
    ntwk = Network(INN, flags, train_loader, test_loader, inference_mode=True, saved_model=flags.eval_model)
    print(ntwk.ckpt_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters() if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Multi-trial evaluation saves its own outputs; pred_file/truth_file stay unset here
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution (guard: pred_file only exists in the single-eval branch)
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")

    # If gaussian, plot the scatter plot of the generated points
    if flags.data_set == 'gaussian_mixture':
        Xpred = helper_functions.get_Xpred(path='data/', name=flags.eval_model)
        Ypred = helper_functions.get_Ypred(path='data/', name=flags.eval_model)

        # Plot the points scatter
        generate_Gaussian.plotData(Xpred, Ypred, save_dir='data/' + flags.eval_model.replace('/','_') + 'generation plot.png', eval_mode=True)
示例#8
0
def get_single_feature_for_svm(feature, ty, i):
    """Extract one feature type for the train/valid splits and PCA-reduce it for an SVM."""
    # Build the vocabulary and read the fold-i data split
    vocab = generate_vocab()
    train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i, dev_with_label=constant.dev_with_label, include_test=constant.include_test)

    ## feature_list: glove emoji elmo bert deepmoji emo2vec
    ## if you want twitter glove or common glove use  ty='twitter' and ty='common'
    X_train, y_train = get_feature(train, vocab, feature_list=[feature], mode=['sum'], split="train", ty=ty)
    X_test, y_test = get_feature(val, vocab, feature_list=[feature], mode=['sum'], split="valid", ty=ty)

    # Reduce dimensionality with PCA; the third return value is unused here
    X_train_reduced, X_test_reduced, _ = pca(X_train, X_test)

    return X_train_reduced, y_train, X_test_reduced, y_test
示例#9
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: If True, run the multiple-time evaluation path instead of a single pass
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    # Absolute paths are loaded as-is; relative ones are resolved under "models/"
    if model_dir.startswith('/'):  # It is a absolute path
        flags = helper_functions.load_flags(model_dir)
    else:
        flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    flags.test_ratio = get_test_ratio_helper(flags)

    # 2020.10.10 only, delete afterward
    # NOTE(review): temporary doubling of the test split left in place — confirm removal
    flags.test_ratio *= 2

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network (inference mode restores the saved MDN checkpoint)
    ntwk = Network(MDN,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print(model_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Multi-trial evaluation saves its own outputs; pred_file/truth_file stay unset here
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution (guard: pred_file only exists in the single-eval branch)
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#10
0
def predict_from_model(pre_trained_model, Xpred_file, no_plot=True):
    """
    Predicting interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param pre_trained_model: The folder to retrieve the model
    :param Xpred_file: The Prediction file position
    :param no_plot: If True, do not plot (For multi_eval)
    :return: (pred_file, truth_file, flags)
    """
    # Retrieve the flag object
    print("This is doing the prediction for file", Xpred_file)
    print("Retrieving flag object for parameters")
    # NOTE(review): eval_model is computed here but never used below —
    # flags.eval_model keeps the full pre_trained_model path; confirm intended
    if (pre_trained_model.startswith("models")):
        eval_model = pre_trained_model[7:]
        print("after removing prefix models/, now model_dir is:", eval_model)

    flags = load_flags(pre_trained_model)  # Get the pre-trained model
    flags.eval_model = pre_trained_model  # Reset the eval mode
    flags.test_ratio = 0.1  #useless number

    # Get the data, this part is useless in prediction but just for simplicity
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")

    # Make Network (inference mode restores the saved checkpoint)
    ntwk = Network(Backprop,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
    # Evaluation process
    print("Start eval now:")

    if not no_plot:
        # Plot the MSE distribution; prediction outputs are saved to disk
        pred_file, truth_file = ntwk.predict(Xpred_file, no_save=False)
        # Rename so the plot file name is distinct from the prediction file
        flags.eval_model = pred_file.replace(
            '.', '_')  # To make the plot name different
        plotMSELossDistrib(pred_file, truth_file, flags)
    else:
        pred_file, truth_file = ntwk.predict(Xpred_file, no_save=True)

    print("Evaluation finished")

    return pred_file, truth_file, flags
示例#11
0
def evaluate_forward_model(dirx, n_samples, invs=False):
    """
    Evaluate a saved forward model on up to n_samples test points and print
    the average MSE / MRE / RSE.

    :param dirx: directory of the saved model (also used to load its flags)
    :param n_samples: maximum number of test samples to evaluate
    :param invs: if True, swap the (g, s) roles coming from the loader, run the
                 model on the swapped input, and push the prediction through the
                 simulator to obtain a comparable output
    :return: None
    """
    print("DIRECTORY: ", dirx)
    flags = load_flags(dirx)
    flags.batch_size = 1  # evaluate one sample at a time
    train_loader, test_loader = data_reader.read_data(flags)
    GEN = GA(flags,
             train_loader,
             test_loader,
             inference_mode=True,
             saved_model=dirx)

    GEN.model.eval()
    avg_mse, avg_mre, avg_rse = 0, 0, 0
    num_evaluated = 0  # number of samples actually accumulated
    for i, (g, s) in enumerate(test_loader):
        if invs:
            # Swap roles: feed the "s" side of the pair to the model instead
            g, s = s, g

        g = g.cuda()
        s = s.cuda()
        ps = GEN.model(g)

        if invs:
            # The model predicted the inverse quantity; re-simulate it to get a
            # comparable output, and swap g/s back for the metric computation
            pg = ps
            g, s = s, g
            ps = simulator(flags.data_set, pg.cpu().data.numpy())
            ps = torch.from_numpy(ps).cuda()

        mse = torch.nn.functional.mse_loss(s, ps)
        rse = torch.sqrt(torch.sum(torch.pow(s - ps, 2))) / torch.sqrt(
            torch.sum(torch.pow(s, 2)))
        # NOTE: mre divides by s elementwise and is undefined if s contains zeros
        mre = torch.mean(torch.abs(torch.div(s - ps, s)))

        avg_mse += mse.item()
        avg_rse += rse.item()
        avg_mre += mre.item()
        num_evaluated += 1

        if i == (n_samples - 1):
            print('BROKE at sample {}'.format(i))
            break

    # Bug fix: divide by the number of samples actually evaluated — the loader
    # may hold fewer than n_samples entries, which previously skewed the averages.
    if num_evaluated > 0:
        avg_mse /= num_evaluated
        avg_mre /= num_evaluated
        avg_rse /= num_evaluated

    print("\nMSE:\t{}\nMRE:\t{}\nRSE:\t{}".format(avg_mse, avg_mre, avg_rse))
示例#12
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, test_ratio=None):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: If True, save all inference trials to the multi-eval directory
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param test_ratio: Overrides the per-dataset test ratio when given (inference-time sweeps)
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir                    # Reset the eval mode
    flags.batch_size = 1
    flags.backprop_step=300
    flags.eval_batch_size=2048

    if test_ratio is None:
        flags.test_ratio = get_test_ratio_helper(flags)
    else:
        # To make the test ratio swipe with respect to inference time
        # also making the batch size large enough
        flags.test_ratio = test_ratio
    # Get the data
    train_loader, test_loader = data_reader.read_data(flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network (hybrid cINN + NA model, inference mode restores the checkpoint)
    ntwk = Network(make_cINN_and_NA, flags, train_loader, test_loader, inference_mode=True, saved_model=flags.eval_model)
    #print(model_dir)
    # Report parameter counts for both sub-models
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model_cINN.parameters() if p.requires_grad)
    print(pytorch_total_params)
    pytorch_total_params = sum(p.numel() for p in ntwk.model_NA.parameters() if p.requires_grad)
    print(pytorch_total_params)
    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # NOTE(review): hard-coded machine-specific save directory — confirm it exists on this host
        pred_file, truth_file = ntwk.evaluate(save_dir='/work/sr365/NIPS_multi_eval_backup/multi_eval/hybrid_cINN_NA_0bp/'+flags.data_set, save_all=True)
    else:
        pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution
    if flags.data_set != 'meta_material' and not multi_flag:
        plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#13
0
def get_multi_features(features, i, emb_dim, use_pca=False):
    """
    Build train/test matrices by extracting several features and concatenating
    them along the embedding axis.
    :param features: list of feature descriptor strings, parsed by featureAnalysis
    :param i: fold / random-state index selecting the data split
    :param emb_dim: two-element sequence of embedding dims for the last two features
    :param use_pca: if True, PCA-reduce each feature before concatenation
    :return: (X_train, y_train, X_test, y_test)
    """
    X_train, y_train, X_test, y_test = [],[],[],[]

    for item in features:
        ## distinguish twitter glove and common glove
        ## distinguish deepmoji sum and avg
        feature, ty, mode = featureAnalysis(item)

        # Positional convention: the last two entries of *features* carry the
        # custom embedding dims from emb_dim; all earlier ones default to 300
        if item == features[-2]:
            constant.emb_dim = emb_dim[0]
        elif item == features[-1]:
            constant.emb_dim = emb_dim[1]
        else:
            constant.emb_dim = 300

        print(feature)
        ## prepare data for feature-10 folders
        vocab = generate_vocab()
        train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i, dev_with_label=constant.dev_with_label, include_test=constant.include_test)

        ## feature_list: glove emoji elmo bert deepmoji emo2vec
        ## if you want twitter glove or common glove use  ty='twitter' and ty='common'
        # Split names differ when the merged (train+test) configuration is active
        split_train = "merged_train"+str(i) if constant.include_test else "train"+str(i)
        split_val = "merged_val"+str(i) if constant.include_test else "valid"+str(i)

        print("Loading split", split_train)

        Xi_train, yi_train = get_feature(train, vocab, feature_list=[feature], mode=[mode],split=split_train,ty=ty) ## [29010,3,emb_size] 3 is number of sentence
        Xi_test, yi_test = get_feature(val, vocab, feature_list=[feature], mode=[mode],split=split_val,ty=ty) ## [1150,3,emb_size]

        if use_pca:
            Xi_train, Xi_test, _ = pca(Xi_train, Xi_test)
            pass

        # bert features carry an extra singleton axis; drop it before concatenation
        if feature == "bert":
            Xi_train = np.squeeze(Xi_train,axis = 2)
            Xi_test = np.squeeze(Xi_test,axis = 2)
            pass
        # NOTE(review): once X_train is an ndarray this `== []` compare relies on
        # numpy returning a falsy result for mismatched shapes — consider len()
        if X_train==[]:
            X_train = Xi_train
            y_train = yi_train
            X_test = Xi_test
            y_test = yi_test
        else:
            X_train = np.concatenate((X_train, Xi_train), axis = 2)
            X_test = np.concatenate((X_test, Xi_test), axis = 2)
    return X_train, y_train, X_test, y_test
示例#14
0
def training_from_flag(flags):
    """
    Training interface: read the data, build the VAE network, train it, and record the flags.
    :param flags: the training flags read from command line or parameter.py
    :return: None
    """
    # Load the train / test data loaders
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")

    # Build the network wrapper around the VAE model
    ntwk = Network(VAE, flags, train_loader, test_loader)

    # Run the training loop
    print("Start training now...")
    ntwk.train()

    # House-keeping: pickle the flags and the best validation error into the checkpoint folder
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
示例#15
0
def evaluate_from_model(model_dir):
    """
    Evaluating interface: retrieve the flags, read data, build the network, evaluate.
    :param model_dir: The folder to retrieve the model
    :return: None
    """
    # Recover the pickled flag object of the trained model
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Point the flags at the model being evaluated
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1

    # Read data; this reader variant takes individual flag fields
    train_loader, test_loader = data_reader.read_data(
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        batch_size=flags.batch_size,
        normalize_input=flags.normalize_input,
        data_dir=flags.data_dir)
    print("Making network now")

    # Build the network in inference mode from the saved checkpoint
    ntwk = Network(Forward, Backward, flags, train_loader, test_loader,
                   inference_mode=True, saved_model=flags.eval_model)

    # Run the evaluation
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()

    # Plot the distribution of MSE across predictions
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#16
0
def evaluate_from_model(model_dir,
                        multi_flag=False,
                        eval_data_all=False,
                        save_misc=False,
                        MSE_Simulator=False,
                        save_Simulator_Ypred=True):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param save_misc: Forwarded to Network.evaluate
    :param MSE_Simulator: Forwarded to Network.evaluate
    :param save_Simulator_Ypred: Forwarded to Network.evaluate; forced to False for meta_material
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    # NOTE(review): eval_flags is assumed to be a module-level flag object — confirm import
    flags.backprop_step = eval_flags.backprop_step
    flags.test_ratio = get_test_ratio_helper(flags)

    if flags.data_set == 'meta_material':
        # NOTE(review): MM dataset disables saving simulator Ypred — reason not stated here
        save_Simulator_Ypred = False
        print("this is MM dataset, setting the save_Simulator_Ypred to False")
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    # Per-dataset inference learning rate
    if flags.data_set == 'chen': flags.lr = 0.01
    else: flags.lr = 0.5

    flags.train_step = eval_flags.train_step

    print(flags)

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network (inference mode restores the saved NA checkpoint)
    ntwk = Network(NA,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Save every inference trial under the shared multi-eval directory
        pred_file, truth_file = ntwk.evaluate(
            save_dir='../multi_eval/NA/' + flags.data_set,
            save_all=True,
            save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = ntwk.evaluate(
            save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)

    # Plot the MSE distribution across the requested quantiles
    makePlots(pred_file,
              truth_file,
              flags,
              quantiles=[0.05, 0.25, 0.5, 0.75, 0.95])
    print("Evaluation finished")
示例#17
0
def evaluate_from_model(model_dir,
                        multi_flag=False,
                        eval_data_all=False,
                        save_misc=False,
                        MSE_Simulator=False,
                        save_Simulator_Ypred=True,
                        init_lr=0.5,
                        BDY_strength=1):
    """
    Evaluating interface. 1. Retreive the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param save_misc: Forwarded to Network.evaluate
    :param MSE_Simulator: Forwarded to Network.evaluate
    :param save_Simulator_Ypred: Forwarded to Network.evaluate
    :param init_lr: Initial learning rate stored into flags.lr for inference
    :param BDY_strength: Boundary-loss strength stored into flags.BDY_strength
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    # Strip an optional "models/" prefix so the path join below does not double it
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    flags.test_ratio = get_test_ratio_helper(flags)

    # Per-dataset eval batch size
    if flags.data_set == 'Peurifoy':
        flags.eval_batch_size = 10000
    elif flags.data_set == 'Chen':
        flags.eval_batch_size = 10000
    elif flags.data_set == 'Yang' or flags.data_set == 'Yang_sim':
        flags.eval_batch_size = 2000

    flags.batch_size = flags.eval_batch_size
    flags.lr = init_lr
    flags.BDY_strength = BDY_strength
    # NOTE(review): this overwrites the per-dataset eval_batch_size set above — confirm intended
    flags.eval_batch_size = eval_flags.eval_batch_size
    flags.train_step = eval_flags.train_step

    # delete after usage: 02.07 for vilidating that ball and sine is convex problem
    # Use a very small eval batch size and expected to see that meta and robo getting much worse performance
    # and the ball and sine getting nearly identical one
    # flags.eval_batch_size = 2

    print(flags)
    # NOTE(review): overrides the batch_size assigned above — confirm intended
    flags.batch_size = 500

    # Get the data
    # NOTE(review): the eval_data_all parameter is forced to True here — confirm intended
    eval_data_all = True
    train_loader, test_loader = data_reader.read_data(
        flags, eval_data_all=eval_data_all)

    print("LENGTH: ", len(test_loader))

    print("Making network now")

    # Make Network (inference mode restores the saved checkpoint)
    ntwk = Network(Forward,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Build the per-dataset destination directory (created on demand)
        dest_dir = '/home/sr365/MM_bench_multi_eval/NA/'
        #dest_dir = '/data/users/ben/multi_eval/NA_lr' + str(init_lr)  + 'bdy_' + str(BDY_strength)+'/'
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        dest_dir += flags.data_set
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)
        #pred_file, truth_file = ntwk.evaluate(save_dir='/work/sr365/multi_eval/NA/' + flags.data_set, save_all=True,
        pred_file, truth_file = ntwk.evaluate(
            save_dir=dest_dir,
            save_all=True,
            save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = ntwk.evaluate(
            save_dir='data/' + flags.data_set,
            save_misc=save_misc,
            MSE_Simulator=MSE_Simulator,
            save_Simulator_Ypred=save_Simulator_Ypred)

    # Plot the MSE distribution
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
示例#18
0
from torch.utils import data
import numpy as np
import utils.data_reader as data_reader
import utils.glove_pruner as glove_pruner
from tqdm import tqdm
import torch
from torch.autograd import Variable

# Paths and field names for the beer-reviews dataset
DATA_PATH = "data/" #change to "../data" if want to run locally
TRAINING_SET_FILE = DATA_PATH + "beer-ratings/train.csv"
WORD_TO_EMBEDDING_FILE = DATA_PATH + "glove_prunned.txt"

# Column name of the raw review text in the CSV
REVIEWS_FIELD = "review/text"

# Loaded once at import time; module-level caches shared by the classes below
TRAIN_SET = data_reader.read_data(TRAINING_SET_FILE)
WORD_TO_EMBEDDING = glove_pruner.load_words(WORD_TO_EMBEDDING_FILE)


class Database(object):
    """Splits the module-level TRAIN_SET into train/dev/test BeerReviewsDataset views."""
    # Field names for the processed token / embedding columns
    TOKENS_FIELD = "processed/tokens"
    EMBEDDINGS_FIELD = "processed/embeddings"
    def __init__(self):
        # 70% train / 20% dev / 10% test split by row index over the raw set
        raw_train_set_length = len(TRAIN_SET[REVIEWS_FIELD])
        train_set_end = int(0.7 * raw_train_set_length)
        dev_set_end = int(0.9 * raw_train_set_length)

        # NOTE(review): BeerReviewsDataset is expected to be defined elsewhere in this module
        self.train_set = BeerReviewsDataset(TRAIN_SET, start=0, end=train_set_end)
        self.dev_set = BeerReviewsDataset(TRAIN_SET, start=train_set_end, end=dev_set_end)
        self.test_set = BeerReviewsDataset(TRAIN_SET, start=dev_set_end, end=raw_train_set_length)

示例#19
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, save_misc=False, MSE_Simulator=False,
                        save_Simulator_Ypred=False, preset_flag=None, init_lr=0.5, BDY_strength=1):
    """
    Evaluating interface. 1. Retrieve the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
        (NOTE(review): currently forced to True below -- see inline note)
    :param save_misc: forwarded to Genetic_Algorithm.evaluate()
    :param MSE_Simulator: forwarded to Genetic_Algorithm.evaluate()
    :param save_Simulator_Ypred: forwarded to Genetic_Algorithm.evaluate(); forced False for Yang_sim
    :param preset_flag: if given, replaces the flags loaded from disk entirely
    :param init_lr: unused here -- kept for signature compatibility with sibling variants
    :param BDY_strength: unused here -- kept for signature compatibility with sibling variants
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]  # strip the "models/" prefix so it can be re-joined below
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    flags.generations = eval_flags.generations
    flags.test_ratio = get_test_ratio_helper(flags)

    if flags.data_set == 'Yang_sim':
        # Yang_sim (meta-material) has no simulator Ypred to save
        save_Simulator_Ypred = False
        print("this is MM dataset, setting the save_Simulator_Ypred to False")

    flags = preset_flag if preset_flag else flags
    flags.batch_size = 1 # For backprop eval mode, batchsize is always 1
    print(flags)

    # NOTE(review): this unconditionally overrides the eval_data_all argument; it looks
    # like a debugging leftover -- confirm before honoring the parameter instead.
    eval_data_all = True
    # Get the data
    train_loader, test_loader = data_reader.read_data(flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network
    Genetic_Algorithm = GA(flags, train_loader, test_loader, inference_mode=True, saved_model=flags.eval_model)

    # Evaluation process
    print("Start eval now:")
    dname = flags.save_to

    if multi_flag:
        dest_dir = os.path.join('./temp-dat', dname, flags.data_set)
        # makedirs creates the missing './temp-dat' parent too (plain mkdir would fail)
        # and exist_ok avoids the check-then-create race.
        os.makedirs(dest_dir, exist_ok=True)

        pred_file, truth_file = Genetic_Algorithm.evaluate(save_dir=dest_dir, save_all=True,
                                                           save_misc=save_misc, MSE_Simulator=MSE_Simulator,
                                                           save_Simulator_Ypred=save_Simulator_Ypred)
    else:
        pred_file, truth_file = Genetic_Algorithm.evaluate(save_misc=save_misc, save_dir=dname,
                                                           MSE_Simulator=MSE_Simulator,
                                                           save_Simulator_Ypred=save_Simulator_Ypred)

    # Plot the MSE distribution
    #plotMSELossDistrib(pred_file, truth_file, flags,save_dir=dname)
    print("Evaluation finished")
示例#20
0
def evaluate_from_model(model_dir,
                        multi_flag=False,
                        eval_data_all=False,
                        modulized_flag=False):
    """
    Evaluating interface. 1. Retrieve the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :param multi_flag: run ntwk.evaluate_multiple_time() instead of a single evaluation
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param modulized_flag: run ntwk.evaluate_modulized_multi_time() (takes priority over multi_flag)
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]  # strip the "models/" prefix so it can be re-joined below
        print("after removing prefix models/, now model_dir is:", model_dir)
    flags = helper_functions.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode

    flags.test_ratio = get_test_ratio_helper(flags)

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network
    ntwk = Network(INN,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)
    print(ntwk.ckpt_dir)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if modulized_flag:
        ntwk.evaluate_modulized_multi_time()
    elif multi_flag:
        ntwk.evaluate_multiple_time()
    else:
        pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution (single-eval branches only; pred_file exists only there)
    if flags.data_set != 'Yang_sim' and not multi_flag and not modulized_flag:  # meta-material does not have simulator, hence no Ypred given
        MSE = plotMSELossDistrib(pred_file, truth_file, flags)
        # Add this MSE back to the folder
        flags.best_validation_loss = MSE
        helper_functions.save_flags(flags, os.path.join("models", model_dir))
    elif flags.data_set == 'Yang_sim' and not multi_flag and not modulized_flag:
        # Save the current path for getting back in the future
        cwd = os.getcwd()
        abs_path_Xpred = os.path.abspath(pred_file.replace('Ypred', 'Xpred'))
        # Change to NA directory to do prediction; the try/finally guarantees the cwd
        # is restored even if the ensemble prediction raises (the original left the
        # process stranded in ../NA/ on failure).
        os.chdir('../NA/')
        try:
            MSE = predict.ensemble_predict_master('../Data/Yang_sim/state_dicts/',
                                                  abs_path_Xpred,
                                                  no_plot=False)
        finally:
            os.chdir(cwd)
        # Add this MSE back to the folder
        flags.best_validation_loss = MSE
        helper_functions.save_flags(flags, os.path.join("models", model_dir))
    print("Evaluation finished")
示例#21
0
from utils import data_reader
from utils.helper_functions import save_flags, load_flags, simulator

# Datasets to sweep and the output folder for the scatter plots below.
sets = ["Peurifoy", "Chen", "Yang_sim"]
folder = 'loglog-scatter-vline-under-20-mse'

# makedirs with exist_ok replaces the exists()/mkdir() pair: it also creates the
# 'one-to-many' parent if missing (plain mkdir would raise FileNotFoundError) and
# avoids the check-then-create race.
os.makedirs('one-to-many/' + folder, exist_ok=True)

for dset in sets:
    flags = flag_reader.read_flag()
    flags.data_set = dset
    flags.model_name = flags.data_set.lower()
    flags.eval_model = flags.model_name

    train_loader, test_loader = data_reader.read_data(flags,
                                                      eval_data_all=True)
    geo = None
    spect = None

    for g, s in test_loader:
        if geo is None:
            geo = g.data.numpy()
            spect = s.data.numpy()
        else:
            geo = np.vstack((geo, g.data.numpy()))
            spect = np.vstack((spect, s.data.numpy()))

    geo = torch.from_numpy(np.array(geo)).cuda()
    spect = torch.from_numpy(np.array(spect)).cuda()

    for i in range(len(geo)):
        feature = item[:(item.find('-') - 1)]
    else:
        ty = 'common'
        feature = item

    ## compute Micro F1 score for each feature
    for j in range(1, 10):
        c = j / 1000
        model = get_classifier(ty='LR', c=c)

        microF1s = 0
        for i in range(constant.num_split):

            ## prepare data for feature-10 folders
            vocab = generate_vocab()
            train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i)
            ## feature_list: glove emoji elmo bert deepmoji emo2vec
            ## if you want twitter glove or common glove use  ty='twitter' and ty='common'
            X_train, y_train = get_feature(
                train,
                vocab,
                feature_list=[feature],
                mode=['sum'],
                split="train",
                ty=ty)  ## [29010,3,emb_size] 3 is number of sentence
            X_test, y_test = get_feature(val,
                                         vocab,
                                         feature_list=[feature],
                                         mode=['sum'],
                                         split="valid",
                                         ty=ty)  ## [1150,3,emb_size]
示例#23
0
def evaluate_from_model(model_dir, multi_flag=False, eval_data_all=False, save_misc=False,
                        MSE_Simulator=False, save_Simulator_Ypred=True,
                        init_lr=0.01, lr_decay=0.9, BDY_strength=1, save_dir='data/',
                        noise_level=0,
                        md_coeff=0, md_start=None, md_end=None, md_radius=None,
                        eval_batch_size=None):
    """
    Evaluating interface. 1. Retrieve the flags 2. get data 3. initialize network 4. eval

    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param save_misc: forwarded to ntwk.evaluate()
    :param MSE_Simulator: forwarded to ntwk.evaluate()
    :param save_Simulator_Ypred: forwarded to ntwk.evaluate() (currently always forced False below)
    :param init_lr: initial learning rate written to flags.lr for the backprop eval
    :param lr_decay: learning-rate decay written to flags.lr_decay_rate
    :param BDY_strength: boundary-loss strength written to flags.BDY_strength
    :param save_dir: output directory for the single-eval branch (created if absent)
    :param noise_level: forwarded to ntwk.evaluate()
    :param md_coeff: MD-loss coefficient; note the default is 0 (not None), so the loaded
        flags value is overwritten unless the caller explicitly passes None
    :param md_start: MD-loss start, written to flags only when not None
    :param md_end: MD-loss end, written to flags only when not None
    :param md_radius: MD-loss radius, written to flags only when not None
    :param eval_batch_size: evaluation batch size; defaults to 2048 when None
    :return: MSE from plotMSELossDistrib, or None for 'Yang' datasets (no simulator comparison)
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]  # strip the "models/" prefix so it can be re-joined below
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir                    # Reset the eval mode
    flags.test_ratio = get_test_ratio_helper(flags)
    # NOTE(review): this value is overwritten by the hard-coded 300 further down;
    # kept for fidelity -- confirm which backprop_step is intended.
    flags.backprop_step = eval_flags.backprop_step

    # NOTE(review): the commented-out condition suggests this was meant to be
    # `flags.data_set == 'Yang_sim'`; as written it fires for every dataset and
    # always disables save_Simulator_Ypred -- confirm before narrowing it.
    if flags.data_set is not None: #== 'Yang_sim':
        save_Simulator_Ypred = False
        print("this is Yang sim dataset, setting the save_Simulator_Ypred to False")
    flags.batch_size = 1                            # For backprop eval mode, batchsize is always 1
    flags.BDY_strength = BDY_strength
    flags.train_step = eval_flags.train_step
    flags.backprop_step = 300

    # MD Loss: new version -- only override the loaded flags when a value was supplied
    if md_coeff is not None:
        flags.md_coeff = md_coeff
    if md_start is not None:
        flags.md_start = md_start
    if md_end is not None:
        flags.md_end = md_end
    if md_radius is not None:
        flags.md_radius = md_radius

    ############################# Thing that are changing #########################
    flags.lr = init_lr
    flags.lr_decay_rate = lr_decay
    flags.eval_batch_size = 2048 if eval_batch_size is None else eval_batch_size
    flags.optim = 'Adam'
    ###############################################################################

    print(flags)

    # Get the data
    train_loader, test_loader = data_reader.read_data(flags, eval_data_all=eval_data_all)
    print("Making network now")

    # Make Network
    ntwk = Network(NA, flags, train_loader, test_loader, inference_mode=True, saved_model=flags.eval_model)
    print("number of trainable parameters is :")
    pytorch_total_params = sum(p.numel() for p in ntwk.model.parameters() if p.requires_grad)
    print(pytorch_total_params)

    # Evaluation process
    print("Start eval now:")
    if multi_flag:
        # Multi-eval results go under a machine-specific scratch root
        dest_dir = os.path.join('/home/sr365/MDNA_temp/', save_dir)
        dest_dir = os.path.join(dest_dir, flags.data_set)
        # makedirs with exist_ok creates missing parents and avoids the isdir/mkdir race
        os.makedirs(dest_dir, exist_ok=True)
        pred_file, truth_file = ntwk.evaluate(save_dir=dest_dir, save_all=True,
                                              save_misc=save_misc, MSE_Simulator=MSE_Simulator,
                                              save_Simulator_Ypred=save_Simulator_Ypred,
                                              noise_level=noise_level)
    else:
        # Create the directory if it does not exist
        os.makedirs(save_dir, exist_ok=True)
        pred_file, truth_file = ntwk.evaluate(save_dir=save_dir, save_misc=save_misc,
                                              MSE_Simulator=MSE_Simulator,
                                              save_Simulator_Ypred=save_Simulator_Ypred,
                                              noise_level=noise_level)

    if 'Yang' in flags.data_set:
        # Yang datasets: no MSE distribution plot here (handled elsewhere)
        return
    # Plot the MSE distribution
    MSE = plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
    return MSE
def evaluate_from_model(model_dir,
                        multi_flag=False,
                        eval_data_all=False,
                        save_misc=False,
                        MSE_Simulator=False,
                        save_Simulator_Ypred=False):
    """
    Evaluating interface. 1. Retrieve the flags 2. get data 3. initialize network 4. eval.

    Runs the Backprop network over a grid of eval batch sizes (i) and repetitions (j),
    writing prediction/truth files and an MSE-distribution plot per run.
    :param model_dir: The folder to retrieve the model
    :param multi_flag: The switch to turn on if you want to generate all different inference trial results
    :param eval_data_all: The switch to turn on if you want to put all data in evaluation data
    :param save_misc: forwarded to ntwk.evaluate()
    :param MSE_Simulator: forwarded to ntwk.evaluate()
    :param save_Simulator_Ypred: forwarded to ntwk.evaluate()
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    if (model_dir.startswith("models")):
        model_dir = model_dir[7:]  # strip the "models/" prefix so it can be re-joined below
        print("after removing prefix models/, now model_dir is:", model_dir)
    print(model_dir)
    flags = load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    flags.backprop_step = eval_flags.backprop_step
    # Per-dataset test ratios, chosen (per the inline notes) so each eval set has a
    # fixed absolute size.
    if flags.data_set == 'ballistics':
        flags.test_ratio = 0.0078  # 12800 in total
    elif flags.data_set == 'sine_wave':
        flags.test_ratio = 0.001  # 8000 in total
    elif flags.data_set == 'robotic_arm':
        flags.test_ratio = 0.1  # 10000 in total
    else:
        flags.test_ratio = 0.0051062 / 2
        #flags.test_ratio = 0
        #flags.test_ratio = 0.00025                        # 20000 in total for Meta material
    flags.batch_size = 1  # For backprop eval mode, batchsize is always 1
    flags.lr = 1e-2
    # NOTE(review): ballistics overrides lr to 1 -- presumably it needs much larger
    # backprop steps; confirm against the training configuration.
    if flags.data_set == 'ballistics':
        flags.lr = 1

    flags.train_step = eval_flags.train_step

    # NOTE(review): range(4000, 5000, 2000) yields only [4000], so the outer loop runs a
    # single batch size; j repeats the evaluation 3 times into separate numbered folders.
    for i in range(4000, 5000, 2000):
        for j in range(3):
            flags.eval_batch_size = i
            # Get the data (reloaded each repetition)
            train_loader, test_loader = data_reader.read_data(
                flags, eval_data_all=eval_data_all)
            print("Making network now")

            # Make Network (rebuilt from the saved model each repetition)
            ntwk = Network(Backprop,
                           flags,
                           train_loader,
                           test_loader,
                           inference_mode=True,
                           saved_model=flags.eval_model)
            print("number of trainable parameters is :")
            pytorch_total_params = sum(p.numel()
                                       for p in ntwk.model.parameters()
                                       if p.requires_grad)
            print(pytorch_total_params)

            # Evaluation process
            print("Start eval now:")
            if multi_flag:
                pred_file, truth_file = ntwk.evaluate(
                    save_dir='D:/Yang_MM_Absorber_ML/NA/' + flags.data_set,
                    save_all=True,
                    save_misc=save_misc,
                    MSE_Simulator=MSE_Simulator,
                    save_Simulator_Ypred=save_Simulator_Ypred)
            else:
                # Results land in .../data/<batch_size>/<repetition 1..3>
                pred_file, truth_file = ntwk.evaluate(
                    save_dir='D:/Yang_MM_Absorber_ML/Backprop/data/' + str(i) +
                    '/' + str(j + 1),
                    save_misc=save_misc,
                    MSE_Simulator=MSE_Simulator,
                    save_Simulator_Ypred=save_Simulator_Ypred)

            # Plot the MSE distribution into the same per-run folder
            plotMSELossDistrib(
                pred_file,
                truth_file,
                flags,
                save_dir='D:/Yang_MM_Absorber_ML/Backprop/data/' + str(i) +
                '/' + str(j + 1))
            print("Evaluation finished")