Example #1
def experiment_mlp_learning():
    """Test if an MLP learns the desired features"""
    trainingset = generate_dataset(N_train)
    testset = generate_dataset(N_test)
    model = MLP(trainingset.layer_sizes)

    # Training
    train_supervised(model, trainingset)

    # Testing
    results = evaluate_model(model, testset)
    return results
Example #2
def experiment_rbm_learning():
    """Train and test an RBM with normal toy data"""
    trainingset = generate_dataset(N_train)
    testset = generate_dataset(N_test)
    model = StackedRBMs(trainingset.layer_sizes)

    # Training
    train_unsupervised(model, trainingset)

    # Testing
    results = evaluate_model(model, testset)
    return results
Example #3
def experiment_rbm_power():
    """Test if an RBM has the representational power to learn the desired
       features"""
    trainingset = generate_dataset(N_train)
    testset = generate_dataset(N_test)
    model = StackedRBMs(trainingset.layer_sizes)

    # Train the model using also the (not so) hidden labels
    train_with_hidden_labels(model, trainingset)

    # Testing
    results = evaluate_model(model, testset)
    return results
Example #4
def sample_small(basedir, solver='Glucose3'):
    """
    Traverses basedir, looking for instances without their corresponding dataset,
    and generates the dataset for these using the pysat-based sampler.
    :param basedir: directory to traverse for sat*.cnf instances
    :param solver: sat solver used to enumerate instances
    :return:
    """
    var_sizes = range(10, 101, 10)  # [10,20,...,100]
    for v in var_sizes:
        for cnf in glob.glob(f'{basedir}/v{v}/*/sat*.cnf'):
            if len(glob.glob(f'{cnf}*.pkl.gz')) == 0:
                print(f'Sampling for {cnf}')
                dataset.generate_dataset(cnf, solver)
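A minimal invocation sketch for the function above; the 'instances' directory name and layout are assumptions for illustration, not taken from the project:

# Hypothetical usage: generate the missing .pkl.gz datasets for every
# sat*.cnf found under instances/v10 ... instances/v100.
sample_small('instances', solver='Glucose3')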
Example #5
def experiment_mlp_power():
    """Try to find 'ideal' parameters to test if the MLP has the
       representational power to required to learn the desired features.
    """
    trainingset = generate_dataset(N_train)
    testset = generate_dataset(N_test)
    model = MLP(trainingset.layer_sizes)

    # Train the model using also the (not so) hidden labels
    train_with_hidden_labels(model, trainingset)

    # Testing
    results = evaluate_model(model, testset)
    return results
Example #6
def experiment_mlp_power_uncorrelated_features():
    """A variation of experiment_mlp_power.
       Train the hidden layer with an 'uncorrelated' dataset, so it cannot
       cheat and learn to detect features by looking for commonly co-occurring
       features.
    """
    uncorrelated_trainingset = generate_uncorrelated_dataset(N_train)
    trainingset = generate_dataset(N_train)
    testset = generate_dataset(N_test)

    model = MLP(trainingset.layer_sizes)

    model.train_bottom(*uncorrelated_trainingset.get_layers()[:-1])
    model.train_top(*trainingset.get_layers()[1:])

    results = evaluate_model(model, testset)
    return results
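Note on the split above: model.train_bottom consumes every layer of the uncorrelated set except the top one, while model.train_top is fed layers 1 and up of the regular dataset, so only the top of the network is ever trained on data where features co-occur.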
Example #7
def run():
    dataset.generate_dataset()
    for fs in feature.feature_selection_method:
        for ds in setting.DATASET:
            path = os.path.join(*setting.ARFF_BASE_PATH, ds)
            number_thread = setting.PARALLEL_NUMBER_THREAD.get(ds, -1)
            for (dirpath, dirnames, filenames) in os.walk(path):
                fnames = filter_arff(filenames)
                bar = tqdm.tqdm(total=len(fnames),
                                desc="{} with {}".format(ds, fs.__name__),
                                ascii=True)
                result = Parallel(n_jobs=number_thread,
                                  backend=setting.PARALLEL_BACKEND)(
                                      delayed(compare_models)(ds, f, fs, bar)
                                      for f in fnames)
                to_csv(result, ds, fs.__name__, setting.OUTPUT_PREFIX)
                bar.close()
Example #8
def experiment_data_autocorrelation():
    """Test how the hidden labels in the data correlate among themselves"""
    testset = generate_dataset(N_train)
    ls = testset.get_layers()[1:]

    # Compare the (hidden) labels to themselves
    metrics = compare(ls, ls)

    return locals()
Example #9
def sample_small(basedir, positives, negatives, solver='Glucose3', *var_sizes):
    """
    Traverses basedir, looking for instances without their corresponding dataset,
    and generates the dataset for these using the pysat-based sampler.
    :param basedir: directory where the 'phase' dataset is located
    :param positives: number of positive samples to generate
    :param negatives: number of negative samples to generate
    :param solver: sat solver used to enumerate instances
    :param var_sizes: will look for formulas with the number of variables in this list
    :return:
    """
    if len(var_sizes) == 0:
        var_sizes = range(10, 101, 10)

    for v in var_sizes:
        for cnf in glob.glob(f'{basedir}/v{v}/*/sat*.cnf'):
            if len(glob.glob(f'{cnf}*.pkl.gz')) == 0:
                print(f'Sampling for {cnf}')
                dataset.generate_dataset(cnf, solver, num_positives=positives, num_negatives=negatives)
Example #10
def run_experiment(r, alg, dataset, experiment, params, solver_params):
    print('%s, trial: %i, dataset %s, experiment %s' % (alg, r, dataset, experiment))

    # sample a new dataset
    Sx, Sy_clean, Sy_noise, Tx, Ty = generate_dataset(r, dataset, params)

    # perform learning
    alg_func_name = alg.replace(' ', '_')
    func = eval(alg_func_name)
    params['random_state'] = r
    scores = func(Sx, Sy_clean, Sy_noise, Tx, Ty, params, solver_params)
    output_path = 'output/experiment/%s/%s/%s' % (experiment, dataset, alg_func_name)
    dir_util.mkpath(output_path)
    np.save('%s/%02d.npy' % (output_path, r), scores)
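Since the algorithm functions are resolved by name with eval, an explicit registry of callables is a common alternative; a minimal sketch with hypothetical function names:

# Hypothetical registry that replaces the eval-based lookup above.
ALGORITHMS = {'nearest_neighbor': nearest_neighbor, 'robust_svm': robust_svm}
func = ALGORITHMS[alg_func_name]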
Example #11
def main():
    if len(sys.argv) < 4:
        print('Usage : python siamese_nn.py graphfile no_nodes no_features')
        exit()

    graph, no_nodes, no_features = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    dataset, graph_features, edge_index, features = generate_dataset(graph, no_nodes, no_features, siamese=1)
    num_features = len(graph_features[0][1])
    # these parameters can be changed
    size_emb = 64
    batch_size = 64
    val_split = 0.2

    train_dataloader, val_dataloader = split_dataset(dataset, batch_size, val_split)
    model = SiameseNN(num_features, size_emb).cuda()
    model = train_model(model, train_dataloader, val_dataloader)

    test_model(model, val_dataloader)
Example #12
def experiment_rbm_power_stability():
    """Test if 'ideal' parameters for us also form an optimum for the RBMs

       We first do the test for representational power, and then continue with
       unsupervised training to see whether the parameters stay around the same
       values.
    """
    # First, train with hidden labels to test representational power
    results0 = experiment_rbm_power()
    model = results0['model']
    testset = results0['testset']

    # Then, unsupervised training to see if parameters stay or change
    trainingset1 = generate_dataset(N_train)
    train_unsupervised(model, trainingset1)
    results1 = evaluate_model(model, testset)

    return results0, results1
Example #13
def main():
    if len(sys.argv) < 4:
        print('Usage : python gcn.py graphfile no_nodes no_features')
        exit()

    graph, no_nodes, no_features = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    dataset, graph_features, edge_index, gat_features = generate_dataset(graph, no_nodes, no_features)
    num_features = len(graph_features[0][1])
    # these parameters can be changed
    size_emb = 64
    batch_size = 64
    val_split = 0.2

    train_dataloader, val_dataloader = split_dataset(dataset, batch_size, val_split)
    data = Data(x=torch.tensor(gat_features).float().cuda(), edge_index=edge_index.cuda(), num_nodes=no_nodes)
    model = GAT(num_features, size_emb).cuda()
    model = train_model(model, train_dataloader, val_dataloader, data)

    test_model(model, val_dataloader, data)
Example #14
File: train.py Project: junix/ner
def train_and_dump(from_model=None,
                   optimizer='sgd',
                   lr=1e-4,
                   rnn_type='lstm',
                   lang_pkl='lang.pt',
                   drop_n=0,
                   real_corpus_sample=0.3):
    if from_model:
        model = EntityRecognizer.load(from_model)
        model_pkl_name = from_model
    else:
        model = EntityRecognizer(lang=Lang.load(lang_pkl),
                                 embedding_dim=200,
                                 rnn_type=rnn_type)
        model.init_params()
        model_pkl_name = 'model.{rnn_type}.{optimizer}'.format(
            rnn_type=rnn_type, optimizer=optimizer)
    model.move_to_device(conf.device())
    dataset = islice(generate_dataset(real_corpus_sample), drop_n, None)
    do_train(model, dataset, model_pkl_name, optimizer=optimizer, lr=lr)
Example #15
def load_model(model_name, data, seed, filename, cuda=False, pretrain=False):
    set_seed(seed)
    drop = 0
    if data == 'real':
        data = load_dataset()
    else:
        n_cell = 300
        n_edge = 3000
        data = generate_dataset(7, n_cell, n_edge, seed)
    model = get_model(model_name, data, drop)

    filename = os.path.join('checkpoints/', filename)
    if cuda:
        checkpoint = torch.load(filename)
    else:
        # Load GPU model on CPU
        checkpoint = torch.load(filename,
                                map_location=lambda storage, loc: storage)
    epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'], strict=not pretrain)
    return epoch, data, model
Example #16
T = 2

#number of bootstrap samples M for validation step
M = 250

#number of maximum clusters to analyze on validation step
max_K = 8

#linkage method used in hierarchical clustering
method = 'ward'

###############################################################################
#           TEMPORAL SEQUENCE GENERATION
###############################################################################
#df_encoded = generate_dataset(5,1)
df_encoded = generate_dataset(50, 1)

################################################################################
##            SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
################################################################################
concat_for_final_decision = []
for gap in gap_values:

    print('GAP PENALTY:', gap)

    #pairwise sequence alignment results
    results = main_algorithm(df_encoded, gap, T, s, 0)

    #reset indexes
    df_encoded = df_encoded.reset_index()
Example #17
def plot_feature_importance(model, feature_buf):
    feature_important = model.get_score(importance_type='weight')

    feature_important = sorted(feature_important.items(),
                               key=lambda x: x[1],
                               reverse=True)

    keys, vals = [], []
    for item in feature_important[:20]:
        # xgboost reports features as 'f0', 'f1', ...; map back to the original column names
        keys.append(feature_buf[int(item[0][1:])])
        vals.append(item[1])

    print(dict(zip(keys, vals)))

    plt.bar(range(len(vals)), vals, width=0.5)
    plt.xticks(range(len(vals)), keys, rotation=90)
    plt.savefig('feature_importance.pdf', bbox_inches='tight', format='pdf')


if __name__ == '__main__':
    X_train, X_test, y_train, y_test, feature_buf = dataset.generate_dataset(
        enhance=False)
    model = train(X_train, X_test, y_train, y_test)

    pre = model.predict(xgb.DMatrix(X_test, label=None))
    auc = roc_auc_score(y_test, pre)
    print(auc)

    plot_feature_importance(model, feature_buf)
Example #18
    }

    #number of times to repeat experiment
    n_experiment = 25
    #count the number of times the procedure retrieves the correct number of clusters
    count_correct = 0

    #REPEAT SEQUENCE GENERATION n_experiment TIMES
    for i in range(0, n_experiment):

        print('number of experiment:', i)
        #initialize list that will contain the auxiliary dataframes to be concatenated
        concat = []

        #generate sequences
        df_encoded = generate_dataset(n_sequences, dataset)

        ###########################################################################
        ##            SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
        ###########################################################################
        concat_for_final_decision = []
        for gap in gap_values:
            #print(gap)

            #pairwise sequence alignment results
            results = main_algorithm(df_encoded, gap, T, s, 0)

            #reset indexes
            df_encoded = df_encoded.reset_index()

            #convert similarity matrix into distance matrix
Example #19
from radam_optimizer import RAdam

from dataset import load_image, generate_dataset

# module to notify training status
from fcm_notifier import FCMNotifier
notifier = FCMNotifier()

# # Load Dataset

# In[11]:

if not GENDER_SENSITIVE:
    # prepare full dataset
    full_mixed_dataset, mixed_train_dataset, mixed_val_dataset, mixed_train_loader, mixed_val_loader = generate_dataset(
        None)
    print('Dataset length: ', len(full_mixed_dataset))
    print('Full ds item: ', full_mixed_dataset[0]['images'].shape,
          full_mixed_dataset[0]['labels'].shape)

else:
    # prepare male dataset
    full_male_dataset, male_train_dataset, male_val_dataset, male_train_loader, male_val_loader = generate_dataset(
        True)
    print('Male dataset length: ', len(full_male_dataset))
    print('Male ds item: ', full_male_dataset[0]['images'].shape,
          full_male_dataset[0]['labels'].shape)

    # prepare female dataset
    full_female_dataset, female_train_dataset, female_val_dataset, female_train_loader, female_val_loader = generate_dataset(
        False)
Example #20
parser = argparse.ArgumentParser()
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()

torch.cuda.set_device(args.local_rank)
device = torch.device(f'cuda:{args.local_rank}')
torch.distributed.init_process_group('nccl', init_method='env://')

gpu = misc.auto_select_gpu(num_gpu=dist.get_world_size(), selected_gpus=None)

num_epochs = 20
batch_size = 128
lr = 0.01
momentum = 0.5

train_loader, test_loader = dataset.generate_dataset(batch_size=batch_size)
model = model.LeNet()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
model.to(device)

model = torch.nn.parallel.DistributedDataParallel(
    model, device_ids=[args.local_rank], output_device=args.local_rank)

# train
for epoch in range(num_epochs):
    train(model, train_loader, optimizer, epoch)
    test(model, test_loader)
a = time.time() - time_
print("{:.2f}s".format(a))
torch.save(model.state_dict(), "mnist_lenet.pt")
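This snippet initializes torch.distributed with the NCCL backend and parses --local_rank, so it is meant to be launched with one process per GPU, e.g. via python -m torch.distributed.launch --nproc_per_node=<num_gpus> followed by the script name (the launcher sets the environment variables that init_method='env://' expects).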