def experiment(state, channel):
    """
    Entry point for a scheduler-driven run: trains a PostMLP with k-fold
    cross-validation on the dataset named in `state` and writes the best
    validation/test errors back into `state`.
    """
    DS = Dataset(is_binary=True)
    DS.setup_dataset(data_path=state.dataset)
    kfoldCrossValidation = KfoldCrossvalidation(no_of_folds=state.no_of_folds)

    cs_args = {
        "train_args": {
            "L1_reg": state.l1_reg,
            "learning_rate": state.learning_rate,
            "L2_reg": state.l2_reg,
            "nepochs": state.n_epochs,
            "cost_type": state.cost_type,
            "save_exp_data": state.save_exp_data,
            "batch_size": state.batch_size
        },
        "test_args": {
            "save_exp_data": state.save_exp_data,
            "batch_size": state.batch_size
        }
    }

    post_input = T.matrix('post_input')
    mlp = PostMLP(post_input,
                  n_in=state.n_in,
                  n_hiddens=state.n_hiddens,
                  n_out=state.n_out,
                  n_hidden_layers=state.n_hidden_layers,
                  is_binary=True,
                  exp_id=state.exid)

    valid_errs, test_errs = kfoldCrossValidation.crossvalidate(DS.Xtrain,
                                                               DS.Ytrain,
                                                               DS.Xtest,
                                                               DS.Ytest,
                                                               mlp,
                                                               **cs_args)

    errors = kfoldCrossValidation.get_best_valid_scores(valid_errs, test_errs)
    state.best_valid_error = errors["valid_scores"]["error"]
    state.best_test_error = errors["test_scores"]["error"]
    return channel.COMPLETE
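# A minimal sketch of driving `experiment` by hand, outside a scheduler.
# The hyperparameter values, the dataset path, and the `_State`/`_Channel`
# stubs below are illustrative assumptions, not part of the original code.
def _run_experiment_standalone():
    class _State(object):
        """Bare attribute container standing in for the scheduler's state."""
        pass

    class _Channel(object):
        """Stub for the scheduler channel; only COMPLETE is needed here."""
        COMPLETE = 0

    state = _State()
    state.dataset = "/path/to/dataset.npy"  # placeholder path
    state.no_of_folds = 5
    state.l1_reg = 1e-6
    state.l2_reg = 1e-5
    state.learning_rate = 0.05
    state.n_epochs = 10
    state.cost_type = "crossentropy"
    state.save_exp_data = False
    state.batch_size = 250
    state.n_in = 64 * 11
    state.n_hiddens = [1024]
    state.n_out = 1
    state.n_hidden_layers = 1
    state.exid = 1
    return experiment(state, _Channel())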
def incremental_data_experiment(prmlp,
                                train_datasets,
                                test_datasets,
                                no_of_patches=64,
                                patch_size=(8, 8),
                                **kwargs):
    """
    Train `prmlp` incrementally on a sequence of training datasets and
    evaluate it on a fixed test dataset after each one.
    """
    ds_train = Dataset()
    ds_test = Dataset()

    costs = []
    test_scores = []

    # The test dataset is fixed across the run; the full path is built from
    # the module-level data_dir and file_suffix.
    test_ds_name = data_dir + test_datasets[0] + file_suffix
    print "Loading the test dataset"
    ds_test.setup_pretraining_dataset(data_path=test_ds_name,
                                      train_split_scale=0.5,
                                      patch_size=patch_size,
                                      normalize_inputs=False)

    # Train on each dataset in turn and test on the held-out test dataset.
    for x_t_idx in xrange(len(train_datasets)):
        train_ds_name = data_dir + train_datasets[x_t_idx] + file_suffix
        print "Loading the dataset %s" % (train_ds_name)
        ds_train.setup_pretraining_dataset(data_path=train_ds_name,
                                           train_split_scale=1,
                                           patch_size=patch_size,
                                           normalize_inputs=False)

        print "Training on dataset %d" % (x_t_idx)
        cost = train_prmlp(prmlp,
                           ds_train.Xtrain_patches,
                           ds_train.Xtrain_presences,
                           **kwargs["train_args"])
        costs.append(cost)

        print "Testing on the test dataset."
        test_prmlp(prmlp,
                   ds_test.Xtest_patches,
                   ds_test.Xtest_presences,
                   no_of_patches,
                   **kwargs["test_args"])
        # The patch-based error is read off the model after testing.
        test_score_patch_based = prmlp.obj_patch_error_percent
        test_scores.append(test_score_patch_based)

    all_data_dict = {
        "test_scores": test_scores,
        "costs": costs
    }
    numpy.save("/RQusagers/gulcehre/codes/python/experiments/arcade_ds_exps/pretrained_mlp/prmlp_multi_datasets/out/multi_hidden_mlp_240k_lrate_0.025_3hidden_8x8_1epoch.npy",
               all_data_dict)
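# A sketch of a driver for `incremental_data_experiment`. It assumes the
# PatchBasedMLP construction used in the __main__ blocks below and the
# train_args/test_args layout used throughout these scripts; the dataset
# name stems are placeholders, and `data_dir`/`file_suffix` are the
# module-level globals the function already relies on.
def _run_incremental_experiment():
    patch_size = (8, 8)
    x = T.matrix('x')
    prmlp = PatchBasedMLP(x,
                          n_in=patch_size[0] * patch_size[1],
                          n_hiddens=[1024, 768],
                          n_out=11,
                          no_of_patches=64,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)
    exp_args = {
        "train_args": {
            "L1_reg": 1e-6,
            "L2_reg": 1e-5,
            "learning_rate": 0.025,
            "nepochs": 1,
            "cost_type": "crossentropy",
            "save_exp_data": False,
            "batch_size": 250
        },
        "test_args": {
            "save_exp_data": False,
            "batch_size": 250
        }
    }
    incremental_data_experiment(prmlp,
                                train_datasets=["pento64x64_ds1",
                                                "pento64x64_ds2"],  # placeholders
                                test_datasets=["pento64x64_test"],  # placeholder
                                no_of_patches=64,
                                patch_size=patch_size,
                                **exp_args)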
print "starting post-testing on training dataset" post_mlp.test(data=test_train_probs, labels=train_lbls, **post_cs_args["test_args"]) print "starting post-testing on the dataset" post_mlp.test(data=test_test_probs, labels=test_lbls, **post_cs_args["test_args"]) if __name__ == "__main__": print "Loading the dataset" ds = Dataset() x = T.matrix('x') no_of_patches = 64 no_of_classes = 11 dir = "/RQexec/gulcehre/datasets/pentomino/second_level_ins/" train_file = dir + "train_probs_40k_wlbls.pkl" train_test_file = dir + "test_ontrain_probs_40k_wlbls.pkl" test_file = dir + "test_ontest_probs_40k_wlbls.pkl" train_data, train_lbls = load_file(train_file) test_train_data = load_file(train_test_file)[0] test_test_data, test_lbls = load_file(test_file)
pre_test_test_probs = get_summed_probs(normalize_data(pre_test_test_probs))

print "Training post-mlp"
post_mlp.train(data=pre_train_probs, labels=train_set_labels,
               **post_cs_args["train_args"])

print "starting post-testing on training dataset"
post_mlp.test(data=pre_test_train_probs, labels=train_set_labels,
              **post_cs_args["test_args"])

print "starting post-testing on the test dataset"
post_mlp.test(data=pre_test_test_probs, labels=test_set_labels,
              **post_cs_args["test_args"])


if __name__ == "__main__":
    print "Loading the dataset"
    ds = Dataset()

    data_new = \
        "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_20k_64patches_seed_112168712_64patches.npy"
    data_new_40k = \
        "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_40k_64patches_seed_975168712_64patches.npy"
    data_new_60k = \
        "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_60k_64patches_seed_975168712_64patches.npy"
    data_new_100k = \
        "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_100k_seed_98313722_64patches.npy"

    patch_size = (8, 8)
    ds.setup_pretraining_dataset(data_path=data_new_60k,
                                 patch_size=patch_size,
                                 train_split_scale=0.8,
                                 normalize_inputs=False)
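# `normalize_data` and `get_summed_probs` are imported from elsewhere in
# the codebase. A rough sketch of their assumed behaviour, not the actual
# implementations: normalization rescales the patch-MLP outputs, and the
# summed-probability transform collapses the per-patch class probabilities
# into a single class-score vector per example.
import numpy

def normalize_data_sketch(data, eps=1e-8):
    # Min-max scale each column into [0, 1].
    mins = data.min(axis=0)
    maxs = data.max(axis=0)
    return (data - mins) / (maxs - mins + eps)

def get_summed_probs_sketch(probs, no_of_patches=64, no_of_classes=11):
    # (n_examples, no_of_patches * no_of_classes) ->
    # (n_examples, no_of_classes): sum each class's score across patches.
    reshaped = probs.reshape((probs.shape[0], no_of_patches, no_of_classes))
    return reshaped.sum(axis=1)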
print "starting post-testing on training dataset" post_mlp.test(data=pre_test_train_probs, labels=train_set_labels, **post_cs_args["test_args"]) print "starting post-testing on the dataset" post_mlp.test(data=pre_test_test_probs, labels=test_set_labels, **post_cs_args["test_args"]) if __name__ == "__main__": print "Loading the dataset" ds = Dataset() data_path_40k = "/RQusagers/gulcehre/dataset/pentomino/pieces/pento64x64_40k_seed_39112222.npy" data_path = "/RQusagers/gulcehre/dataset/pentomino/experiment_data/pento64x64_80k_seed_39112222.npy" patch_size = (8, 8) ds.setup_pretraining_dataset(data_path=data_path_40k, patch_size=patch_size, normalize_inputs=False) x = T.matrix('x') n_hiddens = [1024, 768] no_of_patches = 64 no_of_classes = 11 prmlp = PatchBasedMLP(x, n_in=patch_size[0] * patch_size[1],
def pre_training_on_multi_datasets(patch_mlp=None, post_mlp=None,
                                   train_ds=None, test_ds=None):
    """
    Pretrain the patch-level MLP on each training dataset in turn; after
    each one, train the post-MLP on the normalized patch probabilities and
    evaluate both on a fixed test dataset.
    """
    ds = Dataset()
    print "Loading the dataset"

    cs_args = {
        "train_args": {
            "L1_reg": 1e-6,
            "learning_rate": 0.75,
            "L2_reg": 1e-5,
            "nepochs": 2,
            "cost_type": "crossentropy",
            "save_exp_data": False,
            "batch_size": 250,
            "normalize_weights": False
        },
        "test_args": {
            "save_exp_data": False,
            "batch_size": 250
        }
    }

    post_cs_args = {
        "train_args": {
            "L1_reg": 1e-6,
            "learning_rate": 0.08,
            "L2_reg": 1e-5,
            "nepochs": 10,
            "cost_type": "crossentropy",
            "save_exp_data": False,
            "batch_size": 250,
            "normalize_weights": False
        },
        "test_args": {
            "save_exp_data": False,
            "batch_size": 250
        }
    }

    # The held-out test dataset is fixed across all training datasets.
    test_dataset = test_ds[0]
    ds.setup_pretraining_dataset(data_path=test_dataset,
                                 train_split_scale=0.4,
                                 patch_size=(8, 8))
    test_patches, test_pre, test_labels = \
        ds.Xtest_patches, ds.Xtest_presences, ds.Ytest
    # Binarize the test labels once, before the loop.
    test_labels = get_binary_labels(test_labels)

    for i in xrange(len(train_ds)):
        print "Current training dataset is %s \n" % (train_ds[i])
        ds.setup_pretraining_dataset(data_path=train_ds[i],
                                     train_split_scale=1.0,
                                     patch_size=(8, 8))
        train_patches, train_pre, train_labels = \
            ds.Xtrain_patches, ds.Xtrain_presences, ds.Ytrain
        train_labels = get_binary_labels(train_labels)

        print "Starting the pretraining phase."
        (costs, pre_train_probs) = patch_mlp.train(train_patches, train_pre,
                                                   **cs_args["train_args"])
        patch_mlp.save_data()

        print "Testing on the training dataset."
        (costs, pre_test_train_probs) = patch_mlp.test(train_patches,
                                                       train_pre,
                                                       **cs_args["test_args"])

        print "Testing on the test dataset."
        (costs, pre_test_test_probs) = patch_mlp.test(test_patches,
                                                      test_pre,
                                                      **cs_args["test_args"])

        print "Normalizing patch-mlp's outputs"
        pre_train_probs = normalize_data(pre_train_probs)
        pre_test_train_probs = normalize_data(pre_test_train_probs)
        pre_test_test_probs = normalize_data(pre_test_test_probs)

        print "Training post-mlp"
        post_mlp.train(data=pre_train_probs, labels=train_labels,
                       **post_cs_args["train_args"])

        print "starting post-testing on training dataset"
        post_mlp.test(data=pre_test_train_probs, labels=train_labels,
                      **post_cs_args["test_args"])

        print "starting post-testing on the test dataset"
        post_mlp.test(data=pre_test_test_probs, labels=test_labels,
                      **post_cs_args["test_args"])
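# A sketch of how `pre_training_on_multi_datasets` might be invoked,
# assuming the PatchBasedMLP construction used in the __main__ blocks of
# these scripts and the PostMLP construction used in `experiment`; the
# post-MLP sizes and the dataset paths are placeholders.
def _run_multi_dataset_pretraining():
    patch_size = (8, 8)
    no_of_patches = 64
    no_of_classes = 11

    x = T.matrix('x')
    patch_mlp = PatchBasedMLP(x,
                              n_in=patch_size[0] * patch_size[1],
                              n_hiddens=[1024, 768],
                              n_out=no_of_classes,
                              no_of_patches=no_of_patches,
                              activation=NeuralActivations.Rectifier,
                              use_adagrad=False)

    post_input = T.matrix('post_input')
    post_mlp = PostMLP(post_input,
                       n_in=no_of_patches * no_of_classes,
                       n_hiddens=[2048],
                       n_out=1,
                       n_hidden_layers=1,
                       is_binary=True,
                       exp_id=1)

    train_ds = ["/path/to/train_ds_1.npy",  # placeholder paths
                "/path/to/train_ds_2.npy"]
    test_ds = ["/path/to/test_ds.npy"]
    pre_training_on_multi_datasets(patch_mlp=patch_mlp, post_mlp=post_mlp,
                                   train_ds=train_ds, test_ds=test_ds)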
pre_test_train_probs = normalize_data(pre_test_train_probs)
pre_test_test_probs = normalize_data(pre_test_test_probs)

post_mlp.train(data=pre_train_probs, labels=train_set_labels,
               **cs_args["train_args"])

print "starting post-testing on training dataset"
post_mlp.test(data=pre_test_train_probs, labels=train_set_labels,
              **cs_args["test_args"])

print "starting post-testing on the test dataset"
post_mlp.test(data=pre_test_test_probs, labels=test_set_labels,
              **cs_args["test_args"])


if __name__ == "__main__":
    print "Loading the dataset"
    ds = Dataset()

    data_path_40k = "/RQusagers/gulcehre/dataset/pentomino/pieces/pento64x64_40k_seed_39112222.npy"
    data_path = "/RQusagers/gulcehre/dataset/pentomino/experiment_data/pento64x64_80k_seed_39112222.npy"
    data_new_60k = \
        "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_60k_64patches_seed_975168712_64patches.npy"

    patch_size = (8, 8)
    ds.setup_pretraining_obj_patch_dataset(data_path=data_new_60k,
                                           patch_size=patch_size,
                                           normalize_inputs=False)

    x = T.matrix('x')
    n_hiddens = [2048]
    no_of_patches = 3
    no_of_classes = 11

    prmlp = PatchBasedMLP(x,
                          n_in=patch_size[0] * patch_size[1],
                          n_hiddens=n_hiddens,
                          n_out=no_of_classes,
                          no_of_patches=no_of_patches,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)
print "starting post-testing on training dataset" post_mlp.test(data=pre_test_train_probs, labels=train_set_labels, **post_cs_args["test_args"]) print "starting post-testing on the dataset" post_mlp.test(data=pre_test_test_probs, labels=test_set_labels, **post_cs_args["test_args"]) if __name__ == "__main__": print "Loading the dataset" ds = Dataset() data_path_40k =\ "/RQusagers/gulcehre/dataset/pentomino/pieces/pento64x64_40k_seed_39112222.npy" data_path =\ "/RQusagers/gulcehre/dataset/pentomino/experiment_data/pento64x64_80k_seed_39112222.npy" data_new =\ "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_20k_64patches_seed_112168712_64patches.npy" data_new_40k =\ "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_40k_64patches_seed_975168712_64patches.npy" data_new_60k =\ "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_60k_64patches_seed_975168712_64patches.npy"