# train each ensemble member from a saved continuation and store its weights
import sys
import gc

import dill
import theano.tensor as T

from toupee import config
from toupee.data import load_data
from toupee.mlp import test_mlp

if __name__ == '__main__':
    params = config.load_parameters(sys.argv[1])
    dataset = load_data(params.dataset,
                        resize_to=params.resize_data_to,
                        shared=False,
                        pickled=params.pickled)
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    method = params.method
    method.prepare(params, dataset)
    train_set = method.resampler.get_train()
    valid_set = method.resampler.get_valid()
    test_set = method.resampler.get_test()
    test_set_x, test_set_y = test_set
    shared_dataset = [train_set, valid_set, test_set]
    continuations = dill.load(open(sys.argv[2], 'rb'))
    members = []
    for i, c in enumerate(continuations):
        print "training member {0}".format(i)
        m = test_mlp(shared_dataset, params,
                     continuation=c, x=x, y=y, index=index)
        members.append(m.get_weights())
        # release the member's model before training the next one
        m.clear()
        del m
        gc.collect()
    dill.dump(members, open(sys.argv[3], 'wb'))
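# ----------------------------------------------------------------------
# A minimal sketch of loading the saved weights back; 'members.pkl' is a
# hypothetical name standing in for the path passed as sys.argv[3] above.
import dill

with open('members.pkl', 'rb') as f:
    saved_members = dill.load(f)  # one get_weights() result per member
print "loaded {0} trained members".format(len(saved_members))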
print "saving results to {0}@{1}".format(params.results_db, host) conn = MongoClient(host=host) db = conn[params.results_db] if 'results_table' in params.__dict__: table_name = params.results_table else: table_name = 'results' table = db[table_name] results = { "params": params.__dict__, "test_losses": test_losses, "test_score": test_score, } table.insert(json.loads(json.dumps(results, default=common.serialize))) train, valid, test = dataset train_x, train_y = train valid_x, valid_y = valid test_x, test_y = test shared_train_x = sharedX(train_x) shared_valid_x = sharedX(valid_x) shared_test_x = sharedX(test_x) distilled_train_y = ensemble.classify(shared_train_x).eval() distilled_set = ( (shared_train_x, sharedX(distilled_train_y, dtype=numpy.int32)), (shared_valid_x, sharedX(valid_y, dtype=numpy.int32)), (shared_test_x, sharedX(test_y, dtype=numpy.int32)), ) #y = T.vector() mlp = test_mlp(distilled_set, params, y=y)
# train a single MLP, with command-line arguments overriding the
# parameter file where given
import dill

arg_param_pairings = [
    (args.seed, 'random_seed'),
    (args.results_db, 'results_db'),
    (args.results_host, 'results_host'),
    (args.results_table, 'results_table'),
    (args.epochs, 'n_epochs'),
]

from toupee import config

params = config.load_parameters(args.params_file)

def arg_params(arg_value, param):
    # an argument given on the command line overrides the parameter file
    if arg_value is not None:
        params.__dict__[param] = arg_value

for arg, param in arg_param_pairings:
    arg_params(arg, param)

from toupee import data
from toupee.mlp import MLP, test_mlp

dataset = data.load_data(params.dataset,
                         resize_to=params.resize_data_to,
                         shared=False,
                         pickled=params.pickled,
                         center_and_normalise=params.center_and_normalise,
                         join_train_and_valid=params.join_train_and_valid)
pretraining_set = data.make_pretraining_set(dataset, params.pretraining)
mlp = test_mlp(dataset, params, pretraining_set=pretraining_set)

if args.save_file is not None:
    dill.dump(mlp, open(args.save_file, "wb"))
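# ----------------------------------------------------------------------
# A minimal argparse sketch producing the `args` object the script above
# relies on; the flag names are assumptions inferred from the attributes
# it reads.
import argparse

parser = argparse.ArgumentParser(description='train a single MLP')
parser.add_argument('params_file', help='experiment parameter file')
parser.add_argument('--seed', type=int, default=None)
parser.add_argument('--epochs', type=int, default=None)
parser.add_argument('--results-db', default=None)
parser.add_argument('--results-host', default=None)
parser.add_argument('--results-table', default=None)
parser.add_argument('--save-file', default=None)
args = parser.parse_args()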
# rebuild the ensemble members from saved continuations and measure the
# aggregated test error
import sys

import dill
import numpy
import theano
import theano.tensor as T

from toupee import config
from toupee.data import load_data
from toupee.mlp import test_mlp

params = config.load_parameters(sys.argv[1])  # as in the member-training script
params.pretraining = None
dataset = load_data(params.dataset,
                    resize_to=params.resize_data_to,
                    shared=False,
                    pickled=params.pickled)
x = T.matrix('x')
y = T.ivector('y')
index = T.lscalar('index')
method = params.method
method.prepare(params, dataset)
train_set = method.resampler.get_train()
valid_set = method.resampler.get_valid()
test_set = method.resampler.get_test()
shared_dataset = [train_set, valid_set, test_set]
continuations = dill.load(open(sys.argv[2], 'rb'))
members = [test_mlp(shared_dataset, params,
                    continuation=c, x=x, y=y, index=index)
           for c in continuations]
ensemble = params.method.create_aggregator(params, members, x, y,
                                           train_set, valid_set)
test_set_x, test_set_y = method.resampler.get_test()
test_model = theano.function(
    inputs=[index],
    outputs=ensemble.errors,
    givens={
        x: test_set_x[index * params.batch_size:(index + 1) * params.batch_size],
        y: test_set_y[index * params.batch_size:(index + 1) * params.batch_size],
    })
# integer division: any final partial batch is dropped
n_test_batches = test_set_x.shape[0].eval() / params.batch_size
test_losses = [test_model(i) for i in xrange(n_test_batches)]
test_score = numpy.mean(test_losses)
print 'Final error: {0} %'.format(test_score * 100.)
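# ----------------------------------------------------------------------
# A minimal numpy sanity check for the batched score above: with
# equal-sized batches, the mean of per-batch error rates equals the
# overall error rate; all numbers here are illustrative.
import numpy

batch_errors = numpy.array([0.05, 0.10, 0.15])  # hypothetical per-batch rates
batch_size = 128
total_wrong = (batch_errors * batch_size).sum()
overall = total_wrong / (batch_size * len(batch_errors))
assert numpy.isclose(overall, batch_errors.mean())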