# Assumed module-level imports for the helpers below. The project-local names
# (InputLayer, LinearLayer, BilinearLayer, MaskedInputLayer, NeuralNet,
# DataTriplet, AdagradTrainer, MixtureOfExperts, BinaryTreeLSTM,
# BinaryForestLSTM, make_multilayer_net, make_multilayer_net_from_layers,
# util, ...) come from this project's own modules.
import json
import timeit

import numpy as np
import theano
import theano.sparse
import theano.tensor as T


def _train_feedforward_net(experiment_name, json_file, data_triplet, wbm,
                           num_reps, num_hidden_layers, num_hidden_units,
                           dropout):
    """Train a feedforward net over the two argument inputs and log one JSON
    line per training rep."""
    rng = np.random.RandomState(100)
    arg1_model = InputLayer(rng, wbm.num_units, False)
    arg2_model = InputLayer(rng, wbm.num_units, False)
    nn, all_layers = make_multilayer_net_from_layers(
        input_layers=[arg1_model, arg2_model],
        Y=T.lvector(),
        use_sparse=False,
        num_hidden_layers=num_hidden_layers,
        num_hidden_units=num_hidden_units,
        num_output_units=data_triplet.output_dimensions()[0],
        output_activation_fn=T.nnet.softmax,
        dropout=dropout)

    learning_rate = 0.01
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn, nn.crossentropy, learning_rate, lr_smoother,
                             data_triplet, util.make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'cost function': 'crossentropy',
            'dropout': dropout
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

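# A minimal, hypothetical driver for _train_feedforward_net. The loader names
# and file names below are illustrative assumptions, not part of this project:
#
#   wbm = load_word_embedding_model('skipgram-300')    # assumed loader
#   data_triplet = load_data_triplet('implicit-sense') # assumed splits
#   with open('ff_results.json', 'w') as json_file:
#       _train_feedforward_net('ff-baseline', json_file, data_triplet, wbm,
#                              num_reps=15, num_hidden_layers=2,
#                              num_hidden_units=300, dropout=True)
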
def _zh_experiment0_helper(vec_type, num_units, projection, num_hidden_layers,
                           exp_name, relation_list, label_vector,
                           label_alphabet):
    """Cross-validated feedforward experiment on the Chinese (zh) data."""
    params = [vec_type, str(num_units), projection, str(num_hidden_layers)]
    file_name = '%s-%s' % (exp_name, '-'.join(params))
    json_file = util.set_logger(file_name)

    word2vec_ff = util._get_zh_word2vec_ff(num_units, vec_type, projection)
    data_matrix_pair = word2vec_ff(relation_list)
    learning_rate = 0.001
    lr_smoother = 0.01

    num_folds = 7
    for fold_index in xrange(num_folds):
        data_triplet = get_xvalidated_datatriplet(
            data_matrix_pair, label_vector, label_alphabet,
            num_folds, fold_index)
        num_reps = 15
        num_hidden_units_list = [50, 200, 300, 400]
        for num_hidden_units in num_hidden_units_list:
            rng = np.random.RandomState(100)
            X_list = [T.matrix(), T.matrix()]
            net, layers = make_multilayer_net(
                rng,
                n_in_list=data_triplet.input_dimensions(),
                X_list=X_list,
                Y=T.lvector(),
                use_sparse=False,
                num_hidden_layers=num_hidden_layers,
                num_hidden_units=num_hidden_units,
                num_output_units=data_triplet.output_dimensions()[0],
                output_activation_fn=T.nnet.softmax,
                dropout=False)
            trainer = AdagradTrainer(net, net.crossentropy,
                                     learning_rate, lr_smoother,
                                     data_triplet, _make_givens)
            for rep in xrange(num_reps):
                random_seed = rep
                rng = np.random.RandomState(random_seed)
                net.reset(rng)
                trainer.reset()
                minibatch_size = np.random.randint(20, 60)
                n_epochs = 50

                start_time = timeit.default_timer()
                best_iter, best_dev_acc, best_test_acc = \
                    trainer.train_minibatch_triplet(minibatch_size, n_epochs)
                end_time = timeit.default_timer()
                print 'Training process takes %s seconds' % \
                    (end_time - start_time)
                print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
                    (best_iter, best_dev_acc, best_test_acc)

def _net_experiment_stlstm_helper(experiment_name, json_file, data_triplet,
                                  wbm, num_reps, num_hidden_layers,
                                  num_hidden_units, proj_type):
    """Train the net built by _construct_net and log one JSON line per rep."""
    nn = _construct_net(data_triplet, wbm, num_hidden_layers,
                        num_hidden_units, proj_type)
    learning_rate = 0.01
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn, nn.crossentropy, learning_rate, lr_smoother,
                             data_triplet, BinaryTreeLSTM.make_givens,
                             nn.misc_function)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in nn.layers:
            layer.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'crossentropy',
            'projection': proj_type,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

def _net_mixture_experiment_helper(experiment_name, json_file, data_triplet,
                                   num_reps, sparse_num_hidden_layers,
                                   cont_num_hidden_layers, use_moe,
                                   mixture_num_hidden_layers,
                                   num_hidden_units, proj_type):
    """Train a two-branch net (sparse features plus word2vec features) joined
    either by a MixtureOfExperts or by extra mixture layers."""
    rng = np.random.RandomState(100)
    learning_rate = 0.001
    lr_smoother = 0.01

    if use_moe:
        # Each expert emits class probabilities directly.
        output_activation_fn = T.nnet.softmax
        n_out = data_triplet.output_dimensions()[0]
    else:
        # Each branch emits a hidden representation for the mixture layers.
        output_activation_fn = T.tanh
        n_out = num_hidden_units

    # The first input must be sparse.
    X_list = [theano.sparse.csr_matrix(), T.matrix(), T.matrix()]
    sf_net, sf_layers = make_multilayer_net(
        rng,
        n_in_list=data_triplet.input_dimensions()[0:1],
        X_list=X_list[0:1],
        Y=T.lvector(),
        use_sparse=True,
        num_hidden_layers=sparse_num_hidden_layers,
        num_hidden_units=num_hidden_units,
        num_output_units=n_out,
        output_activation_fn=output_activation_fn)
    word2vec_net, word2vec_layers = make_multilayer_net(
        rng,
        n_in_list=data_triplet.input_dimensions()[1:],
        X_list=X_list[1:],
        Y=T.lvector(),
        use_sparse=False,
        num_hidden_layers=cont_num_hidden_layers,
        num_hidden_units=num_hidden_units,
        num_output_units=n_out,
        output_activation_fn=output_activation_fn)

    if use_moe:
        complete_net = MixtureOfExperts(
            rng,
            n_in_list=data_triplet.input_dimensions(),
            expert_list=[sf_net, word2vec_net],
            X_list=X_list,
            Y=T.lvector(),
            num_hidden_layers=mixture_num_hidden_layers,
            num_hidden_units=num_hidden_units)
    else:
        mixture_net, mixture_layers = make_multilayer_net(
            rng,
            n_in_list=[sf_layers[-1].n_out, word2vec_layers[-1].n_out],
            X_list=[sf_layers[-1].activation, word2vec_layers[-1].activation],
            Y=T.lvector(),
            use_sparse=False,
            num_hidden_layers=mixture_num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0])
        complete_net = NeuralNet(sf_layers + word2vec_layers + mixture_layers)
    # Wire the symbolic inputs so the trainer can build its givens.
    complete_net.input = X_list

    trainer = AdagradTrainer(complete_net, complete_net.crossentropy,
                             learning_rate, lr_smoother,
                             data_triplet, _make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        complete_net.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'sparse num hidden layers': sparse_num_hidden_layers,
            'continuous num hidden layers': cont_num_hidden_layers,
            'cost function': 'crossentropy',
            'projection': proj_type,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

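# Pure-numpy sketch of the standard mixture-of-experts combination that the
# MixtureOfExperts class above is assumed to implement: a softmax gate weights
# each expert's output distribution. Names and shapes here are illustrative.
def _moe_sketch():
    expert_outputs = np.random.rand(2, 11)  # 2 experts x 11 output units
    gate_scores = np.random.randn(2)        # gating network's raw scores
    gate = np.exp(gate_scores) / np.exp(gate_scores).sum()  # softmax gate
    # Final prediction: gate-weighted sum of the expert outputs.
    return (gate[:, np.newaxis] * expert_outputs).sum(axis=0)
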
def _net_experiment4_helper(experiment_name, json_file, num_hidden_layers,
                            num_hidden_units, num_reps, data_triplet,
                            use_hinge):
    """Train a hand-wired stack of LinearLayers with hinge or crossentropy
    loss. (experiment_name was missing from the original signature even
    though the result dict logs it.)"""
    n_epochs = 30
    learning_rate = 0.01
    lr_smoother = 0.01
    rng = np.random.RandomState(100)

    X_list = [T.matrix(), T.matrix()]
    if num_hidden_layers == 0:
        first_layer = LinearLayer(
            rng,
            n_in_list=data_triplet.input_dimensions(),
            n_out=data_triplet.output_dimensions()[0],
            use_sparse=False,
            X_list=X_list,
            Y=T.lvector(),
            activation_fn=None if use_hinge else T.nnet.softmax)
    else:
        first_layer = LinearLayer(
            rng,
            n_in_list=data_triplet.input_dimensions(),
            n_out=num_hidden_units,
            use_sparse=False,
            X_list=X_list,
            activation_fn=T.tanh)
    top_layer = first_layer
    layers = [first_layer]
    for i in range(num_hidden_layers):
        is_top_layer = i == (num_hidden_layers - 1)
        if is_top_layer:
            hidden_layer = LinearLayer(
                rng,
                n_in_list=[num_hidden_units],
                n_out=data_triplet.output_dimensions()[0],
                use_sparse=False,
                X_list=[top_layer.activation],
                Y=T.lvector(),
                activation_fn=None if use_hinge else T.nnet.softmax)
        else:
            hidden_layer = LinearLayer(
                rng,
                n_in_list=[num_hidden_units],
                n_out=num_hidden_units,
                use_sparse=False,
                X_list=[top_layer.activation],
                activation_fn=T.tanh)
        # Accumulate parameters up the stack so the top layer owns them all.
        hidden_layer.params.extend(top_layer.params)
        layers.append(hidden_layer)
        top_layer = hidden_layer
    top_layer.input = X_list

    trainer = AdagradTrainer(
        top_layer,
        top_layer.hinge_loss if use_hinge else top_layer.crossentropy,
        learning_rate, lr_smoother, data_triplet)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in layers:
            layer.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'cost function': 'hinge' if use_hinge else 'crossentropy',
            'num hidden layers': num_hidden_layers,
            'num hidden units': num_hidden_units,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

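# Every helper in this file ends with the same JSON-line logging block. A
# sketch of a shared helper (illustrative only; the existing code above and
# below does not call it):
def _log_result(json_file, experiment_name, extra_fields):
    """Write one training rep's result as a single JSON line.

    extra_fields is a dict of run-specific entries such as
    'test accuracy' or 'minibatch size'.
    """
    result_dict = {'experiment name': experiment_name}
    result_dict.update(extra_fields)
    json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
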
def _net_experiment_tlstm_helper(experiment_name, json_file, model_file,
                                 data_triplet, wbm, num_reps,
                                 num_hidden_layers, num_hidden_units,
                                 use_hinge, proj_type):
    """Train the tree LSTM net over indexed instances and log one JSON line
    per rep. model_file is currently unused. (experiment_name was missing
    from the original signature even though the result dict logs it; an
    unconditional return after the trainer was built made everything after
    it dead code and has been removed.)"""
    nn = _make_tlstm_net(data_triplet.training_data, wbm,
                         data_triplet.output_dimensions()[0],
                         num_hidden_layers, num_hidden_units,
                         use_hinge, proj_type)

    # Time the compilation of the cost function; the compiled function itself
    # is not kept.
    start_time = timeit.default_timer()
    theano.function(inputs=nn.input + nn.output, outputs=nn.crossentropy)
    end_time = timeit.default_timer()
    num_data = len(data_triplet.training_data_label[0])
    print 'crossentropy function for %s instances takes %s seconds' % \
        (num_data, end_time - start_time)

    learning_rate = 0.001
    lr_smoother = 0.01
    # The trainer iterates over instance indices rather than raw trees.
    indexed_data_triplet = DataTriplet(
        data_list=[
            [np.arange(len(data_triplet.training_data_label[0]))],
            [np.arange(len(data_triplet.dev_data_label[0]))],
            [np.arange(len(data_triplet.test_data_label[0]))]
        ],
        label_vectors=[data_triplet.training_data_label,
                       data_triplet.dev_data_label,
                       data_triplet.test_data_label],
        label_alphabet_list=data_triplet.label_alphabet_list)

    start_time = timeit.default_timer()
    trainer = AdagradTrainer(nn,
                             nn.hinge_loss if use_hinge else nn.crossentropy,
                             learning_rate, lr_smoother, indexed_data_triplet,
                             BinaryForestLSTM.make_givens)
    end_time = timeit.default_timer()
    num_data = len(indexed_data_triplet.training_data_label[0])
    print 'trainer construction for %s instances takes %s seconds' % \
        (num_data, end_time - start_time)

    dev_model = _copy_tlstm_net(data_triplet.dev_data, nn, proj_type)
    test_model = _copy_tlstm_net(data_triplet.test_data, nn, proj_type)
    dev_accuracy = T.mean(T.eq(dev_model.output[-1], dev_model.predict[-1]))
    trainer.dev_eval_function = \
        theano.function(inputs=dev_model.input + dev_model.output,
                        outputs=[dev_accuracy, dev_model.crossentropy],
                        on_unused_input='warn')
    test_accuracy = T.mean(T.eq(test_model.output[-1],
                                test_model.predict[-1]))
    trainer.test_eval_function = \
        theano.function(inputs=test_model.input + test_model.output,
                        outputs=[test_accuracy, test_model.crossentropy],
                        on_unused_input='warn')

    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in nn.layers:
            layer.reset(rng)
        trainer.reset()
        # Minibatch size is fixed at 1 for the tree-structured model.
        minibatch_size = 1
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

# NOTE: this definition is shadowed by the later function of the same name
# below; it is the earlier, hand-wired variant.
def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet, wbm,
                                num_reps, LSTMModel, num_hidden_layers,
                                num_hidden_units, use_hinge, proj_type,
                                use_bl, arg_shared_weights):
    """Train a pair of LSTMs over the two arguments, pooled by proj_type.
    (experiment_name was missing from the original signature even though the
    result dict logs it.)"""
    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, wbm.num_units)
    if arg_shared_weights:
        # The two arguments share one set of LSTM weights.
        arg2_model = LSTMModel(rng, wbm.num_units,
                               W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, wbm.num_units)

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []
    if use_bl:
        output_layer = BilinearLayer(
            rng,
            n_in1=wbm.num_units,
            n_in2=wbm.num_units,
            n_out=data_triplet.output_dimensions()[0],
            X1=proj_variables[0],
            X2=proj_variables[1],
            Y=T.lvector(),
            activation_fn=None if use_hinge else T.nnet.softmax)
    else:
        n_in_list = [wbm.num_units, wbm.num_units]
        X_list = proj_variables
        for i in range(num_hidden_layers):
            hidden_layer = LinearLayer(rng,
                                       n_in_list=n_in_list,
                                       n_out=num_hidden_units,
                                       use_sparse=False,
                                       X_list=X_list,
                                       activation_fn=T.tanh)
            n_in_list = [num_hidden_units]
            X_list = [hidden_layer.activation]
            hidden_layers.append(hidden_layer)
        output_layer = LinearLayer(
            rng,
            n_in_list=n_in_list,
            n_out=data_triplet.output_dimensions()[0],
            use_sparse=False,
            X_list=X_list,
            Y=T.lvector(),
            activation_fn=None if use_hinge else T.nnet.softmax)

    # Assemble the net by hand: collect parameters, inputs, and outputs.
    nn = NeuralNet()
    layers = [arg1_model, arg2_model, output_layer] + hidden_layers
    nn.params.extend(arg1_model.params)
    if not arg_shared_weights:
        nn.params.extend(arg2_model.params)
    nn.params.extend(output_layer.params)
    for hidden_layer in hidden_layers:
        nn.params.extend(hidden_layer.params)
    nn.layers = layers
    nn.input.extend(arg1_model.input)
    nn.input.extend(arg2_model.input)
    nn.output.extend(output_layer.output)
    nn.predict = output_layer.predict
    nn.hinge_loss = output_layer.hinge_loss
    nn.crossentropy = output_layer.crossentropy

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn,
                             nn.hinge_loss if use_hinge else nn.crossentropy,
                             learning_rate, lr_smoother, data_triplet,
                             LSTMModel.make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in layers:
            layer.reset(rng)
        trainer.reset()
        # Minibatch size is fixed at 1 for this model.
        minibatch_size = 1
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

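# Pure-numpy sketch of what the four projection types above compute over an
# LSTM's hidden-state sequence. The shape is an illustrative assumption; the
# variable names mirror the model attributes max_pooled_h, mean_pooled_h,
# sum_pooled_h, and top_h.
def _pooling_sketch():
    h = np.random.randn(7, 50)      # (seq_len, num_units) hidden states
    max_pooled_h = h.max(axis=0)    # elementwise max over time steps
    mean_pooled_h = h.mean(axis=0)  # average over time steps
    sum_pooled_h = h.sum(axis=0)    # sum over time steps
    top_h = h[-1]                   # hidden state at the last time step
    return max_pooled_h, mean_pooled_h, sum_pooled_h, top_h
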
def _att_experiment_ff_helper(experiment_name, attention_model, json_file,
                              data_triplet, wbm, num_reps,
                              num_att_hidden_layer, num_hidden_layers,
                              num_hidden_units, dropout):
    """Train a feedforward net over two attention-based argument models."""
    rng = np.random.RandomState(100)
    arg1_model = attention_model(rng, wbm.num_units, num_att_hidden_layer,
                                 num_hidden_units, dropout=False)
    arg2_model = attention_model(rng, wbm.num_units, num_att_hidden_layer,
                                 num_hidden_units, dropout=False)
    nn, all_layers = make_multilayer_net_from_layers(
        input_layers=[arg1_model, arg2_model],
        Y=T.lvector(),
        use_sparse=False,
        num_hidden_layers=num_hidden_layers,
        num_hidden_units=num_hidden_units,
        num_output_units=data_triplet.output_dimensions()[0],
        output_activation_fn=T.nnet.softmax,
        dropout=dropout)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn, nn.crossentropy, learning_rate, lr_smoother,
                             data_triplet, util.make_givens_srm)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'cost function': 'crossentropy',
            'dropout': dropout
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet,
                                num_units, num_reps, LSTMModel,
                                num_hidden_layers, num_hidden_units,
                                use_hinge, proj_type, use_bl,
                                arg_shared_weights):
    """Train a pair of LSTMs with masked pooling layers; this definition
    supersedes the earlier one of the same name above."""
    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, num_units)
    if arg_shared_weights:
        # The two arguments share one set of LSTM weights.
        arg2_model = LSTMModel(rng, num_units,
                               W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, num_units)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg1_model.h, arg1_model.mask,
                                   arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg2_model.h, arg2_model.mask,
                                   arg2_model.c_mask)
    if use_bl:
        raise ValueError('bilinear is not yet supported')
    else:
        _, pred_layers = make_multilayer_net_from_layers(
            input_layers=[arg1_pooled, arg2_pooled],
            Y=T.lvector(),
            use_sparse=False,
            num_hidden_layers=num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0],
            output_activation_fn=T.nnet.softmax,
            dropout=False)
    # Rebuild the net from all layers so the parameters end up in one place.
    nn = NeuralNet([arg1_model, arg2_model] + pred_layers)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn,
                             nn.hinge_loss if use_hinge else nn.crossentropy,
                             learning_rate, lr_smoother, data_triplet,
                             LSTMModel.make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; dev accuracy = %s; test accuracy = %s' % \
            (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
            'dropout': False
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))

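# Because every helper logs one JSON object per line, results can be
# aggregated after the fact. A sketch, assuming a results file written by any
# of the helpers above; the file name is the caller's choice:
def _best_logged_run(file_name):
    """Return the logged rep with the highest dev accuracy."""
    best = None
    with open(file_name) as f:
        for line in f:
            run = json.loads(line)
            if best is None or \
                    run['best dev accuracy'] > best['best dev accuracy']:
                best = run
    return best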