def _copy_tlstm_net(data_list, nn, proj_type):
    arg1_model = nn.layers[0].copy(data_list[0])
    arg2_model = nn.layers[1].copy(data_list[1])
    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)
    X_list = proj_variables
    new_hidden_layers = []
    for hidden_layer in nn.layers[2:-1]:
        new_hidden_layer = hidden_layer.copy(X_list)
        X_list = [new_hidden_layer.activation]
        new_hidden_layers.append(new_hidden_layer)
    output_layer = nn.layers[-1].copy(X_list)

    new_nn = NeuralNet()
    layers = [arg1_model, arg2_model] + new_hidden_layers + [output_layer]
    new_nn.layers = layers

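    # Only arg1_model.input is registered; this assumes the copied arg models
    # share one symbolic input, as in _make_tlstm_net below, where both trees
    # are driven by the same `indices` variable.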
    new_nn.input.extend(arg1_model.input)
    new_nn.output.extend(output_layer.output)
    new_nn.predict = output_layer.predict
    new_nn.hinge_loss = output_layer.hinge_loss
    new_nn.crossentropy = output_layer.crossentropy
    return new_nn
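# The proj_type dispatch above is repeated verbatim in several functions in
# this file. A minimal refactoring sketch (not part of the original code);
# `model` is assumed to expose the same pooled attributes used throughout:
def _select_projection(model, proj_type):
    attr_by_proj_type = {
        'max_pool': 'max_pooled_h',
        'mean_pool': 'mean_pooled_h',
        'sum_pool': 'sum_pooled_h',
        'top': 'top_h',
    }
    if proj_type not in attr_by_proj_type:
        raise ValueError('Invalid projection type: %s' % proj_type)
    return getattr(model, attr_by_proj_type[proj_type])
# e.g. proj_variables = [_select_projection(m, proj_type)
#         for m in (arg1_model, arg2_model)]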
def build_lstm_network(data_triplet, num_hidden_layers, num_hidden_units, proj_type,
        learning_rate=0.001, lr_smoother=0.01):
    rng = np.random.RandomState(100)
    num_units = data_triplet.training_data[0].shape[2]
    arg1_model = SerialLSTM(rng, num_units, proj_type)
    arg2_model = SerialLSTM(rng, num_units, proj_type)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg1_model.activation_train, arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg2_model.activation_train, arg2_model.mask, arg2_model.c_mask)
    _, pred_layers = make_multilayer_net_from_layers(
            input_layers=[arg1_pooled, arg2_pooled],
            Y=T.lvector(), use_sparse=False,
            num_hidden_layers=num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0],
            output_activation_fn=T.nnet.softmax,
            dropout=False)
    net = NeuralNet([arg1_model, arg2_model] + pred_layers)
    net.input = arg1_model.input + arg2_model.input
    trainer = AdagradTrainer(net, net.crossentropy, 
            learning_rate, lr_smoother, data_triplet, SerialLSTM.make_givens)
    return net, trainer
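# Hypothetical usage sketch (data_triplet is assumed to be the same
# DataTriplet-style object consumed by AdagradTrainer elsewhere in this file):
#
#   net, trainer = build_lstm_network(data_triplet,
#           num_hidden_layers=2, num_hidden_units=50, proj_type='max_pool')
#   trainer.train_minibatch_triplet(minibatch_size=32, n_epochs=50)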
def _make_tlstm_net(data_list, wbm, num_output_units,
        num_hidden_layers, num_hidden_units, use_hinge, proj_type):

    rng = np.random.RandomState(100)
    indices = T.lvector()

    arg1_model = BinaryForestLSTM(data_list[0], rng, wbm, X_list=[indices])
    arg2_model = BinaryForestLSTM(data_list[1], rng, wbm, X_list=[indices])
    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)
    hidden_layers = []    
    n_in_list = [wbm.num_units, wbm.num_units]
    X_list = proj_variables
    for i in range(num_hidden_layers):
        hidden_layer = LinearLayer(rng, 
                n_in_list=n_in_list, 
                n_out=num_hidden_units, 
                use_sparse=False, 
                X_list=X_list, 
                activation_fn=T.tanh)
        n_in_list = [num_hidden_units]
        X_list = [hidden_layer.activation]
        hidden_layers.append(hidden_layer)
    output_layer = LinearLayer(rng, 
            n_in_list=n_in_list,
            n_out=num_output_units,
            use_sparse=False,
            X_list=X_list,
            Y=T.lvector(),
            activation_fn=None if use_hinge else T.nnet.softmax)

    nn = NeuralNet()
    layers = [arg1_model, arg2_model] + hidden_layers + [output_layer]
    nn.params.extend(arg1_model.params)
    nn.params.extend(arg2_model.params)
    nn.params.extend(output_layer.params)
    for hidden_layer in hidden_layers:
        nn.params.extend(hidden_layer.params)
    nn.layers = layers

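    # Both forest models are driven by the same symbolic `indices` variable
    # (passed as X_list to each), so registering arg1_model.input alone is
    # assumed to cover the network's full input.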
    nn.input.extend(arg1_model.input)
    nn.output.extend(output_layer.output)
    nn.predict = output_layer.predict
    nn.hinge_loss = output_layer.hinge_loss
    nn.crossentropy = output_layer.crossentropy
    return nn
def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet, wbm,
        num_reps, LSTMModel, num_hidden_layers, num_hidden_units, use_hinge,
        proj_type, use_bl, arg_shared_weights):

    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, wbm.num_units)
    if arg_shared_weights:
        arg2_model = LSTMModel(rng, wbm.num_units, 
                W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, wbm.num_units)

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []    
    if use_bl:
        output_layer = BilinearLayer(rng, 
                n_in1=wbm.num_units,
                n_in2=wbm.num_units,
                n_out=data_triplet.output_dimensions()[0],
                X1=proj_variables[0],
                X2=proj_variables[1],
                Y=T.lvector(),
                activation_fn=None if use_hinge else T.nnet.softmax)
    else:
        n_in_list = [wbm.num_units, wbm.num_units]
        X_list = proj_variables
        for i in range(num_hidden_layers):
            hidden_layer = LinearLayer(rng,
                n_in_list=n_in_list, 
                n_out=num_hidden_units,
                use_sparse=False, 
                X_list=X_list, 
                activation_fn=T.tanh)
            n_in_list = [num_hidden_units]
            X_list = [hidden_layer.activation]
            hidden_layers.append(hidden_layer)
        output_layer = LinearLayer(rng, 
                n_in_list=n_in_list,
                n_out=data_triplet.output_dimensions()[0],
                use_sparse=False,
                X_list=X_list,
                Y=T.lvector(),
                activation_fn=None if use_hinge else T.nnet.softmax)

    nn = NeuralNet()
    layers = [arg1_model, arg2_model, output_layer] + hidden_layers
    nn.params.extend(arg1_model.params)
    if not arg_shared_weights:
        nn.params.extend(arg2_model.params)
    nn.params.extend(output_layer.params)
    for hidden_layer in hidden_layers:
        nn.params.extend(hidden_layer.params)
    nn.layers = layers

    nn.input.extend(arg1_model.input)
    nn.input.extend(arg2_model.input)
    nn.output.extend(output_layer.output)
    nn.predict = output_layer.predict
    nn.hinge_loss = output_layer.hinge_loss
    nn.crossentropy = output_layer.crossentropy

    learning_rate = 0.001
    lr_smoother = 0.01

    trainer = AdagradTrainer(nn,
            nn.hinge_loss if use_hinge else nn.crossentropy,
            learning_rate, lr_smoother, data_triplet, LSTMModel.make_givens)
    
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in layers:
            layer.reset(rng)
        trainer.reset()
        
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
                trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training took %s seconds' % (end_time - start_time)
        print 'Best iteration = %s; best dev accuracy = %s; test accuracy = %s' % \
                (best_iter, best_dev_acc, best_test_acc)
        result_dict = {
                'test accuracy': best_test_acc,
                'best dev accuracy': best_dev_acc,
                'best iter': best_iter,
                'random seed': random_seed,
                'minibatch size': minibatch_size,
                'learning rate': learning_rate,
                'lr smoother': lr_smoother,
                'experiment name': experiment_name,
                'num hidden units': num_hidden_units,
                'num hidden layers': num_hidden_layers,
                'cost function': 'hinge loss' if use_hinge else 'crossentropy',
                'projection': proj_type,
                }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
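# The loop above writes one JSON object per line. A minimal sketch for reading
# such a results file back (the path argument is hypothetical):
def _load_results(path):
    with open(path) as f:
        return [json.loads(line) for line in f]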
def _construct_net(data_triplet, wbm, 
        num_hidden_layers, num_hidden_units, proj_type):
    rng = np.random.RandomState(100)
    arg1_model = BinaryTreeLSTM(rng, wbm.num_units)
    arg2_model = BinaryTreeLSTM(rng, wbm.num_units)

    arg1_node_label_layer = LinearLayerTensorOutput(rng, 
            n_in=wbm.num_units, 
            n_out=data_triplet.output_dimensions()[0],
            X=arg1_model.h)
    arg2_node_label_layer = LinearLayerTensorOutput(rng, 
            n_in=wbm.num_units, 
            n_out=data_triplet.output_dimensions()[1],
            X=arg2_model.h)

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []    
    n_in_list = [wbm.num_units, wbm.num_units]
    X_list = proj_variables
    for i in range(num_hidden_layers):
        hidden_layer = LinearLayer(rng,
            n_in_list=n_in_list, 
            n_out=num_hidden_units,
            use_sparse=False, 
            X_list=X_list, 
            activation_fn=T.tanh)
        n_in_list = [num_hidden_units]
        X_list = [hidden_layer.activation]
        hidden_layers.append(hidden_layer)
    label_output_layer = LinearLayer(rng, 
            n_in_list=n_in_list,
            n_out=data_triplet.output_dimensions()[2],
            use_sparse=False,
            X_list=X_list,
            Y=T.lvector(),
            activation_fn=T.nnet.softmax)

    nn = NeuralNet()
    layers = [arg1_model, arg2_model, 
            arg1_node_label_layer, arg2_node_label_layer, label_output_layer] \
                    + hidden_layers
    for layer in layers:
        nn.params.extend(layer.params)
    nn.layers = layers

    nn.input.extend(arg1_model.input)
    nn.input.extend(arg2_model.input)
    nn.output.extend(arg1_node_label_layer.output + 
            arg2_node_label_layer.output + 
            label_output_layer.output)
    nn.predict = label_output_layer.predict
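    # Joint objective: the relation-label loss plus down-weighted (0.5)
    # auxiliary losses from the per-node label predictions on each argument.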
    nn.crossentropy = label_output_layer.crossentropy + \
            0.5 * arg1_node_label_layer.crossentropy + \
            0.5 * arg2_node_label_layer.crossentropy
    
    nn.misc_function = arg1_node_label_layer.miscs + arg2_node_label_layer.miscs
    """
    nn.crossentropy = arg2_node_label_layer.crossentropy + arg1_node_label_layer.crossentropy
    nn.params = arg2_node_label_layer.params + arg1_node_label_layer.params
    nn.misc_function = [arg1_node_label_layer.W[0:3,0:3], arg2_node_label_layer.W[0:3,0:3]]
    nn.crossentropy = label_output_layer.crossentropy
    if len(hidden_layers) > 0:
        nn.layers = [arg1_model, arg2_model, hidden_layers[0], label_output_layer]
        nn.params = arg1_model.params + arg2_model.params + hidden_layers[0].params + label_output_layer.params
    else:
        nn.layers = [arg1_model, arg2_model, label_output_layer]
        nn.params = arg1_model.params + arg2_model.params + label_output_layer.params
    nn.misc_function = [label_output_layer.predict[0], 
            label_output_layer.W_list[0][0:3,0:3],
            arg1_node_label_layer.W[0:3,0:3],
            label_output_layer.activation]
    """

    return nn
def _net_experiment_lstm_helper(experiment_name,
        json_file, data_triplet, num_units, num_reps, 
        LSTMModel, num_hidden_layers, num_hidden_units, use_hinge, proj_type, 
        use_bl, arg_shared_weights):

    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, num_units)
    if arg_shared_weights:
        arg2_model = LSTMModel(rng, num_units, 
                W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, num_units)


    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg1_model.h, arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg2_model.h, arg2_model.mask, arg2_model.c_mask)

    if use_bl:
        raise ValueError('bilinear is not yet supported')
    else:
        _, pred_layers = make_multilayer_net_from_layers(
                input_layers=[arg1_pooled, arg2_pooled],
                Y=T.lvector(), use_sparse=False,
                num_hidden_layers=num_hidden_layers,
                num_hidden_units=num_hidden_units,
                num_output_units=data_triplet.output_dimensions()[0],
                output_activation_fn=T.nnet.softmax,
                dropout=False)
    # include the LSTM models in the NeuralNet so all parameters are
    # collected in one place alongside the prediction layers
    nn = NeuralNet([arg1_model, arg2_model] + pred_layers)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01

    trainer = AdagradTrainer(nn,
            nn.hinge_loss if use_hinge else nn.crossentropy,
            learning_rate, lr_smoother, 
            data_triplet, LSTMModel.make_givens)
    
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
                trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training took %s seconds' % (end_time - start_time)
        print 'Best iteration = %s; best dev accuracy = %s; test accuracy = %s' % \
                (best_iter, best_dev_acc, best_test_acc)
        result_dict = {
                'test accuracy': best_test_acc,
                'best dev accuracy': best_dev_acc,
                'best iter': best_iter,
                'random seed': random_seed,
                'minibatch size': minibatch_size,
                'learning rate': learning_rate,
                'lr smoother': lr_smoother,
                'experiment name': experiment_name,
                'num hidden units': num_hidden_units,
                'num hidden layers': num_hidden_layers,
                'cost function': 'hinge loss' if use_hinge else 'crossentropy',
                'projection': proj_type,
                'dropout': False
                }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
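# Each rep appends one result line to json_file. A hedged sketch for
# summarizing test accuracy across reps, assuming a list of result dicts such
# as the one produced by the hypothetical _load_results above:
def _summarize_test_accuracy(results):
    accs = [r['test accuracy'] for r in results]
    return np.mean(accs), np.std(accs)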