Пример #1
0
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    x = m.mean() + x #stupid mask not always needed...

    #embedding_size = 300
    #glove_version = "glove.6B.300d.txt"

    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    conv1 = Conv1D(filter_length=5, num_filters=128, input_dim=embedding_size,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5
            #, step=2
            ).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0),
            step=3,
            name="conv2")
    conv2.initialize()
    o = conv2.apply(o)

    o = Rectifier(name="conv2rec").apply(o)
    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0),
            step=3,
            name="conv3")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    fork = Fork(weights_init=IsotropicGaussian(0.02),
            biases_init=Constant(0.),
            input_dim=128,
            output_dims=[128]*3,
            output_names=['inputs', 'reset_inputs', 'update_inputs']
            )
    fork.initialize()

    inputs, reset_inputs, update_inputs = fork.apply(o)

    out = o.mean(axis=1)

    #gru = GatedRecurrent(dim=128,
            #weights_init=IsotropicGaussian(0.02),
            #biases_init=IsotropicGaussian(0.0))

    #gru.initialize()
    #states = gru.apply(inputs=inputs, reset_inputs=reset_inputs, update_inputs=update_inputs)

    #out = states[:, -1, :]

    hidden = Linear(
        input_dim = 128,
        output_dim = 128,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.))
    hidden.initialize()

    o = hidden.apply(out)
    o = Rectifier().apply(o)
    #hidden = Linear(
        #input_dim = 128,
        #output_dim = 128,
        #weights_init = IsotropicGaussian(std=0.02),
        #biases_init = Constant(0.),
        #name="hiddenmap2")
    #hidden.initialize()

    #o = hidden.apply(o)
    #o = Rectifier(name="rec2").apply(o)


    score_layer = Linear(
            input_dim = 128,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)

    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1).shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()


    # =================

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
            cost = cost,
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=10),
                AdaM(),
                #AdaDelta(),
                ])

            )


    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    batch_size = 16
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')

        dataset = IMDBText(which_set)
        n_train = dataset.num_examples
        stream = DataStream(
                dataset=dataset,
                iteration_scheme=ShuffledScheme(
                    examples=n_train,
                    batch_size=batch_size)
                )
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)

    print "setting up model"
    #import ipdb
    #ipdb.set_trace()

    n_examples = 25000
    #======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True
        ))

    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        data_stream=test_stream,
        prefix='test',
        after_epoch=True
        ))
    extensions.append(Timing())
    extensions.append(Printing())

    #extensions.append(Plot("norms", channels=[['train_lstm_norm', 'train_pre_norm']], after_epoch=True))
    extensions.append(Plot(theano.config.device+"_result", channels=[['test_misclassification', 'train_misclassification']], after_epoch=True))

    main_loop = MainLoop(
            model=model,
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)
    main_loop.run()