Example #1
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

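    # Wrap the Blocks cost and model so Pylearn2's training loop can drive them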
    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
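Example #2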
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert parameters != role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

    # Testing filtering by application
    appli_filter_list = VariableFilter(applications=[brick1.apply])
    assert variables == appli_filter_list(cg.variables)
Example #3
    def __init__(self, input_dim, hidden_dim, **kwargs):
        super(VariationalAutoEncoder, self).__init__(**kwargs)

        encoder_mlp = MLP([Sigmoid(), Identity()],
                          [input_dim, 101, None])
        decoder_mlp = MLP([Sigmoid(), Sigmoid()],
                          [hidden_dim, 101, input_dim])
        self.hidden_dim = hidden_dim
        self.encoder = VAEEncoder(encoder_mlp, hidden_dim)
        self.decoder = VAEDecoder(decoder_mlp)
        self.children = [self.encoder, self.decoder]
Example #4
 def __init__(self, visible_dim, hidden_dim, activation=Sigmoid(),
              **kwargs):
     super(Rbm, self).__init__(**kwargs)
     self.hidden_dim = hidden_dim
     self.visible_dim = visible_dim
     self.activation = activation
     self.children = [activation]
Example #5
    def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
        super(GatedRecurrent, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Sigmoid()
        self.activation = activation
        self.gate_activation = gate_activation

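        # Register the activation bricks as children so initialization recurses into them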
        self.children = [activation, gate_activation]
Example #6
def initialize_rbm(Wrbm=None, bh=None, bv=None):
    rbm = Rbm(visible_dim=88, hidden_dim=256,
              activation=Sigmoid(),
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.1),
              name='rbm2l')
    rbm.allocate()
    rbm.initialize()

    if Wrbm is not None:
        rbm.W.set_value(Wrbm)
    if bv is not None:
        rbm.bv.set_value(bv)
    if bh is not None:
        rbm.bh.set_value(bh)

    return rbm
Example #7
    def __init__(self, activation, gate_activation, dim,
                 use_update_gate=True, use_reset_gate=True, **kwargs):
        super(GatedRecurrent, self).__init__(**kwargs)
        self.dim = dim
        self.use_update_gate = use_update_gate
        self.use_reset_gate = use_reset_gate

        if not activation:
            activation = Identity()
        if not gate_activation:
            gate_activation = Sigmoid()
        self.activation = activation
        self.gate_activation = gate_activation

        self.children = [activation, gate_activation]
Example #8
    def __init__(self,
                 activation,
                 gate_activation,
                 dim,
                 use_update_gate=True,
                 use_reset_gate=True,
                 **kwargs):
        super(GatedRecurrent, self).__init__(**kwargs)

        if not activation:
            activation = Identity()
        if not gate_activation:
            gate_activation = Sigmoid()

        update_instance(self, locals())
        self.children = [activation, gate_activation]
Example #9
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(
            input_dim=dims[0],
            output_dim=dims[1],
            weights_init=IsotropicGaussian(0.01),
            # biases_init=Constant(0.0),
            use_bias=False,
            name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1],
                                         activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1],
                                   output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2],
                               activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()],
                                 dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [
            self.input_transform, self.gru_layer, self.linear_trans,
            self.lstm_layer, self.out_transform
        ]
Example #10
    def __init__(self,
                 visible_dim,
                 hidden_dim,
                 rnn_dimensions=(128, 128),
                 **kwargs):
        super(Rnnrbm, self).__init__(**kwargs)
        self.rnn_dimensions = rnn_dimensions
        self.visible_dim = visible_dim
        self.hidden_dim = hidden_dim

        # self.in_layer = Linear(input_dim=input_dim, output_dim=rnn_dimension * 4,
        # weights_init=IsotropicGaussian(0.01),
        # biases_init=Constant(0.0),
        # use_bias=False,
        # name="in_layer")

        self.rbm = Rbm(visible_dim=visible_dim,
                       hidden_dim=hidden_dim,
                       activation=Sigmoid(),
                       weights_init=IsotropicGaussian(0.1),
                       biases_init=Constant(0.1),
                       name='rbm')

        self.uv = Linear(input_dim=rnn_dimensions[-1],
                         output_dim=visible_dim,
                         weights_init=IsotropicGaussian(0.0001),
                         biases_init=Constant(0.001),
                         use_bias=True,
                         name='uv')

        self.uh = Linear(input_dim=rnn_dimensions[-1],
                         output_dim=hidden_dim,
                         weights_init=IsotropicGaussian(0.0001),
                         biases_init=Constant(0.001),
                         use_bias=True,
                         name='uh')

        self.rnn = Rnn([visible_dim] + list(rnn_dimensions), name='rnn')

        self.children = [self.rbm, self.uv, self.uh, self.rnn
                         ] + self.rnn.children._items
Example #11
def main_run(_config, _log):
    from collections import namedtuple
    c = namedtuple("Config", _config.keys())(*_config.values())

    _log.info("Running with" + str(_config))

    import theano
    from theano import tensor as T
    import numpy as np

    from dataset import IMDBText, GloveTransformer

    from blocks.initialization import Uniform, Constant, IsotropicGaussian, NdarrayInitialization, Identity, Orthogonal
    from blocks.bricks.recurrent import LSTM, SimpleRecurrent, GatedRecurrent
    from blocks.bricks.parallel import Fork

    from blocks.bricks import Linear, Sigmoid, Tanh, Rectifier
    from blocks import bricks

    from blocks.extensions import Printing, Timing
    from blocks.extensions.monitoring import (DataStreamMonitoring,
                                              TrainingDataMonitoring)

    from blocks.extensions.plot import Plot
    from plot import PlotHistogram

    from blocks.algorithms import GradientDescent, Adam, Scale, StepClipping, CompositeRule, AdaDelta
    from blocks.graph import ComputationGraph, apply_dropout
    from blocks.main_loop import MainLoop
    from blocks.model import Model

    from cuboid.algorithms import AdaM, NAG
    from cuboid.extensions import EpochProgress

    from fuel.streams import DataStream, ServerDataStream
    from fuel.transformers import Padding

    from fuel.schemes import ShuffledScheme
    from Conv1D import Conv1D, MaxPooling1D
    from schemes import BatchwiseShuffledScheme
    from bricks import WeightedSigmoid, GatedRecurrentFull

    from multiprocessing import Process
    import fuel
    import logging
    from initialization import SumInitialization

    from transformers import DropSources
    global train_p
    global test_p

    x = T.tensor3('features')
    #m = T.matrix('features_mask')
    y = T.imatrix('targets')

    #x = x+m.mean()*0

    dropout_variables = []
    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"

    gloveMapping = Linear(
        input_dim=embedding_size,
        output_dim=c.rnn_input_dim,
        weights_init=Orthogonal(),
        #weights_init = IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="gloveMapping")
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="gloveRec").apply(o)
    dropout_variables.append(o)

    summed_mapped_glove = o.sum(axis=1)  # sum over the sequence (time) axis
    glove_out = Linear(input_dim=c.rnn_input_dim,
                       output_dim=1,
                       weights_init=IsotropicGaussian(c.wstd),
                       biases_init=Constant(0.0),
                       name="mapping_to_output")
    glove_out.initialize()
    deeply_sup_0 = glove_out.apply(summed_mapped_glove)
    deeply_sup_probs = Sigmoid(name="deeply_sup_sigmoid").apply(deeply_sup_0)

    input_dim = c.rnn_input_dim
    hidden_dim = c.rnn_dim

    gru = GatedRecurrentFull(
        hidden_dim=hidden_dim,
        activation=Tanh(),
        #activation=bricks.Identity(),
        gate_activation=Sigmoid(),
        state_to_state_init=SumInitialization(
            [Identity(1.0), IsotropicGaussian(c.wstd)]),
        state_to_reset_init=IsotropicGaussian(c.wstd),
        state_to_update_init=IsotropicGaussian(c.wstd),
        input_to_state_transform=Linear(input_dim=input_dim,
                                        output_dim=hidden_dim,
                                        weights_init=IsotropicGaussian(c.wstd),
                                        biases_init=Constant(0.0)),
        input_to_update_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            #biases_init=Constant(-2.0)),
            biases_init=Constant(-1.0)),
        input_to_reset_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            #biases_init=Constant(-3.0))
            biases_init=Constant(-2.0)))
    gru.initialize()
    rnn_in = o.dimshuffle(1, 0, 2)
    #rnn_in = o
    #rnn_out = gru.apply(rnn_in, mask=m.T)
    rnn_out = gru.apply(rnn_in)
    state_to_state = gru.rnn.state_to_state
    state_to_state.name = "state_to_state"
    #o = rnn_out[-1, :, :]
    o = rnn_out[-1]

    #o = rnn_out[:, -1, :]
    #o = rnn_out.mean(axis=1)

    #print rnn_last_out.eval({
    #x: np.ones((3, 101, 300), dtype=theano.config.floatX),
    #m: np.ones((3, 101), dtype=theano.config.floatX)})
    #raw_input()
    #o = rnn_out.mean(axis=1)
    dropout_variables.append(o)

    score_layer = Linear(input_dim=hidden_dim,
                         output_dim=1,
                         weights_init=IsotropicGaussian(std=c.wstd),
                         biases_init=Constant(0.),
                         name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    #probs = deeply_sup_probs
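    # Binary cross-entropy between targets and predicted probabilities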
    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    #cost_deeply_sup0 = - (y * T.log(deeply_sup_probs) + (1-y) * T.log(1 - deeply_sup_probs)).mean()
    # cost += cost_deeply_sup0 * c.deeply_factor

    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print rnn_in.shape.eval(
    #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
    #})
    #print rnn_out.shape.eval(
    #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()

    # =================

    cg = ComputationGraph([cost])
    cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cg.outputs[0],
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=4),
            Adam(learning_rate=0.002, beta1=0.1, beta2=0.001),
            #NAG(lr=0.1, momentum=0.9),
            #AdaDelta(),
        ]))

    # ========
    print "setting up data"
    ports = {
        'gpu0_train': 5557,
        'gpu0_test': 5558,
        'cuda0_train': 5557,
        'cuda0_test': 5558,
        'opencl0:0_train': 5557,
        'opencl0:0_test': 5558,
        'gpu1_train': 5559,
        'gpu1_test': 5560,
    }

    #batch_size = 16
    #batch_size = 32
    batch_size = 40

    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train,
                                         batch_size=batch_size)

        stream = DataStream(dataset=dataset, iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
            data_stream=glove,
            #mask_sources=('features',)
            mask_sources=('features', ))

        padded = DropSources(padded, ['features_mask'])

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    #train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    #test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)

    train_stream = ServerDataStream(('features', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'targets'), port=test_port)

    print "setting up model"
    #ipdb.set_trace()

    n_examples = 25000
    print "Batches per epoch", n_examples // (batch_size + 1)
    batches_extensions = 100
    monitor_rate = 50
    #======
    model = Model(cg.outputs[0])
    extensions = []
    extensions.append(
        EpochProgress(batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(
        TrainingDataMonitoring(
            [cost, misclassification],
            prefix='train',
            every_n_batches=monitor_rate,
        ))

    extensions.append(
        DataStreamMonitoring([cost, misclassification],
                             data_stream=test_stream,
                             prefix='test',
                             after_epoch=True,
                             before_first_epoch=False))

    extensions.append(Timing())
    extensions.append(Printing())

    #extensions.append(Plot("norms", channels=[['train_lstm_norm', 'train_pre_norm']], after_epoch=True))
    #extensions.append(Plot(theano.config.device+"_result", channels=[['test_misclassification', 'train_misclassification']], after_epoch=True))

    #extensions.append(PlotHistogram(
    #channels=['train_state_to_state'],
    #bins=50,
    #every_n_batches=30))

    extensions.append(
        Plot(theano.config.device + "_result",
             channels=[['train_cost'], ['train_misclassification']],
             every_n_batches=monitor_rate))

    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #12
def main():
    x = T.tensor3('features')
    #m = T.matrix('features_mask')
    y = T.imatrix('targets')

    #x = x+m.mean()*0

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    #vaguely normalize
    x = x / 3.0 - .5

    #gloveMapping = Linear(
    #input_dim = embedding_size,
    #output_dim = 128,
    #weights_init = Orthogonal(),
    #biases_init = Constant(0.0),
    #name="gloveMapping"
    #)
    #gloveMapping.initialize()
    #o = gloveMapping.apply(x)
    #o = Rectifier(name="gloveRec").apply(o)
    o = x
    input_dim = 300

    gru = GatedRecurrentFull(
        hidden_dim=input_dim,
        activation=Tanh(),
        #activation=bricks.Identity(),
        gate_activation=Sigmoid(),
        state_to_state_init=IsotropicGaussian(0.02),
        state_to_reset_init=IsotropicGaussian(0.02),
        state_to_update_init=IsotropicGaussian(0.02),
        input_to_state_transform=Linear(input_dim=input_dim,
                                        output_dim=input_dim,
                                        weights_init=IsotropicGaussian(0.02),
                                        biases_init=Constant(0.0)),
        input_to_update_transform=Linear(input_dim=input_dim,
                                         output_dim=input_dim,
                                         weights_init=IsotropicGaussian(0.02),
                                         biases_init=Constant(0.0)),
        input_to_reset_transform=Linear(input_dim=input_dim,
                                        output_dim=input_dim,
                                        weights_init=IsotropicGaussian(0.02),
                                        biases_init=Constant(0.0)))
    gru.initialize()
    rnn_in = o.dimshuffle(1, 0, 2)
    #rnn_in = o
    #rnn_out = gru.apply(rnn_in, mask=m.T)
    rnn_out = gru.apply(rnn_in)
    state_to_state = gru.rnn.state_to_state
    state_to_state.name = "state_to_state"
    #o = rnn_out[-1, :, :]
    o = rnn_out[-1]

    #o = rnn_out[:, -1, :]
    #o = rnn_out.mean(axis=1)

    #print rnn_last_out.eval({
    #x: np.ones((3, 101, 300), dtype=theano.config.floatX),
    #m: np.ones((3, 101), dtype=theano.config.floatX)})
    #raw_input()
    #o = rnn_out.mean(axis=1)

    score_layer = Linear(input_dim=300,
                         output_dim=1,
                         weights_init=IsotropicGaussian(std=wstd),
                         biases_init=Constant(0.),
                         use_bias=True,
                         name="linear_score")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print rnn_in.shape.eval(
    #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
    #})
    #print rnn_out.shape.eval(
    #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
    #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()

    # =================

    cg = ComputationGraph([cost])
    #cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters
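    # Suffix each parameter name with its owning brick for readable debug output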
    for p in params:
        p.name += "___" + p.tag.annotations[0].name

    algorithm = GradientDescent(
        cost=cg.outputs[0],
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=4),
            AdaM(),
            #NAG(lr=0.1, momentum=0.9),
            #AdaDelta(),
        ]))

    #algorithm.initialize()
    print params
    f = theano.function([x, y], algorithm.cost)
    ipdb.set_trace()

    print "making plots"
    #theano.printing.pydotprint(algorithm.cost, outfile='unopt.png')
    theano.printing.pydotprint(f, outfile='opt.png', scan_graphs=True)
Example #13
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

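    # Adding m.mean()*0 keeps the otherwise-unused mask input in the computation graph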
    x = x+m.mean()*0

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    #vaguely normalize
    x = x / 3.0 - .5

    #gloveMapping = Linear(
            #input_dim = embedding_size,
            #output_dim = 128,
            #weights_init = Orthogonal(),
            #biases_init = Constant(0.0),
            #name="gloveMapping"
            #)
    #gloveMapping.initialize()
    #o = gloveMapping.apply(x)
    #o = Rectifier(name="gloveRec").apply(o)

    rnn_in = x.dimshuffle(1, 0, 2)
    class Stub(object):
        def output(self, dropout_active=False):
            return rnn_in

    l_in = Stub()
    l_in.size = 300

    layer = GatedRecurrentPassage(
            size=300,
            gate_activation='sigmoid')
    layer.connect(l_in)
    from blocks.roles import add_role, WEIGHT, INITIAL_STATE
    print layer.params
    for l in layer.params:
        add_role(l, WEIGHT)

    rnn_out = layer.output()
    o = rnn_out
    #o = rnn_out[-1, :, :]

    #o = rnn_out[:, -1, :]
    #o = rnn_out.mean(axis=1)

    #print rnn_last_out.eval({
        #x: np.ones((3, 101, 300), dtype=theano.config.floatX), 
        #m: np.ones((3, 101), dtype=theano.config.floatX)})
    #raw_input()
    #o = rnn_out.mean(axis=1)

    score_layer = Linear(
            input_dim = 300,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print rnn_in.shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #})
    #print rnn_out.shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()


    # =================

    cg = ComputationGraph([cost])
    #cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters
    print params
    print "Len params", len(params)

    algorithm = GradientDescent(
            cost = cg.outputs[0],
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=4),
                AdaM(),
                #NAG(lr=0.1, momentum=0.9),
                #AdaDelta(),
                ])

            )

    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    #batch_size = 16
    batch_size = 32
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size)

        stream = DataStream(
                dataset=dataset,
                iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                #mask_sources=('features',)
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()
Example #14
    ### Identity testing
    from blocks.initialization import Identity, IsotropicGaussian
    from blocks import bricks
    from blocks.bricks import Sigmoid

    dim = 2
    floatX = theano.config.floatX
    x = tensor.tensor3('input')
    gru = GatedRecurrentFull(
        hidden_dim=dim,
        state_to_state_init=Identity(1.),
        #state_to_reset_init=Identity(1.),
        state_to_reset_init=IsotropicGaussian(0.2),
        state_to_update_init=Identity(1.0),
        activation=bricks.Identity(),
        gate_activation=Sigmoid(),
        input_to_state_transform=Linear(
            input_dim=dim,
            output_dim=dim,
            weights_init=Identity(1.0),
            #weights_init=IsotropicGaussian(0.02),
            biases_init=Constant(0.0)),
        input_to_update_transform=Linear(
            input_dim=dim,
            output_dim=dim,
            #weights_init=Constant(0.0),
            weights_init=IsotropicGaussian(0.02),
            biases_init=Constant(2.0)),
        input_to_reset_transform=Linear(
            input_dim=dim,
            output_dim=dim,
            # assumed completion: the original snippet is truncated here
            weights_init=IsotropicGaussian(0.02),
            biases_init=Constant(0.0)))
    gru.initialize()
Example #15
File: vanilla.py, Project: mohammadpz/RNN
n_u = 225  # input vector size (not time at this point)
n_y = 225  # output vector size
n_h = 500  # number of hidden units

iteration = 300  # number of epochs of gradient descent

print "Building Model"
# Symbolic variables
x = tensor.tensor3('x', dtype=floatX)
target = tensor.tensor3('target', dtype=floatX)

# Build the model
linear = Linear(input_dim=n_u, output_dim=n_h, name="first_layer")
rnn = SimpleRecurrent(dim=n_h, activation=Tanh())
linear2 = Linear(input_dim=n_h, output_dim=n_y, name="output_layer")
sigm = Sigmoid()

x_transform = linear.apply(x)
h = rnn.apply(x_transform)
predict = sigm.apply(linear2.apply(h))


# only for generation B x h_dim
h_initial = tensor.tensor3('h_initial', dtype=floatX)
h_testing = rnn.apply(x_transform, h_initial, iterate=False)
y_hat_testing = linear2.apply(h_testing)
y_hat_testing = sigm.apply(y_hat_testing)
y_hat_testing.name = 'y_hat_testing'


# Cost function
Example #16
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    #rnn = SimpleRecurrent(
            #dim = 50,
            #activation=Tanh(),
            #weights_init = Uniform(std=0.01),
            #biases_init = Constant(0.)
        #)

    #rnn = GatedRecurrent(
            #dim = 50,
            #activation=Tanh(),
            #weights_init = Uniform(std=0.01),
            #biases_init = Constant(0.)
        #)

    embedding_size = 300
    #glove_version = "vectors.6B.100d.txt"
    glove_version = "glove.6B.300d.txt"
    #fork = Fork(weights_init=IsotropicGaussian(0.02),
            #biases_init=Constant(0.),
            #input_dim=embedding_size,
            #output_dims=[embedding_size]*3,
            #output_names=['inputs', 'reset_inputs', 'update_inputs']
            #)

    rnn = LSTM(
            dim = embedding_size,
            activation=Tanh(),
            weights_init = IsotropicGaussian(std=0.02),
        )
    rnn.initialize()

    #fork.initialize()
    wstd = 0.02

    score_layer = Linear(
            input_dim = 128,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()

    gloveMapping = Linear(
            input_dim = embedding_size,
            output_dim = embedding_size,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.0),
            name="gloveMapping"
            )
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="rectivfyglove").apply(o)

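    # Bias the LSTM forget-gate slice to 4.0 so cells retain memory early in training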
    forget_bias = np.zeros((embedding_size*4), dtype=theano.config.floatX)
    forget_bias[embedding_size:embedding_size*2] = 4.0
    toLSTM = Linear(
            input_dim = embedding_size,
            output_dim = embedding_size*4,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(forget_bias),
            #biases_init = Constant(0.0),
            name="ToLSTM"
            )
    toLSTM.initialize()


    rnn_states, rnn_cells = rnn.apply(toLSTM.apply(o) * T.shape_padright(m), mask=m)
    #inputs, reset_inputs, update_inputs = fork.apply(x)
    #rnn_states = rnn.apply(inputs=inputs, reset_inputs=reset_inputs, update_inputs=update_inputs, mask=m)

    #rnn_out = rnn_states[:, -1, :]
    rnn_out = (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1) / m.sum(axis=1).dimshuffle(0, 'x')
    #rnn_out = (rnn_states).mean(axis=1)# / m.sum(axis=1)

    hidden = Linear(
        input_dim = embedding_size,
        output_dim = 128,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.))
    hidden.initialize()

    o = hidden.apply(rnn_out)
    o = Rectifier().apply(o)
    hidden = Linear(
        input_dim = 128,
        output_dim = 128,
        weights_init = IsotropicGaussian(std=0.02),
        biases_init = Constant(0.),
        name="hiddenmap2")
    hidden.initialize()

    o = hidden.apply(o)
    o = Rectifier(name="rec2").apply(o)

    o = score_layer.apply(o)

    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1).shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()


    # =================

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
            cost = cost,
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=10),
                AdaM(),
                #AdaDelta(),
                ])

            )


    # ========
    print "setting up data"

    train_dataset = IMDBText('train')
    test_dataset = IMDBText('test')
    batch_size = 16
    n_train = train_dataset.num_examples
    train_stream = DataStream(
            dataset=train_dataset,
            iteration_scheme=ShuffledScheme(
                examples=n_train,
                batch_size=batch_size)
            )
    glove = GloveTransformer(glove_version, data_stream=train_stream)

    train_padded = Padding(
            data_stream=glove,
            mask_sources=('features',)
            #mask_sources=[]
            )


    test_stream = DataStream(
            dataset=test_dataset,
            iteration_scheme=ShuffledScheme(
                examples=n_train,
                batch_size=batch_size)
            )
    glove = GloveTransformer(glove_version, data_stream=test_stream)
    test_padded = Padding(
            data_stream=glove,
            mask_sources=('features',)
            #mask_sources=[]
            )
    print "setting up model"
    #import ipdb
    #ipdb.set_trace()

    lstm_norm = rnn.W_state.norm(2)
    lstm_norm.name = "lstm_norm"

    pre_norm= gloveMapping.W.norm(2)
    pre_norm.name = "pre_norm"

    #======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=train_dataset.num_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification, lstm_norm, pre_norm],
        prefix='train',
        after_epoch=True
        ))

    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        data_stream=test_padded,
        prefix='test',
        after_epoch=True
        ))
    extensions.append(Timing())
    extensions.append(Printing())

    extensions.append(Plot("norms", channels=[['train_lstm_norm', 'train_pre_norm']], after_epoch=True))
    extensions.append(Plot("result", channels=[['train_cost', 'train_misclassification']], after_epoch=True))

    main_loop = MainLoop(
            model=model,
            data_stream=train_padded,
            algorithm=algorithm,
            extensions=extensions)
    main_loop.run()
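Example #17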
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')
    y = tensor.imatrix('targets')

    x_int = x.astype(dtype='int32').T
    train_dataset = IMDB()
    idx_sort = numpy.argsort(
        [len(s) for s in
         train_dataset.indexables[
             train_dataset.sources.index('features')]]
    )
    n_voc = len(train_dataset.dict.keys())
    for idx in xrange(len(train_dataset.sources)):
        train_dataset.indexables[idx] = train_dataset.indexables[idx][idx_sort]

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=4 * n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
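    # Forget-gate biases set high to preserve memory early on (note: lstm_biases is built but never passed to the LSTM below)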
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = LSTM(
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=1,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = linear_embedding.apply(x_int) * tensor.shape_padright(m.T)
    rnn_out = rnn.apply(embedding)
    rnn_out_mean_pooled = rnn_out[0][-1]

    probs = Sigmoid().apply(
        score_layer.apply(rnn_out_mean_pooled))

    cost = - (y * tensor.log(probs)
              + (1 - y) * tensor.log(1 - probs)
              ).mean()
    cost.name = 'cost'

    misclassification = (y * (probs < 0.5)
                         + (1 - y) * (probs > 0.5)
                         ).mean()
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule(
            components=[StepClipping(threshold=10.),
                        Adam()
                        ]
        )
    )

    n_train = int(numpy.floor(.8 * train_dataset.num_examples))
    n_valid = int(numpy.floor(.1 * train_dataset.num_examples))
    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    valid_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train, n_train + n_valid),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    test_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train + n_valid,
                               train_dataset.num_examples),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        test_data_stream,
        prefix='test'))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        valid_data_stream,
        prefix='valid'))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification',
                   'valid_cost', 'valid_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('IMDB classification example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
Example #18
    def __init__(self):
        srng = MRG_RandomStreams(seed=123)

        X = T.matrix('features')
        self.X = X

        #drop = Dropout(p_drop=0.5)
        #o = drop.apply(X)
        o = X
        self.noisy = o

        #n_hidden = 64
        n_hidden = 128
        n_zs = 2
        self.n_zs = n_zs

        self.n_hidden = n_hidden

        l = Linear(input_dim=28 * 28,
                   output_dim=n_hidden,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        o = l.apply(o)
        o = Tanh().apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_hidden,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        o = l.apply(o)
        o = Tanh().apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_zs,
                   weights_init=IsotropicGaussian(.101),
                   biases_init=Constant(0))
        l.initialize()
        mu_encoder = l.apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_zs,
                   weights_init=IsotropicGaussian(0.1),
                   biases_init=Constant(0))
        l.initialize()
        log_sigma_encoder = l.apply(o)

        eps = srng.normal(log_sigma_encoder.shape)

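        # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, 1)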
        z = eps * T.exp(log_sigma_encoder) + mu_encoder

        z_to_h1_decode = Linear(input_dim=n_zs,
                                output_dim=n_hidden,
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))
        z_to_h1_decode.initialize()

        h1_decode_to_h_decode = Linear(input_dim=n_hidden,
                                       output_dim=n_hidden,
                                       weights_init=IsotropicGaussian(0.01),
                                       biases_init=Constant(0))
        h1_decode_to_h_decode.initialize()

        #o = z_to_h_decode.apply(z)
        #h_decoder = Tanh().apply(o)

        h_decode_produce = Linear(input_dim=n_hidden,
                                  output_dim=28 * 28,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="linear4")
        h_decode_produce.initialize()
        #o = h_decode_produce.apply(h_decoder)

        #self.produced = Sigmoid().apply(o)

        seq = Sequence([
            z_to_h1_decode.apply,
            Tanh().apply, h1_decode_to_h_decode.apply,
            Tanh().apply, h_decode_produce.apply,
            Sigmoid().apply
        ])
        seq.initialize()

        self.produced = seq.apply(z)

        self.cost = T.sum(T.sqr(self.produced - X))  # sum-of-squares reconstruction error
        #self.cost = T.sum(T.nnet.binary_crossentropy(self.produced, X)) #T.sum(T.sqr(self.produced - X))
        self.cost.name = "cost"

        # Computed with L = 1, only one sample of produced.
        logpxz = T.sum(-1 * log_sigma_encoder * T.log(2 * np.pi) -
                       T.sqr((self.produced - X) /
                             (2 * T.exp(log_sigma_encoder))))

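        # Closed-form KL(N(mu, sigma^2) || N(0, 1)) term of the variational objective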
        self.variational_cost = - 0.5 * T.sum(1 + 2*log_sigma_encoder - mu_encoder * mu_encoder\
                - T.exp(2 * log_sigma_encoder)) + logpxz

        self.variational_cost.name = "variational_cost"

        self.Z = T.matrix('z')
        self.sampled = seq.apply(self.Z)

        cg = ComputationGraph([self.variational_cost])
        bricks = [
            get_brick(var) for var in cg.variables + cg.scan_variables
            if get_brick(var)
        ]
        for i, b in enumerate(bricks):
            b.name = b.name + "_" + str(i)
Example #19
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"

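    # Sum word vectors over time; m.mean()*0 keeps the mask input in the graph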
    o = x.sum(axis=1) + m.mean() * 0

    score_layer = Linear(
            input_dim = 300,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=0.02),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # =================
    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
            cost = cg.outputs[0],
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=4),
                AdaM(),
                ])

            )

    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    #batch_size = 16
    batch_size = 16
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size)

        stream = DataStream(
                dataset=dataset,
                iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)

    print "setting up model"

    n_examples = 25000
    #======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [
            cost,
            misclassification,
            ],
        prefix='train',
        after_epoch=True
        ))

    #extensions.append(DataStreamMonitoring(
        #[cost, misclassification],
        #data_stream=test_stream,
        #prefix='test',
        #after_epoch=True
        #))
    extensions.append(Timing())
    extensions.append(Printing())

    extensions.append(Plot(
        theano.config.device+"_result",
        channels=[['train_cost']],
        after_epoch=True
        ))


    main_loop = MainLoop(
            model=model,
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)
    main_loop.run()
Example #20
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    x = x + m.mean() * 0  # keep the otherwise-unused mask input in the graph

    #embedding_size = 300
    #glove_version = "glove.6B.300d.txt"

    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

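    # Stack of 1-D convolutions with rectifier units over the padded embedding sequence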
    conv1 = Conv1D(filter_length=5, num_filters=128, input_dim=embedding_size,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5
            #, step=2
            ).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0),
            step=3,
            name="conv2")
    conv2.initialize()
    o = conv2.apply(o)

    o = Rectifier(name="conv2rec").apply(o)
    conv3 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
            weights_init=IsotropicGaussian(std=wstd),
            biases_init=Constant(0.0),
            step=3,
            name="conv3")
    conv3.initialize()
    o = conv3.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    fork = Fork(weights_init=IsotropicGaussian(0.02),
            biases_init=Constant(0.),
            input_dim=128,
            output_dims=[128]*3,
            output_names=['inputs', 'reset_inputs', 'update_inputs']
            )
    fork.initialize()

    inputs, reset_inputs, update_inputs = fork.apply(o)

    out = o.mean(axis=1)

    #gru = GatedRecurrent(dim=128,
            #weights_init=IsotropicGaussian(0.02),
            #biases_init=IsotropicGaussian(0.0))

    #gru.initialize()
    #states = gru.apply(inputs=inputs, reset_inputs=reset_inputs, update_inputs=update_inputs)

    #out = states[:, -1, :]

    hidden = Linear(
        input_dim = 128,
        output_dim = 128,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.))
    hidden.initialize()

    o = hidden.apply(out)
    o = Rectifier().apply(o)
    #hidden = Linear(
        #input_dim = 128,
        #output_dim = 128,
        #weights_init = IsotropicGaussian(std=0.02),
        #biases_init = Constant(0.),
        #name="hiddenmap2")
    #hidden.initialize()

    #o = hidden.apply(o)
    #o = Rectifier(name="rec2").apply(o)


    score_layer = Linear(
            input_dim = 128,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)

    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1).shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()


    # =================

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
            cost = cost,
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=10),
                AdaM(),
                #AdaDelta(),
                ])

            )


    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    batch_size = 16
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')

        dataset = IMDBText(which_set)
        n_train = dataset.num_examples
        stream = DataStream(
                dataset=dataset,
                iteration_scheme=ShuffledScheme(
                    examples=n_train,
                    batch_size=batch_size)
                )
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)

    print "setting up model"
    #import ipdb
    #ipdb.set_trace()

    n_examples = 25000
    #======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True
        ))

    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        data_stream=test_stream,
        prefix='test',
        after_epoch=True
        ))
    extensions.append(Timing())
    extensions.append(Printing())

    #extensions.append(Plot("norms", channels=[['train_lstm_norm', 'train_pre_norm']], after_epoch=True))
    extensions.append(Plot(theano.config.device+"_result", channels=[['test_misclassification', 'train_misclassification']], after_epoch=True))

    main_loop = MainLoop(
            model=model,
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)
    main_loop.run()
Example #21
    def __init__(self):
        srng = MRG_RandomStreams(seed=123)

        X = T.matrix('features')
        self.X = X

        #drop = Dropout(p_drop=0.5)
        #o = drop.apply(X)
        o = (X - 128) / 128.0
        self.scaled = o

        #n_hidden = 64
        n_hidden = 2048 * 2
        n_zs = 1024
        self.n_zs = n_zs

        self.n_hidden = n_hidden

        l = Linear(input_dim=32 * 32 * 3,
                   output_dim=n_hidden,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        o = l.apply(o)
        o = Rectifier().apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_hidden,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        o = l.apply(o)
        o = Rectifier().apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_zs,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        mu_encoder = l.apply(o)

        l = Linear(input_dim=n_hidden,
                   output_dim=n_zs,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
        l.initialize()
        log_sigma_encoder = l.apply(o)

        eps = srng.normal(log_sigma_encoder.shape)

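        # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, 1)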
        z = eps * T.exp(log_sigma_encoder) + mu_encoder

        z_to_h1_decode = Linear(input_dim=n_zs,
                                output_dim=n_hidden,
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0))
        z_to_h1_decode.initialize()

        h1_decode_to_h_decode = Linear(input_dim=n_hidden,
                                       output_dim=n_hidden,
                                       weights_init=IsotropicGaussian(0.01),
                                       biases_init=Constant(0))
        h1_decode_to_h_decode.initialize()

        h_decode_produce = Linear(input_dim=n_hidden,
                                  output_dim=32 * 32 * 3,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="linear4")
        h_decode_produce.initialize()
        #o = h_decode_produce.apply(h_decoder)

        #self.produced = Sigmoid().apply(o)

        seq = Sequence([
            z_to_h1_decode.apply,
            Rectifier().apply, h1_decode_to_h_decode.apply,
            Rectifier().apply, h_decode_produce.apply,
            Sigmoid().apply
        ])
        seq.initialize()

        self.produced = seq.apply(z)

        self.cost = T.mean(T.sqr(self.produced - self.scaled))
        #self.cost = T.sum(T.nnet.binary_crossentropy(self.produced, self.scaled)) #T.sum(T.sqr(self.produced - self.scaled))
        self.cost.name = "cost"

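        # Closed-form KL(N(mu, sigma^2) || N(0, 1)) plus the reconstruction cost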
        self.variational_cost = - 0.5 * T.mean(1 + 2*log_sigma_encoder - mu_encoder * mu_encoder\
                - T.exp(2 * log_sigma_encoder)) + self.cost
        self.variational_cost.name = "variational_cost"

        self.Z = T.matrix('z')
        self.sampled = seq.apply(self.Z)

        cg = ComputationGraph([self.variational_cost])
        bricks = [
            get_brick(var) for var in cg.variables + cg.scan_variables
            if get_brick(var)
        ]
        for i, b in enumerate(bricks):
            b.name = b.name + "_" + str(i)
Example #22
File: cw.py, Project: mohammadpz/RNN
one_time = tensor.wscalar('one_time')
h_initial = tensor.matrix('h_initial', dtype=floatX)

# Build the model

clockwork = ClockWork(input_dim=n_u, module=module,
                      periods=periods, unit=unit,
                      activation=Sigmoid(),
                      name="clockwork rnn")
linear = Linear(input_dim=unit * module, output_dim=n_y, name="output_layer")
h = clockwork.apply(x, time)
predict = Sigmoid().apply(linear.apply(h))

# only for generation B x h_dim
h_testing = clockwork.apply(one_x, one_time, h_initial, iterate=False)
y_hat_testing = Sigmoid().apply(linear.apply(h_testing))
y_hat_testing.name = 'y_hat_testing'

# Cost function
cost = SquaredError().apply(predict, target)

# Initialization
for brick in (clockwork, linear):
    brick.weights_init = initialization.IsotropicGaussian(0.1)
    brick.biases_init = initialization.Constant(0)
    brick.initialize()

cg = ComputationGraph(cost)
print(VariableFilter(roles=[WEIGHT, BIAS])(cg.variables))

# Training process
Example #23
def main():
    x = T.imatrix('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    #x_int = x.astype(dtype='int32').T
    x_int = x.T

    train_dataset = IMDB('train')
    n_voc = len(train_dataset.dict.keys())
    n_h = 2
    lookup = LookupTable(length=n_voc + 2,
                         dim=n_h * 4,
                         weights_init=Uniform(std=0.01),
                         biases_init=Constant(0.))
    lookup.initialize()

    #rnn = SimpleRecurrent(
    #dim = n_h,
    #activation=Tanh(),
    #weights_init = Uniform(std=0.01),
    #biases_init = Constant(0.)
    #)
    rnn = LSTM(dim=n_h,
               activation=Tanh(),
               weights_init=Uniform(std=0.01),
               biases_init=Constant(0.))

    rnn.initialize()
    score_layer = Linear(input_dim=n_h,
                         output_dim=1,
                         weights_init=Uniform(std=0.01),
                         biases_init=Constant(0.))
    score_layer.initialize()

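    # Zero out embedding vectors at padded time steps using the transposed mask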
    embedding = lookup.apply(x_int) * T.shape_padright(m.T)
    #embedding = lookup.apply(x_int) + m.T.mean()*0
    #embedding = lookup.apply(x_int) + m.T.mean()*0

    rnn_states, rnn_cells = rnn.apply(embedding, mask=m.T)
    rnn_out_mean_pooled = rnn_states[-1]
    #rnn_out_mean_pooled = rnn_states.mean()

    probs = Sigmoid().apply(score_layer.apply(rnn_out_mean_pooled))

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # =================

    cg = ComputationGraph([cost])
    params = cg.parameters
    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=10),
            Adam(),
            #AdaDelta(),
        ]))

    # ========

    test_dataset = IMDB('test')
    batch_size = 64
    n_train = train_dataset.num_examples
    train_stream = DataStream(dataset=train_dataset,
                              iteration_scheme=ShuffledScheme(
                                  examples=n_train, batch_size=batch_size))
    train_padded = Padding(data_stream=train_stream,
                           mask_sources=('features', )
                           #mask_sources=[]
                           )

    test_stream = DataStream(dataset=test_dataset,
                             iteration_scheme=ShuffledScheme(
                                 examples=n_train, batch_size=batch_size))
    test_padded = Padding(data_stream=test_stream,
                          mask_sources=('features', )
                          #mask_sources=[]
                          )
    #import ipdb
    #ipdb.set_trace()

    #======
    model = Model(cost)
    extensions = []
    extensions.append(
        EpochProgress(
            batch_per_epoch=train_dataset.num_examples // batch_size + 1))
    extensions.append(
        TrainingDataMonitoring([cost, misclassification],
                               prefix='train',
                               after_epoch=True))

    extensions.append(
        DataStreamMonitoring([cost, misclassification],
                             data_stream=test_padded,
                             prefix='test',
                             after_epoch=True))
    extensions.append(Timing())
    extensions.append(Printing())

    main_loop = MainLoop(model=model,
                         data_stream=train_padded,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()