Example #1
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
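Note: Examples #1 and #3 are shown without their imports. A minimal sketch of what they assume is below; module paths follow the old Blocks/Pylearn2 layout and may vary by version (the location of the Pylearn2 wrappers in particular is an assumption).

import logging

import numpy
import pylearn2
from pylearn2.testing.datasets import random_dense_design_matrix
from pylearn2.training_algorithms.sgd import SGD
from theano import tensor

from blocks.bricks import MLP, Sigmoid
from blocks.bricks.cost import SquaredError
from blocks.initialization import Constant, IsotropicGaussian
# Assumed wrapper location; it moved between Blocks versions.
from blocks.pylearn2 import Pylearn2Cost, Pylearn2Model, Pylearn2Train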
Example #2
    def apply(self, input_, target):
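        # The Linear brick below outputs 4 * dim values per step because the
        # LSTM brick expects the four gate pre-activations stacked together.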
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #3
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #4
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
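Note: Examples #4 and #6 depend on a get_data_stream helper defined elsewhere in sqrt.py. A hedged sketch of such a helper with Fuel is below; the single-batch layout is an assumption, chosen to match the 'numbers' and 'roots' vector sources used above.

import numpy
from fuel.datasets import IterableDataset
from fuel.streams import DataStream

def get_data_stream(iterable):
    # One batch per epoch: a vector of inputs and their square roots.
    numbers = numpy.asarray(list(iterable), dtype='float32')
    dataset = IterableDataset({'numbers': [numbers],
                               'roots': [numpy.sqrt(numbers)]})
    return DataStream(dataset)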
Example #5
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(),
                   dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #6
File: sqrt.py Project: basaundi/blocks
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #7
def test_square():
    from blocks.bricks.cost import SquaredError
    x = tensor.tensor3()
    y = tensor.tensor3()

    c = SquaredError()
    o = c.apply(x, y)
    f = theano.function([x, y], o)
    print(f(np.ones((3, 3, 3), dtype=theano.config.floatX),
            5 * np.ones((3, 3, 3), dtype=theano.config.floatX)))
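Note on the expected output: every element of the two inputs differs by 4, so each squared difference is 16. Assuming SquaredError reduces like Blocks' CostMatrix (sum over axis 1, then mean), the call prints 48.0 (16 * 3); that reduction is an inference from the brick's usual definition, not part of the test.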
Example #8
def get_costs(presoft, args):

    if has_indices(args.dataset):
        # Targets: (Time X Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            (y.flatten() * y_mask.reshape((batch * time, ))),
            (presoft.reshape((batch * time, feat)) *
             y_mask.reshape((batch * time, 1))))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))

        # BPC: Bits Per Character
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"

    else:
        # Targets: (Time X Batch X Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(
                y_mask[:args.used_inputs, :],
                tensor.zeros_like(y_mask[:args.used_inputs, :], dtype=floatX))
        # SquaredError does not work on 3D tensors, so mask here and flatten
        # time and batch into a single axis below.
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))

        target = target.reshape(
            (target.shape[0] * target.shape[1], target.shape[2]))

        values = values.reshape(
            (values.shape[0] * values.shape[1], values.shape[2]))

        unregularized_cost = SquaredError().apply(target, values)
        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
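The renormalization above compensates for the zeroed-out context positions: multiplying by total/kept turns a mean over all positions back into a mean over the unmasked ones. A minimal numpy sketch of the factor (shapes and the context value are assumptions):

import numpy as np

context = 2                                  # hypothetical warm-up steps
y_mask = np.ones((10, 4), dtype='float32')   # (time, batch)
y_mask[:context, :] = 0.

# Same factor as tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask):
renorm = y_mask.size / y_mask.sum()          # 40 / 32 == 1.25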
Example #9
def test_square():
    from blocks.bricks.cost import SquaredError
    x = tensor.tensor3()
    y = tensor.tensor3()

    c = SquaredError()
    o = c.apply(x, y)
    f = theano.function([x, y], o)
    print(
        f(np.ones((3, 3, 3), dtype=theano.config.floatX), 5 * np.ones(
            (3, 3, 3), dtype=theano.config.floatX)))
Example #10
    def decoder(self, clean, corr, batch_size):
        get_unlabeled = lambda x: x[batch_size:] if x is not None else x
        est = self.new_activation_dict()
        costs = AttributeDict()
        costs.denois = AttributeDict()
        for i, ((_, spec), act_f) in self.layers[::-1]:
            z_corr = get_unlabeled(corr.z[i])
            z_clean = get_unlabeled(clean.z[i])
            z_clean_s = get_unlabeled(clean.s.get(i))
            z_clean_m = get_unlabeled(clean.m.get(i))

            # It's the last layer
            if i == len(self.layers) - 1:
                fspec = (None, None)
                ver = get_unlabeled(corr.h[i])
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                fspec = self.layers[i + 1][1][0]
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            # For semi-supervised version
            if z_clean_s:
                z_est_norm = (z_est - z_clean_m) / z_clean_s
            else:
                z_est_norm = z_est
            # Note: this assignment overrides the normalization above; later
            # ladder versions guard the normalization behind a 'zestbn'
            # bugfix flag (cf. Example #16).
            z_est_norm = z_est

            se = SquaredError('denois' + str(i))
            costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                       z_clean.flatten(2)) \
                / np.prod(self.layer_dims[i], dtype=floatX)
            costs.denois[i].name = 'denois' + str(i)

            # Store references for later use
            est.z[i] = z_est
            est.h[i] = apply_act(z_est, act_f)
            est.s[i] = None
            est.m[i] = None
        return est, costs
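Each denoising cost above divides SquaredError's output by the layer's unit count, turning a summed reconstruction error into a per-unit figure. A numpy sketch of that reduction, assuming the CostMatrix behavior (sum over features, mean over the batch); the shapes are hypothetical:

import numpy as np

batch, units = 4, 100
z_est = np.random.randn(batch, units).astype('float32')
z_clean = np.random.randn(batch, units).astype('float32')

# SquaredError on flattened activations, then the per-unit normalization:
denois = ((z_est - z_clean) ** 2).sum(axis=1).mean() / units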
Example #11
    def decoder(self, clean, corr):
        est = self.new_activation_dict()
        costs = AttributeDict()
        costs.denois = AttributeDict()
        for i, ((_, spec), act_f) in self.layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)

            # It's the last layer
            if i == len(self.layers) - 1:
                fspec = (None, None)
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                fspec = self.layers[i + 1][1][0]
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            # Normalize with the clean path's statistics when available
            if z_clean_s:
                z_est_norm = (z_est - z_clean_m) / z_clean_s
            else:
                z_est_norm = z_est

            se = SquaredError('denois' + str(i))
            costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                       z_clean.flatten(2)) \
                / np.prod(self.layer_dims[i], dtype=floatX)
            costs.denois[i].name = 'denois' + str(i)

            # Store references for later use
            est.z[i] = z_est
            est.h[i] = apply_act(z_est, act_f)
            est.s[i] = None
            est.m[i] = None
        return est, costs
Example #12
    def decoder(self, clean, corr, batch_size):
        get_unlabeled = lambda x: x[batch_size:] if x is not None else x
        est = self.new_activation_dict()
        costs = AttributeDict()
        costs.denois = AttributeDict()
        for i, ((_, spec), act_f) in self.layers[::-1]:
            z_corr = get_unlabeled(corr.z[i])
            z_clean = get_unlabeled(clean.z[i])
            z_clean_s = get_unlabeled(clean.s.get(i))
            z_clean_m = get_unlabeled(clean.m.get(i))

            # It's the last layer
            if i == len(self.layers) - 1:
                fspec = (None, None)
                ver = get_unlabeled(corr.h[i])
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                fspec = self.layers[i + 1][1][0]
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(
                z_lat=z_corr, z_ver=ver, in_dims=ver_dim, out_dims=self.layer_dims[i], num=i, fspec=fspec, top_g=top_g
            )

            # For semi-supervised version
            if z_clean_s:
                z_est_norm = (z_est - z_clean_m) / z_clean_s
            else:
                z_est_norm = z_est
            # Note: as in Example #10, this assignment overrides the
            # normalization above.
            z_est_norm = z_est

            se = SquaredError("denois" + str(i))
            costs.denois[i] = se.apply(z_est_norm.flatten(2), z_clean.flatten(2)) / np.prod(
                self.layer_dims[i], dtype=floatX
            )
            costs.denois[i].name = "denois" + str(i)

            # Store references for later use
            est.z[i] = z_est
            est.h[i] = apply_act(z_est, act_f)
            est.s[i] = None
            est.m[i] = None
        return est, costs
Example #13
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()], dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_parameters, = new_cg.shared_variables
    assert numpy.all(collected_parameters.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_parameters.get_value()[784 * 100:] == 2.)
    assert collected_parameters.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.)
Example #14
def build_autoencoder(features, labels_num, labels_cat):

    mlp_bottom = MLP(
        activations=[Rectifier(), Rectifier(), Rectifier(),
                     Rectifier(), Rectifier()],
        dims=[24033, 5000, 1000, 100, 1000, 5000],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(1))
    mlp_bottom.initialize()

    mlp_top = build_top_mlp()
    mlp_top.push_initialization_config()
    mlp_top.initialize()

    # a = mlp_bottom.apply(features)
    # b = mlp_top.apply(a)

    # Construct feedforward sequence
    ss_seq = Sequence([mlp_bottom.apply, mlp_top.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    [outputs_numerical, outputs_categorical] = ss_seq.apply(features)

    cost = SquaredError().apply(
        labels_num, outputs_numerical) + BinaryCrossEntropy().apply(
            labels_cat, outputs_categorical)

    cg = ComputationGraph(cost)

    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    #cg_dropout1   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    #cost_dropout1 = cg_dropout1.outputs[0]

    return cost, cg.parameters
Example #15
    def train(self):

        x = self.sharedBatch['x']
        x.name = 'x_myinput'
        xmini = self.sharedBatch['xmini']
        xmini.name = 'xmini_myinput'
        y = self.sharedBatch['y']
        y.name = 'y_myinput'

        # We need to provide data for the LSTM layer of size 4 * lstm_dim; see
        # the LSTM layer documentation for the explanation.
        x_to_h = Linear(self.input_dimx,
                        self.dim,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        xmini_to_h = Linear(self.input_dimxmini,
                            self.mini_dim,
                            name='xmini_to_h',
                            weights_init=IsotropicGaussian(),
                            biases_init=Constant(0.0))

        rnnwmini = RNNwMini(dim=self.dim,
                            mini_dim=self.mini_dim,
                            summary_dim=self.summary_dim)

        h_to_o = Linear(self.summary_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        xmini_transform = xmini_to_h.apply(xmini)

        h = rnnwmini.apply(x=x_transform, xmini=xmini_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        y_hat = h_to_o.apply(h[-1])
        #y_hat = Logistic().apply(y_hat)

        cost = SquaredError().apply(y, y_hat)
        cost.name = 'cost'

        rnnwmini.initialize()
        x_to_h.initialize()
        xmini_to_h.initialize()
        h_to_o.initialize()

        self.f = theano.function(inputs=[], outputs=y_hat)

        #print("self.f === ")
        #print(self.f())
        #print(self.f().shape)
        #print("====")

        self.cg = ComputationGraph(cost)
        m = Model(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=self.cg.parameters,
                                    step_rule=RMSProp(learning_rate=0.01),
                                    on_unused_sources='ignore')
        valid_monitor = DataStreamMonitoringShared(
            variables=[cost],
            data_stream=self.stream_valid_int,
            prefix="valid",
            sharedBatch=self.sharedBatch,
            sharedData=self.sharedData)
        train_monitor = TrainingDataMonitoring(variables=[cost],
                                               prefix="train",
                                               after_epoch=True)

        sharedVarMonitor = SwitchSharedReferences(self.sharedBatch,
                                                  self.sharedData)
        tBest = self.track_best('valid_cost', self.cg)
        self.tracker = tBest[0]
        extensions = [sharedVarMonitor, valid_monitor] + tBest

        if self.debug:
            extensions.append(Printing())

        self.algorithm = algorithm
        self.extensions = extensions
        self.model = m
        self.mainloop = MainLoop(self.algorithm,
                                 self.stream_train_int,
                                 extensions=self.extensions,
                                 model=self.model)
        self.main_loop(True)
Example #16
File: ladder.py Project: Scyfer/ladder
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.target_labeled = target_labeled
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.default_lr

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat, 'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i+1][1][0] if len(layers) > i+1 else (None, None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' % (
                i, l_type,
                denois_print,
                self.layer_dims.get(i+1),
                self.layer_dims.get(i)
                ))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
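Note: the zestbn == 'bugfix' branch above normalizes z_est with the clean path's batch statistics (mean z_clean_m, variance z_clean_s) before the denoising cost is computed. Examples #10 and #12 show older variants where that normalization is overridden, and Example #20 one where it lacks the epsilon-stabilized square root.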
Example #17
input_dim = 512
hidden_dims = [int(dim) for dim in args.dim.split(",")]

if args.batchnorm:
    network = BatchNormalizedMLP
else:
    network = MLP

autoencoder = network(
    activations=[Tanh() for _ in xrange(len(hidden_dims))] + [Identity()],
    dims=[input_dim] + hidden_dims + [input_dim],
    weights_init=Uniform(width=0.02), biases_init=Constant(0))
autoencoder.initialize()

hopefully_states_again = autoencoder.apply(states)

cost = SquaredError().apply(hopefully_states_again, states)
cost.name = "squared_error"
cost_model = Model(cost)

algorithm = GradientDescent(cost=cost, parameters=cost_model.parameters,
                            step_rule=Adam())

# handle data
data = H5PYDataset(args.file, which_sets=("train",), load_in_memory=True)
# trash data for testing
"""
dataraw = numpy.zeros((10000, 512), dtype="float32")
for row in xrange(dataraw.shape[0]):
    dataraw[row] = numpy.random.rand(512)
data = OrderedDict()
data["act_seqs"] = dataraw
Example #18
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()
        self.oos = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h,
                                              prev_dim,
                                              spec,
                                              i,
                                              act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat,
                                       'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i + 1][1][0] if len(layers) > i + 1 else (None,
                                                                     None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m
                                  ) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' %
                        (i, l_type, denois_print, self.layer_dims.get(i + 1),
                         self.layer_dims.get(i)))

        # Costs
        y = target_labeled.flatten()

        Q = int(self.layer_dims[top][0]) - 1
        logger.info('Q=%d' % Q)
        costs.class_clean = CategoricalCrossEntropyIV(
            Q=Q,
            alpha=self.p.alpha,
            beta=self.p.beta,
            dbeta=self.p.dbeta,
            gamma=self.p.gamma,
            gamma1=self.p.gamma1).apply(y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropyIV(
            Q=Q,
            alpha=self.p.alpha,
            beta=self.p.beta,
            dbeta=self.p.dbeta,
            gamma=self.p.gamma,
            gamma1=self.p.gamma1,
        ).apply(y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        if self.p.alpha_clean:
            y_true = y
            eps = np.float32(1e-6)

            # scale preds so that the class probas of each sample sum to 1
            y_pred = clean.labeled.h[top] + eps
            y_pred /= y_pred.sum(axis=-1, keepdims=True)

            y0 = T.or_(T.eq(y_true, 0), T.gt(y_true,
                                             Q))  # out-of-set or unlabeled
            y0sum = y0.sum() + eps  # number of oos

            cost1 = T.nnet.categorical_crossentropy(y_pred, y_pred)
            cost1 = T.dot(y0,
                          cost1) / y0sum  # average cost per labeled example
            costs.total += self.p.alpha_clean * cost1

        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRateIV(oos_thr=self.p.oos_thr)
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
        oosr = OOSRateIV()
        self.oos.clean = oosr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.oos.clean.name = 'oos_rate_clean'
Example #19
def get_costs(presoft, args):

    if has_indices(args.dataset):
        # Targets: (Time X Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(y_mask[:args.context, :],
                                      tensor.zeros_like(y_mask[:args.context,
                                                               :],
                                                        dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            (y.flatten() *
                y_mask.reshape((batch * time, ))),
            (presoft.reshape((batch * time, feat)) *
                y_mask.reshape((batch * time, 1))))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) /
            tensor.sum(y_mask))

        # BPC: Bits Per Character
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"

    else:
        # Targets: (Time X Batch X Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(y_mask[:args.context, :],
                                      tensor.zeros_like(y_mask[:args.context, :],
                                                        dtype=floatX))

        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(y_mask[:args.used_inputs, :],
                                          tensor.zeros_like(y_mask[:args.used_inputs, :],
                                                            dtype=floatX))
        # SquaredError does not work on 3D tensors, so mask here and flatten
        # time and batch into a single axis below.
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))

        target = target.reshape((target.shape[0] * target.shape[1],
                                 target.shape[2]))

        values = values.reshape((values.shape[0] * values.shape[1],
                                 values.shape[2]))

        unregularized_cost = SquaredError().apply(target, values)
        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) /
            tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
Example #20
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h,
                                              prev_dim,
                                              spec,
                                              i,
                                              act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat,
                                       'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i + 1][1][0] if len(layers) > i + 1 else (None,
                                                                     None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost
                if z_clean_s:
                    z_est_norm = (z_est - z_clean_m) / z_clean_s
                else:
                    z_est_norm = z_est

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' %
                        (i, l_type, denois_print, self.layer_dims.get(i + 1),
                         self.layer_dims.get(i)))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
Example #21
def train_lstm(train, test, input_dim,
               hidden_dimension, columns, epochs,
               save_file, execution_name, batch_size, plot):
    stream_train = build_stream(train, batch_size, columns)
    stream_test = build_stream(test, batch_size, columns)

    # The train stream will return (TimeSequence, BatchSize, Dimensions)
    # and the test stream will return (TimeSequence, BatchSize, 1)

    x = T.tensor3('x')
    y = T.tensor3('y')

    # Note: reshape reinterprets the memory layout rather than swapping axes;
    # dimshuffle(1, 0, 2) would transpose time and batch instead.
    y = y.reshape((y.shape[1], y.shape[0], y.shape[2]))

    # input_dim = 6
    # output_dim = 1
    linear_lstm = LinearLSTM(input_dim, 1, hidden_dimension,
                             # print_intermediate=True,
                             print_attrs=['__str__', 'shape'])

    y_hat = linear_lstm.apply(x)
    linear_lstm.initialize()

    c_test = AbsolutePercentageError().apply(y, y_hat)
    c_test.name = 'mape'

    c = SquaredError().apply(y, y_hat)
    c.name = 'cost'

    cg = ComputationGraph(c_test)

    def one_perc_min(current_value, best_value):
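        # Keep the previous best if the new value is more than 1% worse;
        # otherwise accept the new value as the best so far.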
        if (1 - best_value / current_value) > 0.01:
            return best_value
        else:
            return current_value

    extensions = []

    extensions.append(DataStreamMonitoring(variables=[c, c_test],
                                           data_stream=stream_test,
                                           prefix='test',
                                           after_epoch=False,
                                           every_n_epochs=100))

    extensions.append(TrainingDataMonitoring(variables=[c_test],
                                             prefix='train',
                                             after_epoch=True))

    extensions.append(FinishAfter(after_n_epochs=epochs))

    # extensions.append(Printing())
    # extensions.append(ProgressBar())

    extensions.append(TrackTheBest('test_mape', choose_best=one_perc_min))
    extensions.append(TrackTheBest('test_cost', choose_best=one_perc_min))
    extensions.append(FinishIfNoImprovementAfter('test_cost_best_so_far', epochs=500))

    # Save only parameters, not the whole main loop and only when best_test_cost is updated
    checkpoint = Checkpoint(save_file, save_main_loop=False, after_training=False)
    checkpoint.add_condition(['after_epoch'], predicate=OnLogRecord('test_cost_best_so_far'))
    extensions.append(checkpoint)

    if BOKEH_AVAILABLE and plot:
        extensions.append(Plot(execution_name, channels=[[  # 'train_cost',
                                                          'test_cost']]))

    step_rule = Adam()

    algorithm = GradientDescent(cost=c_test, parameters=cg.parameters, step_rule=step_rule)
    main_loop = MainLoop(algorithm, stream_train, model=Model(c_test), extensions=extensions)
    main_loop.run()

    test_mape = 0
    if main_loop.log.status.get('best_test_mape', None) is None:
        with open(save_file, 'rb') as f:
            parameters = load_parameters(f)
            model = main_loop.model
            model.set_parameter_values(parameters)
            ev = DatasetEvaluator([c_test])
            test_mape = ev.evaluate(stream_test)['mape']
    else:
        test_mape = main_loop.log.status['best_test_mape']

    return test_mape, main_loop.log.status['epochs_done']
Example #22
    test_dataset = [word_bank.convert_to_vectors_and_labels(sentence) for sentence in test_sentences]

    # MODEL SETUP
    textRNN = TextRNN(dim_in=VECTOR_SIZE, dim_hidden=HIDDEN_UNITS, dim_out=VECTOR_SIZE)

    output = textRNN.run(inputs=x)
    #get_states_and_output = T.function([x, x_mask], [output])

    # COST SETUP
    #y_hat = np.float32(np.ones((3,1)))
    labels = np.float32([data[1] for data in dataset])
    inputs_data = np.float32([data[0] for data in dataset])
    test_labels = np.float32([data[1] for data in test_dataset])
    test_inputs_data = np.float32([data[0] for data in test_dataset])

    cost = SquaredError().apply(y, output)
    cost.name = 'MSE_with_regularization'
    cg = ComputationGraph(cost)

    #inputs = VariableFilter(roles=[INPUT], bricks=[SimpleRecurrent])(cg.variables)
    #inputs = [inputs[0]]
    #cg_dropout = apply_dropout(cg, inputs, 0.5)
    #fprop_dropout = T.function([cg_dropout.inputs], [cg_dropout.outputs[0]])
    #dropped_out = VariableFilter(roles=[DROPOUT])(cg.variables)
    #inputs_referenced = [var.tag.replacement_of for var in dropped_out]
    #set(inputs) == set(inputs_referenced)

    get_states_and_output = T.function([x], [output])

    #W = VariableFilter(roles=[WEIGHT])(cg.variables)
    #W = W
Example #23
#lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()

#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'

#ipdb.set_trace()

#ComputationGraph(y_hat).get_theano_function()(features_test)[0].shape
#ComputationGraph(cost).get_theano_function()(features_test, targets_test)[0].shape

cg = ComputationGraph(cost)

#cg = ComputationGraph(hiddens).get_theano_function()
#ipdb.set_trace()
algorithm = GradientDescent(cost=cost, 
                            params=cg.parameters,
                            step_rule=CompositeRule([StepClipping(5.0),
                                                     Scale(0.01)]))
Example #24
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 state_dim,
                 att_dim,
                 maxout_dim,
                 representation_dim,
                 attention_strategy='content',
                 attention_sources='s',
                 readout_sources='sfa',
                 memory='none',
                 memory_size=500,
                 seq_len=50,
                 init_strategy='last',
                 theano_seed=None,
                 **kwargs):
        """Creates a new decoder brick without embedding.
        
        Args:
            vocab_size (int): Target language vocabulary size
            embedding_dim (int): Size of feedback embedding layer
            state_dim (int): Number of hidden units
            att_dim (int): Size of attention match vector
            maxout_dim (int): Size of maxout layer
            representation_dim (int): Dimension of source annotations
            attention_strategy (string): Which attention should be used
                                         cf.  ``_initialize_attention``
            attention_sources (string): Defines the sources used by the
                                        attention model: 's' for decoder
                                        states, 'f' for feedback
            readout_sources (string): Defines the sources used in the 
                                      readout network. 's' for decoder
                                      states, 'f' for feedback, 'a' for
                                      attention (context vector)
            memory (string): Which external memory should be used
                             (cf.  ``_initialize_attention``)
            memory_size (int): Size of the external memory structure
            seq_len (int): Maximum sentence length
            init_strategy (string): How to initialize the RNN state
                                    (cf.  ``GRUInitialState``)
            theano_seed: Random seed
        """
        super(NoLookupDecoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed

        # Initialize gru with special initial state
        self.transition = GRUInitialState(attended_dim=state_dim,
                                          init_strategy=init_strategy,
                                          dim=state_dim,
                                          activation=Tanh(),
                                          name='decoder')

        # Initialize the attention mechanism
        att_dim = att_dim if att_dim > 0 else state_dim
        self.attention, src_names = _initialize_attention(
            attention_strategy, seq_len, self.transition, representation_dim,
            att_dim, attention_sources, readout_sources, memory, memory_size)

        # Initialize the readout, note that SoftmaxEmitter emits -1 for
        # initial outputs which is used by LookupFeedBackWMT15
        maxout_dim = maxout_dim if maxout_dim > 0 else state_dim
        readout = Readout(
            source_names=src_names,
            readout_dim=embedding_dim,
            emitter=NoLookupEmitter(initial_output=-1,
                                    readout_dim=embedding_dim,
                                    cost_brick=SquaredError()),
            #                        cost_brick=CategoricalCrossEntropy()),
            feedback_brick=TrivialFeedback(output_dim=embedding_dim),
            post_merge=InitializableFeedforwardSequence([
                Bias(dim=maxout_dim, name='maxout_bias').apply,
                Maxout(num_pieces=2, name='maxout').apply,
                Linear(input_dim=maxout_dim / 2,
                       output_dim=embedding_dim,
                       use_bias=False,
                       name='softmax0').apply,
                Logistic(name='softmax1').apply
            ]),
            merged_dim=maxout_dim)

        # Build sequence generator accordingly
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([
                name
                for name in self.transition.apply.sequences if name != 'mask'
            ],
                      prototype=Linear()))

        self.children = [self.sequence_generator]
Example #25
File: ladder.py Project: fulldecent/LRE
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()
        self.oos = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat, 'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i+1][1][0] if len(layers) > i+1 else (None, None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' % (
                i, l_type,
                denois_print,
                self.layer_dims.get(i+1),
                self.layer_dims.get(i)
                ))

        # Costs
        y = target_labeled.flatten()

        Q = int(self.layer_dims[top][0]) - 1
        logger.info('Q=%d'%Q)
        costs.class_clean = CategoricalCrossEntropyIV(Q=Q,
                                                      alpha=self.p.alpha,
                                                      beta=self.p.beta,
                                                      dbeta=self.p.dbeta,
                                                      gamma=self.p.gamma,
                                                      gamma1=self.p.gamma1
                                                      ).apply(y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropyIV(Q=Q,
                                                     alpha=self.p.alpha,
                                                     beta=self.p.beta,
                                                     dbeta=self.p.dbeta,
                                                     gamma=self.p.gamma,
                                                     gamma1=self.p.gamma1,
                                                     ).apply(y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        if self.p.alpha_clean:
            y_true = y
            eps = np.float32(1e-6)

            # scale preds so that the class probas of each sample sum to 1
            y_pred = clean.labeled.h[top] + eps
            y_pred /= y_pred.sum(axis=-1, keepdims=True)

            y0 = T.or_(T.eq(y_true, 0), T.gt(y_true, Q))  # out-of-set or unlabeled
            y0sum = y0.sum() + eps  # number of oos

            cost1 = T.nnet.categorical_crossentropy(y_pred, y_pred)
            cost1 = T.dot(y0, cost1) / y0sum  # average cost per labeled example
            costs.total += self.p.alpha_clean * cost1

        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRateIV(oos_thr=self.p.oos_thr)
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
        oosr = OOSRateIV()
        self.oos.clean = oosr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.oos.clean.name = 'oos_rate_clean'
Example #26
#lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()

#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'

#ipdb.set_trace()

#ComputationGraph(y_hat).get_theano_function()(features_test)[0].shape
#ComputationGraph(cost).get_theano_function()(features_test, targets_test)[0].shape

cg = ComputationGraph(cost)

#cg = ComputationGraph(hiddens).get_theano_function()
#ipdb.set_trace()
algorithm = GradientDescent(cost=cost,
                            params=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(5.0),
                                 Scale(0.01)]))