Example 1
def test_deterministic():
    seed = utt.fetch_seed()
    sample_size = (10, 20)

    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)

    for use_cuda in test_use_cuda:
        #print 'use_cuda =', use_cuda
        R = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        u = R.uniform(size=sample_size)
        f = theano.function([], u)

        fsample1 = f()
        fsample2 = f()
        assert not numpy.allclose(fsample1, fsample2)

        R2 = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        u2 = R2.uniform(size=sample_size)
        g = theano.function([], u2)
        gsample1 = g()
        gsample2 = g()
        assert numpy.allclose(fsample1, gsample1)
        assert numpy.allclose(fsample2, gsample2)
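The test above relies on MRG_RandomStreams reproducing the same sequence for the same seed. A minimal standalone sketch of that property, assuming an old Theano installation that still ships theano.sandbox.rng_mrg:

import numpy
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

r1 = MRG_RandomStreams(seed=42)
r2 = MRG_RandomStreams(seed=42)
f1 = theano.function([], r1.uniform(size=(5, 5)))
f2 = theano.function([], r2.uniform(size=(5, 5)))
assert numpy.allclose(f1(), f2())       # same seed -> same draws
assert not numpy.allclose(f1(), f1())   # each call advances the stream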
Example 2
def test_uniform_broadcastable():
    x = tensor.matrix()
    size1 = (10, 1)
    size2 = (x.shape[0], 1)

    R = MRG_RandomStreams(234, use_cuda=False)

    # check when all dimensions are constant
    uu = R.uniform(size=size1)
    assert uu.broadcastable == (False, True)

    # check when some dimensions are theano variables
    uu = R.uniform(size=size2)
    assert uu.broadcastable == (False, True)
Example 3
def test_seed_fn():
    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)
    idx = tensor.ivector()
    for use_cuda in test_use_cuda:
        if config.mode == 'FAST_COMPILE' and use_cuda:
            mode = 'FAST_RUN'
        else:
            mode = config.mode

        for new_seed, same in [(234, True), (None, True), (23, False)]:
            random = MRG_RandomStreams(234, use_cuda=use_cuda)
            fn1 = theano.function([], random.uniform((2, 2), dtype='float32'),
                                  mode=mode)
            fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                     dtype='float32'),
                                  mode=mode)
            fn3 = theano.function([idx],
                                  random.uniform(idx, nstreams=3, ndim=1,
                                                 dtype='float32'),
                                  mode=mode)

            fn1_val0 = fn1()
            fn1_val1 = fn1()
            assert not numpy.allclose(fn1_val0, fn1_val1)
            fn2_val0 = fn2()
            fn2_val1 = fn2()
            assert not numpy.allclose(fn2_val0, fn2_val1)
            fn3_val0 = fn3([4])
            fn3_val1 = fn3([4])
            assert not numpy.allclose(fn3_val0, fn3_val1)
            assert fn1_val0.size == 4
            assert fn2_val0.size == 9

            random.seed(new_seed)

            fn1_val2 = fn1()
            fn1_val3 = fn1()
            fn2_val2 = fn2()
            fn2_val3 = fn2()
            fn3_val2 = fn3([4])
            fn3_val3 = fn3([4])
            assert numpy.allclose(fn1_val0, fn1_val2) == same
            assert numpy.allclose(fn1_val1, fn1_val3) == same
            assert numpy.allclose(fn2_val0, fn2_val2) == same
            assert numpy.allclose(fn2_val1, fn2_val3) == same
            assert numpy.allclose(fn3_val0, fn3_val2) == same
            assert numpy.allclose(fn3_val1, fn3_val3) == same
Example 4
def test_consistency_randomstreams():
    '''Verify that the random numbers generated by MRG_RandomStreams
    are the same as the reference (Java) implementation by L'Ecuyer et al.
    '''

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)

    for use_cuda in test_use_cuda:
        #print 'use_cuda =', use_cuda
        samples = []
        rng = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        for i in range(n_streams):
            stream_samples = []
            u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
            f = theano.function([], u)
            for j in range(n_samples):
                s = f()
                stream_samples.append(s)
            stream_samples = numpy.array(stream_samples)
            stream_samples = stream_samples.T.flatten()
            samples.append(stream_samples)

        samples = numpy.array(samples).flatten()
        assert(numpy.allclose(samples, java_samples))
Example 5
def compare_speed():
    # To run this speed comparison
    # cd <directory of this file>
    # THEANO_FLAGS=device=gpu \
    #   python -c 'import test_rng_curand; test_rng_curand.compare_speed()'

    mrg = MRG_RandomStreams()
    crn = CURAND_RandomStreams(234)

    N = 1000 * 100

    dest = theano.shared(numpy.zeros(N, dtype=theano.config.floatX))

    mrg_u = theano.function([], [], updates={dest: mrg.uniform((N,))},
            profile='mrg uniform')
    crn_u = theano.function([], [], updates={dest: crn.uniform((N,))},
            profile='crn uniform')
    mrg_n = theano.function([], [], updates={dest: mrg.normal((N,))},
            profile='mrg normal')
    crn_n = theano.function([], [], updates={dest: crn.normal((N,))},
            profile='crn normal')

    for f in mrg_u, crn_u, mrg_n, crn_n:
        # print the compiled graph of each function
        print('DEBUGPRINT')
        print('----------')
        theano.printing.debugprint(f)

    for i in range(100):
        for f in mrg_u, crn_u, mrg_n, crn_n:
            # don't time the first call, it has some startup cost
            f.fn.time_thunks = (i > 0)
            f()
Example 6
    def compute_output(self, network, in_vw):
        # gather hyperparameters
        deterministic = network.find_hyperparameter(["deterministic"])
        l = network.find_hyperparameter(["alpha_lower"],
                                        3)
        u = network.find_hyperparameter(["alpha_upper"],
                                        8)

        if deterministic:
            negative_coefficient = 2.0 / (l + u)
        else:
            # TODO save this state so that we can seed the rng
            srng = MRG_RandomStreams()
            alphas = srng.uniform(size=in_vw.symbolic_shape(),
                                  low=l,
                                  high=u)
            negative_coefficient = 1.0 / alphas

        # return output
        network.create_vw(
            "default",
            variable=treeano.utils.rectify(
                in_vw.variable,
                negative_coefficient=negative_coefficient),
            shape=in_vw.shape,
            tags={"output"},
        )
Example 7
def test_broadcastable():
    R = MRG_RandomStreams(234)
    x = tensor.matrix()
    size1 = (10, 1)
    size2 = (x.shape[0], 1)
    pvals_1 = np.random.uniform(0, 1, size=size1)
    pvals_1 = pvals_1 / sum(pvals_1)
    pvals_2 = R.uniform(size=size2)
    pvals_2 = pvals_2 / tensor.sum(pvals_2)

    for distribution in [R.uniform, R.binomial, R.multinomial, R.multinomial_wo_replacement, R.normal]:
        # multinomial and multinomial_wo_replacement do not support the "size"
        # argument; their sizes are implicitly defined by the "pvals" argument.
        if distribution in [R.multinomial, R.multinomial_wo_replacement]:
            # check when all dimensions are constant
            uu = distribution(pvals=pvals_1)
            assert uu.broadcastable == (False, True)

            # check when some dimensions are theano variables
            uu = distribution(pvals=pvals_2)
            assert uu.broadcastable == (False, True)
        else:
            # check when all dimensions are constant
            uu = distribution(size=size1)
            assert uu.broadcastable == (False, True)

            # check when some dimensions are theano variables
            uu = distribution(size=size2)
            assert uu.broadcastable == (False, True)
Example 8
    def prediction(self, h, bias):
        srng = RandomStreams(seed=42)

        prop, mean_x, mean_y, std_x, std_y, rho, bernoulli = \
            self.compute_parameters(h, bias)

        mode = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)

        v = T.arange(0, mean_x.shape[0])
        m_x = mean_x[v, mode]
        m_y = mean_y[v, mode]
        s_x = std_x[v, mode]
        s_y = std_y[v, mode]
        r = rho[v, mode]
        # cov = r * (s_x * s_y)

        normal = srng.normal((h.shape[0], 2))
        x = normal[:, 0]
        y = normal[:, 1]

        # x_n = T.shape_padright(s_x * x + cov * y + m_x)
        # y_n = T.shape_padright(s_y * y + cov * x + m_y)

        x_n = T.shape_padright(m_x + s_x * x)
        y_n = T.shape_padright(m_y + s_y * (x * r + y * T.sqrt(1.-r**2)))

        uniform = srng.uniform((h.shape[0],))
        pin = T.shape_padright(T.cast(bernoulli > uniform, floatX))

        return T.concatenate([x_n, y_n, pin], axis=1)
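The x_n/y_n lines above are the usual two-variable Cholesky construction: for independent standard normals x and y, the pair (x, r*x + sqrt(1 - r**2)*y) has correlation r. A small NumPy check of that identity (the scales and correlation below are purely illustrative, not values from the model):

import numpy as np

rng = np.random.default_rng(0)
s_x, s_y, r = 2.0, 0.5, 0.8   # illustrative values
x, y = rng.standard_normal((2, 100000))
samp_x = s_x * x
samp_y = s_y * (r * x + np.sqrt(1.0 - r ** 2) * y)
# empirical covariance is close to [[s_x**2, r*s_x*s_y], [r*s_x*s_y, s_y**2]]
print(np.cov(samp_x, samp_y))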
Example 9
def test_f16_nonzero(mode=None, op_to_check=rng_mrg.mrg_uniform):
    srng = MRG_RandomStreams(seed=utt.fetch_seed())
    m = srng.uniform(size=(1000, 1000), dtype='float16')
    assert m.dtype == 'float16', m.type
    f = theano.function([], m, mode=mode)
    assert any(isinstance(n.op, op_to_check) for n in f.maker.fgraph.apply_nodes)
    m_val = f()
    assert np.all((0 < m_val) & (m_val < 1))
Example 10
class UniformRandom(object):
    """Implements uniform random sampling with Theano RandomStreams."""
    def __init__(self):
        # `seed`, `low` and `high` are expected to be provided elsewhere
        # (e.g. by a subclass or mixin); only the stream is created here.
        self._rng = RandomStreams(seed=self.seed or 123456)

    def _sample(self, shape, dtype):
        return self._rng.uniform(
            size=shape, low=self.low, high=self.high, dtype=dtype)
Example 11
    def compile_iter_fns(self, *args, **kwargs):
        eta = theano.shared(lasagne.utils.floatX(initial_eta))
        self.eta = eta
        self.shared_lr = eta

        loss_critic = self.real_out.mean() - self.fake_out.mean()
        critic_updates = rmsprop(
                -1*loss_critic, self.critic_params, learning_rate=eta)
                
        loss_gen = -1*self.fake_out.mean()
        generator_updates = rmsprop(
                loss_gen, self.generator_params, learning_rate=eta)
                
                
        # Clip critic parameters in a limited range around zero (except biases)
        critic_clip_updates=[]
        for param in lasagne.layers.get_all_params(self.critic, trainable=True,
                                                   regularizable=True):
                                                   
            critic_clip_updates.append([param, T.clip(param, -clip, clip)])
            
            
        # Instantiate a symbolic noise generator to use for training
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        srng = RandomStreams(seed=np.random.randint(2147462579, size=6))
        noise = srng.uniform((batchsize, 100))

        # Compile functions performing a training step on a mini-batch (according
        # to the updates dictionary) and returning the corresponding score:
        print('Compiling...')
        
        import time
        
        start = time.time()
        
        self.generator_train_fn = theano.function([], loss_gen,
                                             givens={self.noise_var: noise},
                                             updates=generator_updates)
        self.critic_train_fn = theano.function([self.input_var],loss_critic,
                                          givens={self.noise_var: noise},
                                          updates=critic_updates)
        self.critic_clip_fn = theano.function([],updates=critic_clip_updates)

        # Compile another function generating some data
        self.gen_fn = theano.function([self.noise_var],
                                 lasagne.layers.get_output(self.generator,
                                                           deterministic=True))
                                                           
        self.val_fn = theano.function([self.input_var], 
                                        outputs=[loss_critic, loss_gen],
                                        givens={self.noise_var: noise})
                                                           
        if self.verbose: print ('Compile time: %.3f s' % (time.time()-start))
Example 12
def test_deterministic():
    seed = utt.fetch_seed()
    sample_size = (10, 20)

    R = MRG_RandomStreams(seed=seed)
    u = R.uniform(size=sample_size)
    f = theano.function([], u)

    fsample1 = f()
    fsample2 = f()
    assert not np.allclose(fsample1, fsample2)

    R2 = MRG_RandomStreams(seed=seed)
    u2 = R2.uniform(size=sample_size)
    g = theano.function([], u2)
    gsample1 = g()
    gsample2 = g()
    assert np.allclose(fsample1, gsample1)
    assert np.allclose(fsample2, gsample2)
Example 13
def test_uniform():
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (config.mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            config.mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)

    x = tensor.matrix()
    for size, const_size, var_input, input in [
            (sample_size, sample_size, [], []),
            (x.shape, sample_size, [x],
             [np.zeros(sample_size, dtype=config.floatX)]),
            ((x.shape[0], sample_size[1]), sample_size, [x],
             [np.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
            ((), (), [], []),
            ]:

        # TEST CPU IMPLEMENTATION
        # The python and C implementation are tested with DebugMode
        x = tensor.matrix()
        R = MRG_RandomStreams(234)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u)
        assert any([isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
                    for node in f.maker.fgraph.toposort()])
        f(*input)

        # Increase the number of steps if sizes implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        basictest(f, steps_, const_size, prefix='mrg cpu', inputs=input)

        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

        uu = RR.uniform(size=size)
        ff = theano.function(var_input, uu)
        # It's not our problem if numpy generates 0 or 1
        basictest(ff, steps_, const_size, prefix='numpy',
                  allow_01=True, inputs=input)
Example 14
    def new_update_deltas(self, network):
        alpha_vw = network.get_variable("alpha")
        step_size = network.find_hyperparameter(["step_size"])
        # NOTE: each MRG_RandomStreams has the same seed, so
        # all nodes with the same shape end up with the same alphas
        srng = MRG_RandomStreams()
        steps = srng.uniform(size=alpha_vw.shape,
                             low=-step_size,
                             high=step_size)
        # TODO clip value of alpha (to prevent it becoming linear)
        return treeano.UpdateDeltas({alpha_vw.variable: steps})
Example 15
class Dropout(object):
    def __init__(self, shape=None, prob=0.5):
        self.retain_prob = 1.0 - prob
        self.shape = shape
        self.seed = RNG.randint(1e6)
        self.rng = RandomStreams(self.seed)

    def drop(self, cur_in):
        self.mask = T.switch(
            self.rng.uniform(self.shape, dtype=theano.config.floatX) < self.retain_prob,
            1., 0.)
        h = cur_in * self.mask
        h /= self.retain_prob
        return h
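A hypothetical usage sketch for the Dropout helper above. It assumes RNG is a NumPy RandomState and that RandomStreams, theano and T refer to the usual Theano imports, since the snippet itself leaves those to the surrounding module:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

RNG = np.random.RandomState(0)
x = T.matrix('x')
drop = Dropout(shape=(4, 5), prob=0.5)
f = theano.function([x], drop.drop(x))
# roughly half the entries are zeroed, the rest rescaled by 1/retain_prob
print(f(np.ones((4, 5), dtype=theano.config.floatX)))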
Example 16
def test_target_parameter():
    srng = MRG_RandomStreams()
    pvals = np.array([[.98, .01, .01], [.01, .49, .50]])

    def basic_target_parameter_test(x):
        f = theano.function([], x)
        assert isinstance(f(), np.ndarray)

    basic_target_parameter_test(srng.uniform((3, 2), target='cpu'))
    basic_target_parameter_test(srng.binomial((3, 2), target='cpu'))
    basic_target_parameter_test(srng.multinomial(pvals=pvals.astype('float32'), target='cpu'))
    basic_target_parameter_test(srng.choice(p=pvals.astype('float32'), replace=False, target='cpu'))
    basic_target_parameter_test(srng.multinomial_wo_replacement(pvals=pvals.astype('float32'), target='cpu'))
Example 17
def test_seed_fn():
    idx = tensor.ivector()

    for new_seed, same in [(234, True), (None, True), (23, False)]:
        random = MRG_RandomStreams(234)
        fn1 = theano.function([], random.uniform((2, 2), dtype='float32'))
        fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                 dtype='float32'))
        fn3 = theano.function([idx],
                              random.uniform(idx, nstreams=3, ndim=1,
                                             dtype='float32'))

        fn1_val0 = fn1()
        fn1_val1 = fn1()
        assert not np.allclose(fn1_val0, fn1_val1)
        fn2_val0 = fn2()
        fn2_val1 = fn2()
        assert not np.allclose(fn2_val0, fn2_val1)
        fn3_val0 = fn3([4])
        fn3_val1 = fn3([4])
        assert not np.allclose(fn3_val0, fn3_val1)
        assert fn1_val0.size == 4
        assert fn2_val0.size == 9

        random.seed(new_seed)

        fn1_val2 = fn1()
        fn1_val3 = fn1()
        fn2_val2 = fn2()
        fn2_val3 = fn2()
        fn3_val2 = fn3([4])
        fn3_val3 = fn3([4])
        assert np.allclose(fn1_val0, fn1_val2) == same
        assert np.allclose(fn1_val1, fn1_val3) == same
        assert np.allclose(fn2_val0, fn2_val2) == same
        assert np.allclose(fn2_val1, fn2_val3) == same
        assert np.allclose(fn3_val0, fn3_val2) == same
        assert np.allclose(fn3_val1, fn3_val3) == same
Example 18
    def fprop(self, state_below):
        
        print "======fprop====="
        
        rng = RandomStreams(seed=234)

        #size = theano.tensor.as_tensor_variable((state_below.shape[0], self.dim))
        un = rng.uniform(size=(state_below.shape[0], self.dim), low=0., high=1., dtype=config.floatX)
        self.noise = T.log(un/(1-un))
        p = self._linear_part(state_below) + self.noise * self.noise_factor

        batch_size = (p.shape[0]).astype(config.floatX)
        self.active_rate = T.gt(p, self.threshold).sum(axis=0, dtype=config.floatX) / batch_size
        
        return T.gt(p, self.threshold) * p
Example 19
class IfElseDropLayer(Layer):
    def __init__(self, incoming, p=0.5, **kwargs):
        super(IfElseDropLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.p = p

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            return self.p*input
        else:
            return ifelse(
                T.lt(self._srng.uniform( (1,), 0, 1)[0], self.p),
                input,
                T.zeros(input.shape)
            )
Example 20
class SaltAndPepperNoiseLayer(lasagne.layers.Layer):
    def __init__(self, incoming, rate=0.1, **kwargs):
        super(SaltAndPepperNoiseLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(np.random.randint(1, 2147462579))
        self.rate = rate

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic or self.rate == 0:
            return input
        else:
            drop = self._srng.uniform(input.shape)
            z = T.lt(drop, 0.5 * self.rate)
            o = T.lt(T.abs_(drop - 0.75 * self.rate), 0.25 * self.rate)
            input = T.set_subtensor(input[z.nonzero()], 0.)
            input = T.set_subtensor(input[o.nonzero()], 1.)
            return input
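A hypothetical way to wire the layer above into a small Lasagne graph (the input shape and noise rate are illustrative; the MRG RandomStreams alias and the theano.tensor import are assumptions about the snippet's surrounding module):

import numpy as np
import theano
import theano.tensor as T
import lasagne
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

l_in = lasagne.layers.InputLayer((None, 1, 28, 28))
l_noise = SaltAndPepperNoiseLayer(l_in, rate=0.1)
out_train = lasagne.layers.get_output(l_noise)                     # noisy
out_test = lasagne.layers.get_output(l_noise, deterministic=True)  # unchanged
f = theano.function([l_in.input_var], [out_train, out_test])
noisy, clean = f(np.random.rand(2, 1, 28, 28).astype(theano.config.floatX))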
Example 21
    def compile_iter_fns(self, *args, **kwargs):
        
        # Create loss expressions to be minimized
        # a, b, c = -1, 1, 0  # Equation (8) in the paper
        a, b, c = 0, 1, 1  # Equation (9) in the paper
        loss_gen = lasagne.objectives.squared_error(self.fake_out, c).mean()
        # loss_gen = -1*self.fake_out.mean()
        loss_critic = (lasagne.objectives.squared_error(self.real_out, b).mean() +
                       lasagne.objectives.squared_error(self.fake_out, a).mean())
        # loss_critic = self.real_out.mean() - self.fake_out.mean()
        self.shared_lr = theano.shared(lasagne.utils.floatX(initial_eta))
        
        generator_updates = lasagne.updates.rmsprop(
                loss_gen, self.generator_params, learning_rate=self.shared_lr)
        critic_updates = lasagne.updates.rmsprop(
                loss_critic, self.critic_params, learning_rate=self.shared_lr)
            
        # Instantiate a symbolic noise generator to use for training
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        srng = RandomStreams(seed=np.random.randint(2147462579, size=6))
        noise = srng.uniform((batchsize, 100))

        # Compile functions performing a training step on a mini-batch (according
        # to the updates dictionary) and returning the corresponding score:
        if self.verbose: print('Compiling...')
        
        import time
        
        start = time.time()
        
        self.generator_train_fn = theano.function([], loss_gen,
                                             givens={self.noise_var: noise},
                                             updates=generator_updates)
        self.critic_train_fn = theano.function([self.input_var],loss_critic,
                                          givens={self.noise_var: noise},
                                          updates=critic_updates)
                                          
        # Compile another function generating some data
        self.gen_fn = theano.function([self.noise_var],
                                 lasagne.layers.get_output(self.generator,
                                                           deterministic=True))
                                                           
        self.val_fn = theano.function([self.input_var], 
                                        outputs=[loss_critic, loss_gen],
                                        givens={self.noise_var: noise})
                                                           
        if self.verbose: print ('Compile time: %.3f s' % (time.time()-start))
Example 22
class IfElseDropLayer(lasagne.layers.Layer):
    def __init__(self, incoming, nonlinearity=elu, survival_p=0.5,
                 **kwargs):
        super(IfElseDropLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (identity if nonlinearity is None
                             else nonlinearity)
        self._srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        self.p = 1-survival_p

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            return self.p*input
        else:
            return theano.ifelse.ifelse(
                T.lt(self._srng.uniform( (1,), 0, 1)[0], self.p),
                input,
                T.zeros(input.shape)
            ) 
Example 23
def test_GPUA_full_fill():
    # Make sure the whole sample buffer is filled.  Also make sure
    # large samples are consistent with CPU results.

    # This needs to be large to trigger the problem on GPU
    size = (10, 1000)

    R = MRG_RandomStreams(234)
    uni = R.uniform(size, nstreams=60 * 256)
    f_cpu = theano.function([], uni)

    rstate_gpu = gpuarray_shared_constructor(R.state_updates[-1][0].get_value())
    new_rstate, sample = GPUA_mrg_uniform.new(rstate_gpu, ndim=None,
                                              dtype='float32',
                                              size=size)
    rstate_gpu.default_update = new_rstate
    f_gpu = theano.function([], sample, mode=mode)

    utt.assert_allclose(f_cpu(), f_gpu())
Example 24
def test_cpu_target_with_shared_variable():
    srng = MRG_RandomStreams()
    s = np.random.rand(2, 3).astype('float32')
    x = gpuarray_shared_constructor(s, name='x')
    try:
        # To have theano.shared(x) try to move onto the GPU
        theano.compile.shared_constructor(gpuarray_shared_constructor)
        y = srng.uniform(x.shape, target='cpu')
        y.name = 'y'
        z = (x * y).sum()
        z.name = 'z'

        fz = theano.function([], z, mode=mode)

        nodes = fz.maker.fgraph.toposort()
        assert not any([isinstance(node.op, GPUA_mrg_uniform) for node in nodes])
    finally:
        theano.compile.shared_constructor(gpuarray_shared_constructor,
                                          remove=True)
Example 25
    def make_samples(self):
        """Generate sample points uniformly distributed within the sphere.
        
        Returns float array of sample points.
        
        """
        srng = RandomStreams(seed=self.ensemble.seed)
        samples = srng.normal((self.num_samples, self.ensemble.dimensions))
        
        # normalize magnitude of sampled points to be of unit length
        norm = TT.sum(samples * samples, axis=[1], keepdims=True) 
        samples = samples / TT.sqrt(norm)

        # generate magnitudes for vectors from uniform distribution
        scale = (srng.uniform((self.num_samples,))
                 ** (1.0 / self.ensemble.dimensions))

        # scale sample points
        samples = samples.T * scale 
        
        return theano.function([], samples)()
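The method above uses the standard uniform-in-the-ball trick: normalize Gaussian draws to get uniformly distributed directions, then scale the radii by U**(1/d) so that points are uniform in volume rather than in radius. The same idea in plain NumPy (a sketch, not part of the original ensemble code):

import numpy as np

def uniform_ball(num_samples, dim, seed=0):
    rng = np.random.default_rng(seed)
    # directions: Gaussian draws normalized to unit length
    directions = rng.standard_normal((num_samples, dim))
    directions /= np.linalg.norm(directions, axis=1, keepdims=True)
    # radii: U**(1/dim) fills the ball uniformly in volume
    radii = rng.uniform(size=(num_samples, 1)) ** (1.0 / dim)
    return directions * radii

points = uniform_ball(10000, 3)
assert np.all(np.linalg.norm(points, axis=1) <= 1.0)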
Example 26
def test_consistency_randomstreams():
    # Verify that the random numbers generated by MRG_RandomStreams
    # are the same as the reference (Java) implementation by L'Ecuyer et al.
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    rng = MRG_RandomStreams(seed=seed)
    for i in range(n_streams):
        stream_samples = []
        u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
        f = theano.function([], u)
        for j in range(n_samples):
            s = f()
            stream_samples.append(s)
        stream_samples = np.array(stream_samples)
        stream_samples = stream_samples.T.flatten()
        samples.append(stream_samples)

    samples = np.array(samples).flatten()
    assert(np.allclose(samples, java_samples))
Example 27
class RangeDropoutLayer(Layer):
    def __init__(self, low_p, high_p):
        super(RangeDropoutLayer, self).__init__()
        assert 0.0 < low_p < 1.0
        assert 0.0 < high_p < 1.0
        assert low_p < high_p
        self.low_p = low_p
        self.high_p = high_p

        self.srng = RandomStreams(seed=np.random.randint(10e6))

    def build(self, input, is_train):
        super(RangeDropoutLayer, self).build(input)
        self.p = self.srng.uniform((1,1), low=self.low_p, high=self.high_p)
        retain_prob = 1 - self.p[0][0]
        self.output = T.switch(T.gt(is_train, 0),
                self.input * self.srng.binomial(self.input.shape,
                                                p=retain_prob,
                                                dtype=theano.config.floatX) / retain_prob,
                self.input)

    def get_output(self):
        return self.output
Example 28
    def test_nanguardmode():
        # this is the case which requires a custom nanguardmode
        srng = MRG_RandomStreams()
        x = srng.uniform((3, 4, 5))

        def random_number(mode):
            return theano.function([], [x], mode=mode)()

        @nt.raises(AssertionError)
        def fails():
            random_number(theano.compile.nanguardmode.NanGuardMode(
                nan_is_error=True,
                inf_is_error=True,
                big_is_error=True
            ))

        fails()

        random_number(treeano.theano_extensions.nanguardmode.NanGuardMode(
            nan_is_error=True,
            inf_is_error=True,
            big_is_error=True
        ))
Example 29
class GanModel(object):
    def __init__(self, gan_model_name, *args, **kwargs):

        self.model_func = getattr(gan_models, gan_model_name)

        self.logging = kwargs.pop('logging')
        self.kwargs = kwargs.pop('opt_kwargs')
        self.batch_size = kwargs['batch_size']
        self.gan_mode = kwargs['gan_mode']

        d_bn_mode = kwargs['d_bn']
        g_bn_mode = kwargs['g_bn']

        d_dp_p = kwargs['d_dp']
        g_dp_p = kwargs['g_dp']

        if kwargs['d_bn'] in ('bn', 'ln'):

            def d_bn(x):
                return gan_models.batch_norm(x, steal_nl=0, axes=d_bn_mode)
        else:
            self.logging.info('no D normalization')

            def d_bn(x):
                return x

        if kwargs['g_bn'] in ('bn', 'ln'):

            def g_bn(x):
                return gan_models.batch_norm(x, steal_nl=0, axes=g_bn_mode)
        else:
            self.logging.info('no G normalization')

            def g_bn(x):
                return x

        if floatX(kwargs['d_dp']) != 0.:

            def d_dp(x):
                return ll.DropoutLayer(x, floatX(d_dp_p))
        else:
            d_dp = None

        if floatX(kwargs['g_dp']) != 0.:

            def g_dp(x):
                return ll.DropoutLayer(x, floatX(g_dp_p))
        else:
            g_dp = None

        kwargs['d_bn'] = d_bn
        kwargs['g_bn'] = g_bn
        kwargs['d_dp'] = d_dp
        kwargs['g_dp'] = g_dp

        self.l_out_g, self.l_out_d, self.l_data, self.g_layers, self.d_layers = \
            self.model_func(*args, **kwargs)

        rng_data = np.random.RandomState()
        rng = np.random.RandomState()
        self.theano_rng = MRG_RandomStreams(rng.randint(2**15))
        lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))

    def get_g_outputs(self):

        self.fake_dat = ll.get_output(self.l_out_g, deterministic=False)

        self.constant_z = T.constant(
            np.random.randn(
                *self.g_layers['l_z'].input_var.eval().shape).astype(floatX))
        self.fake_dat_cz = ll.get_output(self.l_out_g,
                                         self.constant_z,
                                         deterministic=False)

        return self.fake_dat, self.fake_dat_cz

    def get_d_outputs(self, real_dat=None, fake_dat=None):
        kwargs = self.kwargs

        def df_dx(last_ll, dat):

            gradients = T.grad(T.sum(last_ll, axis=0).squeeze(), dat)
            slopes = T.sqrt(T.sum(gradients**2, axis=(1, 2, 3)))

            return gradients, slopes

        if real_dat is None:
            real_dat = self.real_dat = T.tensor4()

        if fake_dat is None:
            fake_dat = self.fake_dat

        self.alpha = self.theano_rng.uniform((self.batch_size, 1, 1, 1),
                                             low=0.0,
                                             high=1.0)

        differences = real_dat - fake_dat
        self.interp_dat = interp_dat = real_dat + (self.alpha * differences)

        # give each output-layer list its own copy so the appends below do not
        # all mutate one shared (aliased) list
        interp_evaled_layers = [self.l_out_d]
        fake_evaled_layers = [self.l_out_d]
        fake_cz_evaled_layers = [self.l_out_d]
        real_evaled_layers = [self.l_out_d]

        preact_layers = [
            self.d_layers[d] for d in self.d_layers.keys()
            if d.startswith('preact')
        ]

        if 'last_linear' in self.d_layers:
            interp_evaled_layers.append(self.d_layers['last_linear'])
            fake_evaled_layers.append(self.d_layers['last_linear'])
            real_evaled_layers.append(self.d_layers['last_linear'])
            fake_cz_evaled_layers.append(self.d_layers['last_linear'])

        interp_evaled_layers.extend(preact_layers)
        fake_evaled_layers.extend(preact_layers)
        fake_cz_evaled_layers.extend(preact_layers)
        real_evaled_layers.extend(preact_layers)

        interp_evaled = ll.get_output(interp_evaled_layers,
                                      self.interp_dat,
                                      deterministic=False)
        fake_evaled = ll.get_output(fake_evaled_layers,
                                    self.fake_dat,
                                    deterministic=False)
        fake_cz_evaled = ll.get_output(fake_cz_evaled_layers,
                                       self.fake_dat_cz,
                                       deterministic=False)
        real_evaled = ll.get_output(real_evaled_layers,
                                    self.real_dat,
                                    deterministic=False)

        real_l2v = OrderedDict(safe_zip(real_evaled_layers, real_evaled))
        fake_l2v = OrderedDict(safe_zip(fake_evaled_layers, fake_evaled))
        fake_cz_l2v = OrderedDict(
            safe_zip(fake_cz_evaled_layers, fake_cz_evaled))
        interp_l2v = OrderedDict(safe_zip(interp_evaled_layers, interp_evaled))

        output_gen = fake_l2v[self.l_out_d]
        output_data = real_l2v[self.l_out_d]

        gradients_fake, slopes_fake = df_dx(
            fake_l2v[self.d_layers['last_linear']], self.fake_dat)
        gradients_real, slopes_real = df_dx(
            real_l2v[self.d_layers['last_linear']], self.real_dat)
        gradients_interp, slopes_interp = df_dx(
            interp_l2v[self.d_layers['last_linear']], self.interp_dat)

        gradient_penalty_real = grad_penalty(slopes_real,
                                             floatX(kwargs['gp_slope']))
        gradient_penalty_fake = grad_penalty(slopes_fake,
                                             floatX(kwargs['gp_slope']))
        gradient_penalty_interp = grad_penalty(slopes_interp,
                                               floatX(kwargs['gp_slope']))

        # loss terms
        self.d_cost_adv, self.g_cost_adv = build_costs(output_gen, output_data,
                                                       None, None,
                                                       self.gan_mode)
        d_cost = self.d_cost_adv
        g_cost = self.g_cost_adv

        if floatX(kwargs['smoothness']):
            smoothness_cost = 0.
            for pl in preact_layers:
                mm = self.theano_rng.binomial(
                    (self.batch_size, ) + pl.input_shape[1:],
                    p=.5,
                    dtype='float32')

                zz = self.theano_rng.uniform((1, ) + pl.input_shape[1:],
                                             dtype='float32')

                zz = zz * mm
                pl_zh = pl.get_output_for(zz)

                print zz.eval().shape, pl_zh.eval().shape
                smoothness_cost += w_smoothness(pl_zh, self.batch_size)
                # import ipdb
                # ipdb.set_trace()

            d_cost += floatX(kwargs['smoothness']) * smoothness_cost

        if floatX(kwargs['gp_weight']):
            self.logging.info('gp with weight:' + kwargs['gp_weight'])

            d_cost += floatX(kwargs['gp_weight']) * gradient_penalty_interp

        h_layers_fake = [fake_l2v[l] for l in preact_layers]
        h_layers_interp = [interp_l2v[l] for l in preact_layers]
        h_layers_real = [real_l2v[l] for l in preact_layers]

        bre_real, me_real, ac_real, _, ac_stats_real = bre(
            h_layers_real, binarizer=kwargs['binarizer'])

        bre_fake, me_fake, ac_fake, _, ac_stats_fake = bre(
            h_layers_fake, binarizer=kwargs['binarizer'])

        bre_interp, me_interp, ac_interp, _, ac_stats_interp = bre(
            h_layers_interp, binarizer=kwargs['binarizer'])

        self.bre_w = bre_w = theano.shared(floatX(kwargs['bre_w']))

        if floatX(kwargs['bre_w']):

            bre_loss = 0.
            if floatX(kwargs['bre_on_real']):
                self.logging.info('BRE regularization on real')
                bre_loss += bre_w * bre_real

            if floatX(kwargs['bre_on_fake']):
                self.logging.info('BRE regularization on fake')
                bre_loss += bre_w * bre_fake

            if floatX(kwargs['bre_on_interp']):
                self.logging.info('BRE regularization on interp')
                bre_loss += bre_w * bre_interp

            d_cost += bre_loss

        if floatX(kwargs['monitor']):

            ac_min_fake, ac_mean_fake, ac_max_fake, \
                ac_abs_mean_fake, ac_sat_ratio9_fake, ac_sat_ratio9_fake = ac_stats_fake

            ac_min_interp, ac_mean_interp, ac_max_interp, \
                ac_abs_mean_interp, ac_sat_ratio9_interp, ac_sat_ratio9_interp = ac_stats_interp

            ac_min_real, ac_mean_real, ac_max_real, \
                ac_abs_mean_real, ac_sat_ratio9_real, ac_sat_ratio9_real = ac_stats_real

        all_g_layers = ll.get_all_layers(self.g_layers.values())
        all_d_layers = ll.get_all_layers(self.d_layers.values())

        gen_wdecay = regularize_layer_params(all_g_layers,
                                             lasagne.regularization.l2)
        disc_wdecay = regularize_layer_params(all_d_layers,
                                              lasagne.regularization.l2)

        d_cost += floatX(kwargs['d_wdecay']) * disc_wdecay
        g_cost += floatX(kwargs['g_wdecay']) * gen_wdecay

        self.d_cost = d_cost
        self.g_cost = g_cost

        err_data = T.cast(output_data < .5, 'float32').mean()
        err_gen = T.cast(output_gen > .5, 'float32').mean()

        # monitor

        if floatX(kwargs['monitor']):
            monitor_stats = []
            monitor_stats += [err_data, err_gen]

            monitor_stats += [me_fake, me_interp, me_real]
            # absh_mu_fake, absh_mu_interp, absh_mu_real]

            monitor_stats += [
                gradients_fake, slopes_fake, gradient_penalty_fake
            ]
            monitor_stats += [
                gradients_real, slopes_real, gradient_penalty_real
            ]
            monitor_stats += [
                gradients_interp, slopes_interp, gradient_penalty_interp
            ]

            monitor_stats += [ac_fake, ac_real, ac_interp]

            if floatX(kwargs['smoothness']):
                monitor_stats += [smoothness_cost]

            if ac_stats_real:
                monitor_stats += [
                    ac_min_real, ac_mean_real, ac_max_real, ac_abs_mean_real,
                    ac_sat_ratio9_real, ac_sat_ratio9_real
                ]
            if ac_stats_fake:
                monitor_stats += [
                    ac_min_fake, ac_mean_fake, ac_max_fake, ac_abs_mean_fake,
                    ac_sat_ratio9_fake, ac_sat_ratio9_fake
                ]
            if ac_stats_interp:
                monitor_stats += [
                    ac_min_interp, ac_mean_interp, ac_max_interp,
                    ac_abs_mean_interp, ac_sat_ratio9_interp,
                    ac_sat_ratio9_interp
                ]

            _vars = locals()

            def get_name(v):
                for k in _vars:
                    if _vars[k] is v and k != 'v' and k != 'k':
                        return k

            self.monitor_stats = OrderedDict([(get_name(v), v)
                                              for v in monitor_stats])

    def build_funcs(self):

        self.get_g_outputs()
        self.get_d_outputs()

        kwargs = self.kwargs

        d_trainable_params = ll.get_all_params(ll.get_all_layers(
            self.d_layers.values()),
                                               trainable=True)
        g_trainable_params = ll.get_all_params(ll.get_all_layers(
            self.g_layers.values()),
                                               trainable=True)

        self.d_trainable_params = d_trainable_params
        self.g_trainable_params = g_trainable_params

        all_d_params = ll.get_all_params(
            ll.get_all_layers(self.d_layers.values()))
        all_g_params = ll.get_all_params(
            ll.get_all_layers(self.g_layers.values()))

        self.all_d_params = all_d_params
        self.all_g_params = all_g_params

        self.g_sh_lr = theano.shared(
            lasagne.utils.floatX(floatX(kwargs['g_lr'])))
        self.d_sh_lr = theano.shared(
            lasagne.utils.floatX(floatX(kwargs['d_lr'])))

        d_beta1 = floatX(kwargs['d_beta1'])
        g_beta1 = floatX(kwargs['g_beta1'])
        beta2 = floatX(kwargs['beta2'])

        d_updater = lu.adam(self.d_cost,
                            d_trainable_params,
                            self.d_sh_lr,
                            beta1=d_beta1,
                            beta2=beta2)
        g_updater = lu.adam(self.g_cost,
                            g_trainable_params,
                            self.g_sh_lr,
                            beta1=g_beta1,
                            beta2=beta2)

        self.out_var2name = out_var2name = OrderedDict([])

        out_var2name[self.g_cost] = 'g_cost_tot'
        out_var2name[self.g_cost_adv] = 'g_cost_adv'

        out_var2name[self.d_cost] = 'd_cost_tot'
        out_var2name[self.d_cost_adv] = 'd_cost_adv'

        # 0: no, 1: every eval interval, 2: every step
        if int(self.kwargs.get('monitor', '0')) >= 2:
            for k in self.monitor_stats:
                out_var2name[self.monitor_stats[k]] = k
            self.g_inputs = g_inputs = [self.real_dat]
        else:
            self.g_inputs = g_inputs = []

        all_g_updates = g_updater
        all_d_updates = d_updater

        self.g_outs = [self.g_cost, self.g_cost_adv]
        self.g_train_func = theano.function(inputs=g_inputs,
                                            outputs=self.g_outs,
                                            updates=all_g_updates)

        self.d_outs = [self.d_cost, self.d_cost_adv]
        self.d_train_func = theano.function(inputs=[self.real_dat],
                                            outputs=self.d_outs,
                                            updates=all_d_updates)

        self.g_sample_func = theano.function(inputs=[],
                                             outputs=(self.fake_dat + 1.) / 2.)

        self.g_sample_cz_func = theano.function(
            inputs=[], outputs=(self.fake_dat_cz + 1.) / 2.)

        if int(kwargs.get('monitor', '0')) >= 1:
            self.monitor_func = theano.function(
                inputs=[self.real_dat], outputs=self.monitor_stats.values())

    def g_sample(self, N_samples):
        g_samples = []

        while sum(x.shape[0] for x in g_samples) <= N_samples:
            success = False
            while not success:
                try:
                    g_data_v = self.g_sample_func()
                    success = True
                except Exception as err:
                    self.logging.info(err)

            g_samples.append(g_data_v)

        g_samples = np.vstack(g_samples)[:N_samples]
        return g_samples
Example 30
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_out,
                 num_MC,
                 num_FF,
                 n_tot,
                 free_param,
                 Domain_number=None,
                 number="1",
                 Domain_consideration=True):
        # input is also expected to come in with shape 100*N*D.
        self.DATA = input
        #N=DATA.shape[1]
        #n_in_D=DATA.shape[2]
        srng = RandomStreams(seed=234)
        self.num_rff = num_FF

        #Define hyperparameters
        lhyp_values = np.zeros(n_in + 1, dtype=theano.config.floatX) + np.log(
            0.1, dtype=theano.config.floatX)
        #lhyp_values = np.zeros(n_in+1,dtype=theano.config.floatX)+np.log(1.,dtype=theano.config.floatX)
        self.lhyp = theano.shared(value=lhyp_values,
                                  name='lhyp' + number,
                                  borrow=True)
        self.sf2, self.l = T.exp(self.lhyp[0]), T.exp(self.lhyp[1:1 + n_in])

        if Domain_consideration:  # previous work found 0.1 to work well
            ls_value = np.zeros(Domain_number,
                                dtype=theano.config.floatX) + np.log(
                                    0.1, dtype=theano.config.floatX)
        else:
            ls_value = np.zeros(1, dtype=theano.config.floatX) + np.log(
                0.1, dtype=theano.config.floatX)

        self.ls = theano.shared(value=ls_value,
                                name='ls' + number,
                                borrow=True)

        #Define prior omega
        #prior_mean_Omega.append(tf.zeros([self.d_in[i],1]))
        self.log_prior_var_Omega = T.tile(1 / (self.l)**0.5, (num_FF, 1)).T

        #Define posterior omega

        #get samples from  omega

        sample_value = np.random.randn(1, n_in, num_FF)

        self.sample_Omega_epsilon_0 = theano.shared(value=sample_value,
                                                    name='sample_Omega' +
                                                    number)
        #self.sample_Omega_epsilon_0 = srng.normal((1,n_in,num_FF))
        Omega_sample = self.sample_Omega_epsilon_0 * self.log_prior_var_Omega[
            None, :, :]
        Omega_samples = T.tile(Omega_sample, (num_MC, 1, 1))

        self.samples = Omega_samples
        #Define prior W
        #prior_mean_W = T.zeros(2*num_FF)

        #log_prior_var_W = T.ones(2*num_FF)

        #Define posterior W

        mean_mu_value = np.random.randn(2 * num_FF, n_out)  #* 1e-2
        self.mean_mu = theano.shared(value=mean_mu_value,
                                     name='mean_mu' + number,
                                     borrow=True)

        log_var_value = np.zeros((2 * num_FF, n_out))
        self.log_var_W = theano.shared(value=log_var_value,
                                       name='q_W' + number,
                                       borrow=True)

        #get samples from W
        sample_Omega_epsilon = srng.normal((num_MC, 2 * num_FF, n_out))
        f2 = T.cast(free_param, 'int64')
        N = srng.uniform(size=(f2 + n_tot, num_MC), low=1e-10, high=1.0)
        gamma_factor = T.sum(T.log(N), 0) * (-1)

        #gamma_factor=self.gamma_dist(free_param+n_tot,1,num_MC)

        sample_Omega_epsilon_gamma = (
            (free_param + n_tot) / gamma_factor)[:, None,
                                                 None] * sample_Omega_epsilon
        #MC*Nrff*dout
        W_samples = sample_Omega_epsilon_gamma * (T.exp(
            self.log_var_W)**0.5)[None, :, :] + self.mean_mu[None, :, :]

        # calculate layer N_MC*N*D_out
        F_next, updates = theano.scan(
            fn=lambda a, b, c: self.passage(a, b, c, num_FF),
            sequences=[input, Omega_samples, W_samples])

        #output
        self.output = F_next

        #KL-divergence
        #Omega

        #W
        #cross-entropy-term
        #self.KL_W=self.DKL_gaussian(self.mean_mu, self.log_var_W, prior_mean_W, log_prior_var_W)
        CH_const = T.gammaln((n_out + free_param) / 2) - T.log((
            (free_param - 2) * np.pi)**(n_out / 2)) - T.gammaln(free_param / 2)

        ch_mc, updates = theano.scan(fn=lambda a: (T.log(1 + T.sum(a * a, -1) /
                                                         (free_param - 2))),
                                     sequences=[W_samples])

        CH_MC = T.mean(T.sum(ch_mc, -1))

        CH = CH_const * num_FF - CH_MC * (free_param + n_out) / 2

        #entropy-term
        HF = self.shanon_Entropy_studentt(self.log_var_W, free_param + n_tot)

        self.KL_W = -HF - CH

        #parameter_setting
        self.all_params = [self.lhyp, self.ls, self.mean_mu, self.log_var_W]
        self.hyp_params = [self.lhyp, self.ls]
        self.variational_params = [self.mean_mu, self.log_var_W]
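The gamma_factor computation in the constructor above (the negated sum of T.log of uniforms) uses the fact that -log(U) for U ~ Uniform(0, 1) is Exponential(1), and a sum of k independent Exponential(1) draws is Gamma(k, 1). A quick NumPy check of that identity (sizes are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
k, n = 12, 200000
u = rng.uniform(1e-10, 1.0, size=(k, n))
gamma_samples = -np.log(u).sum(axis=0)             # Gamma(k, 1) draws
print(gamma_samples.mean(), gamma_samples.var())   # both close to k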
Example 31
class RBM(object):
    # Implement a Bernoulli Restricted Boltzmann Machine

    def __init__(self, input, n_visible, n_hidden):
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.input = input

        # Bias terms for visible units
        self.a = theano.shared(value=np.zeros(n_visible,
                                              dtype=theano.config.floatX),
                               borrow=True,
                               name='a')
        # Bias terms for hidden units
        self.b = theano.shared(np.zeros(n_hidden, dtype=theano.config.floatX),
                               borrow=True,
                               name='b')

        # Weights
        rng = np.random.RandomState(2468)
        #        Warray = np.asarray(rng.uniform(-0.1, 0.1, (n_visible, n_hidden)),
        #                                            dtype=theano.config.floatX)
        Warray = np.asarray(rng.uniform(
            -4 * np.sqrt(6. / (n_hidden + n_visible)),
            4 * np.sqrt(6. / (n_hidden + n_visible)), (n_visible, n_hidden)),
                            dtype=theano.config.floatX)
        self.W = theano.shared(Warray, borrow=True, name='W')
        #        self.Wt = self.W.T
        self.Wt = theano.shared(np.transpose(Warray), borrow=True, name='Wt')

        self.srng = RandomStreams(rng.randint(2**30))

    def v_sample(self, h):
        # Derive a sample of visible units from the hidden units h
        act = self.a + tensor.dot(h, self.Wt)
        prob = tensor.nnet.sigmoid(act)
        return [
            prob,
            self.srng.binomial(size=act.shape,
                               n=1,
                               p=prob,
                               dtype=theano.config.floatX)
        ]

    def h_sample(self, v):
        # Derive a sample of hidden units from the visible units v
        act = self.b + tensor.dot(v, self.W)
        prob = tensor.nnet.sigmoid(act)
        return [
            prob,
            self.srng.binomial(size=act.shape,
                               n=1,
                               p=prob,
                               dtype=theano.config.floatX)
        ]

    def output(self):
        prob, hS = self.h_sample(self.input)
        return prob

    def gibbs_step_hvh(self, h):
        # A Gibbs step
        nv_prob, nv_sample = self.v_sample(h)
        nh_prob, nh_sample = self.h_sample(nv_sample)
        return [nv_prob, nv_sample, nh_prob, nh_sample]

    def gibbs_step_hvhp(self, hp):
        # A Gibbs step
        nv_prob, nv_sample = self.v_sample(hp)
        nh_prob, nh_sample = self.h_sample(nv_prob)
        return [nv_prob, nv_sample, nh_prob, nh_sample]

    def gibbs_step_vhv(self, v):
        # A Gibbs step
        nh_prob, nh_sample = self.h_sample(v)
        nv_prob, nv_sample = self.v_sample(nh_sample)
        return [nv_prob, nv_sample, nh_prob, nh_sample]

    def free_energy(self, v_sample):
        ''' Function to compute the free energy '''
        wx_b = tensor.dot(v_sample, self.W) + self.b
        vbias_term = tensor.dot(v_sample, self.a)
        hidden_term = tensor.sum(tensor.log(1 + tensor.exp(wx_b)), axis=1)
        return -hidden_term - vbias_term

    def get_cost_updates(self,
                         k=1,
                         lr=0.01,
                         lam1=0.0,
                         lam2=0.0,
                         batch_size=None,
                         persistent=None,
                         stocastic_steps=True):
        # Contrastive divergence
        # Positive phase
        h0_prob, h0_sample = self.h_sample(self.input)

        if persistent is None:
            h_sample = h0_sample
        else:
            h_sample = persistent


        #        self.Wt = self.W.T

        # Negative phase
        if stocastic_steps:
            ([nv_probs, nv_samples, nh_probs, nh_samples],
             updates) = theano.scan(self.gibbs_step_hvh,
                                    outputs_info=[None, None, None, h_sample],
                                    n_steps=k,
                                    name="gibbs_update")
        else:
            ([nv_probs, nv_samples, nh_probs, nh_samples],
             updates) = theano.scan(self.gibbs_step_hvhp,
                                    outputs_info=[None, None, h0_prob, None],
                                    n_steps=k,
                                    name="gibbs_update")

        vK_prob = nv_probs[-1]
        vK_sample = nv_samples[-1]
        hK_prob = nh_probs[-1]
        hK_sample = nh_samples[-1]

        if persistent:
            updates[persistent] = hK_sample

        # See https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
        # I keep sigma unit as reported in https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf 13.2

        eps = tensor.cast(lr, dtype=theano.config.floatX)

        params = [self.a, self.b]

        if batch_size is None:
            cost = tensor.mean(self.free_energy(self.input)) -\
                   tensor.mean(self.free_energy(vK_sample))
            # We must not compute the gradient through the gibbs sampling
            params += [self.W]
            gparams = tensor.grad(cost, params, consider_constant=[vK_sample])
            for param, gparam in zip(params, gparams):
                updates[param] = param - eps * gparam
            updates[self.Wt] = updates[self.W].T
        else:
            eps0 = eps / tensor.cast(batch_size, dtype=theano.config.floatX)

            W_grad = eps0 * (tensor.dot(self.input.T, h0_prob) -
                             tensor.dot(vK_prob.T, hK_prob))

            W_gradT = W_grad.T

            a_grad = eps * tensor.mean(self.input - vK_prob, axis=0)

            b_grad = eps * tensor.mean(h0_prob - hK_prob, axis=0)

            gparams = [a_grad, b_grad]

            for param, gparam in zip(params, gparams):
                updates[param] = param + gparam

            if (lam1 + lam2) == 0:
                updates[self.W] = self.W + W_grad
                updates[self.Wt] = self.Wt + W_gradT
            else:
                # Used in M. Liang et al. 2015
                l1 = tensor.cast(2 * lam1 * lr, dtype=theano.config.floatX)
                l2 = tensor.cast(1 - 2 * lam2 * lr, dtype=theano.config.floatX)

                updates[self.W] = (l2 * self.W + W_grad ) /\
                              (1 + l1/tensor.abs_(self.W))
                updates[self.Wt] = (l2 * self.Wt + W_gradT ) /\
                              (1 + l1/tensor.abs_(self.Wt))

        if stocastic_steps:
            sme = tensor.mean(tensor.sum((self.input - vK_sample)**2, axis=1))
        else:
            sme = tensor.mean(tensor.sum((self.input - vK_prob)**2, axis=1))

        return sme, updates

    def training(self,
                 dataset,
                 batch_size,
                 training_epochs,
                 k,
                 lr,
                 lam1=0,
                 lam2=0,
                 CD=True,
                 persistent=None,
                 stocastic_steps=True,
                 data_shuffle=False,
                 display_fn=None):
        index = tensor.lscalar('index')
        train_set = theano.shared(dataset, borrow=True)

        sme, updates = self.get_cost_updates(k=k,
                                             lr=lr,
                                             persistent=persistent,
                                             lam1=lam1,
                                             lam2=lam2,
                                             stocastic_steps=stocastic_steps)

        n_data = dataset.shape[0]

        if not data_shuffle:
            train = theano.function(
                [index],
                sme,
                updates=updates,
                givens={
                    self.input:
                    train_set[index * batch_size:(index + 1) * batch_size]
                },
                name="train")
        else:
            indexes = tensor.floor(
                self.srng.uniform((1, batch_size), low=0, high=n_data))
            train = theano.function([],
                                    sme,
                                    updates=updates,
                                    givens={
                                        self.input:
                                        train_set[tensor.cast(
                                            indexes, dtype='int64')[0]]
                                    },
                                    name="train")

        for epoch in xrange(training_epochs):
            sme_list = []
            if not data_shuffle:
                for n_batch in xrange(n_data // batch_size):
                    sme_list.append(train(n_batch))
            else:
                sme_list.append(train())

            print("Training epoch %d, reconstruction error %f" %
                  (epoch, np.mean(sme_list)))

            if display_fn is not None:
                # Construct image from the weight matrix
                Wimg = display_fn(self.W.get_value(borrow=True), self.n_hidden)
                scipy.misc.imsave('filters_at_epoch_%i.png' % epoch, Wimg)

        if display_fn is not None:
            # Construct image from the weight matrix after the final epoch
            Wimg = display_fn(self.W.get_value(borrow=True), self.n_hidden)
            scipy.misc.imsave('filters_at_epoch_%i.png' % epoch, Wimg)
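
# --- A minimal standalone sketch (not part of the original snippet) of the
# data_shuffle branch above: random minibatch indices are drawn from a uniform
# stream, floored, cast to integers, and used to index a shared dataset.
# All names below are illustrative.
import numpy
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(1234)
data = theano.shared(numpy.random.rand(100, 5).astype(theano.config.floatX))
batch_size = 8
idx = tensor.floor(srng.uniform((1, batch_size), low=0, high=100))
batch = data[tensor.cast(idx, dtype='int64')[0]]
get_batch = theano.function([], batch)
print(get_batch().shape)  # (8, 5)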
Exemplo n.º 32
0
def makemodel(
    name="ADGM" if ADGM else "SDGM",
    nls=["rectify"] * 2,
    seed=seed,
    descenter=G.Adam,
    K=K,
    L=L,
):
    #$ tensor_shapes
    """
  Creates the ADGM or SDGM model.

  Xl has dimension (Nl, 1, 1, 1, X)
  Xu has dimension (Nu, 1, 1, 1, X)
  Yl has dimension (Nl, 1, 1, 1, Y)
  Yu has dimension ( 1, 1, 1, Y, Y)
  EAl has dimension (Nl, K, 1, 1, A)
  EAu has dimension (Nu, K, 1, 1, A)
  EZl has dimension (Nl, K, L, 1, Z)
  EZu has dimension (Nu, K, L, Y, Z)
  Al will have dimension (Nl, K, 1, 1, A)
  Au will have dimension (Nu, K, 1, 1, A)
  Zl will have dimension (Nl, K, L, 1, Z)
  Zu will have dimension (Nu, K, L, Y, Z)
  """
    #$

    Print = Log("../log/{}".format(name), "w", quiet=True)
    model = Model(name=name,
                  shuffledata=shuffledata,
                  thresholddata=thresholdX,
                  normalizedata=normalizeX,
                  seed=seed,
                  maxvar=highvaronly)
    model.Print = Print
    model.loadmomentum = loadmomentum
    model.descenter = descenter(gradnorm)
    networks = OrderedDict()
    rng = MRG_RandomStreams()

    X = model.XCols

    model.constants = OrderedDict([
        ("                    ", model.name),
        ("shuffle data?", shuffledata),
        ("data seed", model.seed),
        ("Nu", Nu),
        ("Nl", Nl),
        ("X", X),
        ("Y", Y),
        ("Z", Z),
        ("A", A),
        ("L", L),
        ("K", K),
        ("Kt", Kt),
        ("aJL", aJL),
        ("aJU", aJU),
        ("aJA", aJA),
        ("aJW", aJW),
        ("gradient norm?", gradnorm),
        ("std. normal A?", Anormal),
        ("A to Z?", AtoZ),
        ("gaussian X?", gaussianX),
        ("sample X?", sampleX),
        ("threshold X?", thresholdX),
        ("normalize X?", normalizeX),
        ("high var only?", highvaronly),
        ("NSaves", NSaves),
        ("enable save?", enablesave),
        ("combolength", combolength),
        ("load momentum?", loadmomentum),
        ("juggle momentum?", jugglemomentum),
        ("random juggler?", randomjuggler),
        ("epsilon", epsilon),
    ])
    for name, val in model.constants.items():
        model.Print("{:>20s}".format(name), val)

    #$ px_stack
    # Create the networks for px
    ins = [Y, Z] if ADGM else [A, Y, Z]
    last = "linear" if gaussianX else "sigmoid"
    O = [X, X] if gaussianX else [X]
    fx = Stack(insizes=ins, outsizes=O, hidnls=nls, lastnl=last)
    networks["fx"] = fx
    #$

    #$ pa_stack
    # Create the networks for pa
    ins = [X, Y, Z] if ADGM else [Y, Z]
    fa = Stack(insizes=ins, outsizes=[A, A], hidnls=nls)
    if not Anormal:
        networks["fa"] = fa
    #$

    #$ qz_stack
    # Create the networks for qz
    ins = [A, X, Y] if AtoZ else [X, Y]
    fz = Stack(insizes=ins, outsizes=[Z, Z], hidnls=nls)
    networks["fz"] = fz
    #$

    #$ qax_stack
    # Create the networks for qax
    ins = [X]
    fax = Stack(insizes=ins, outsizes=[A, A], hidnls=nls)
    networks["fax"] = fax
    #$

    #$ qy_stack
    # Create the network for qy.  Outputs are
    # probabilities, so last layer is always
    # softmax.
    ins = [A, X]
    last = "softmax"
    fy = Stack(insizes=ins, outsizes=[Y], hidnls=nls, lastnl=last)
    networks["fy"] = fy
    #$

    #$ model.networks
    # Collect all of the parameters together
    # so we can optimize the objectives with
    # respect to them.
    model.networks = networks
    model.params = []
    for name, net in model.networks.items():
        model.Print("{:>20s}".format(name), net)
        model.params += net.params
    #$

    # For now, throw an error if Nl or Nu are
    # not specified.
    # Eventually, we would like to be able to
    # handle only Nl, only Nu, or both Nl and Nu.
    if Nl is None or Nu is None:
        raise ValueError("Need to specify Nl and Nu")

    #$ shared_inputs
    # Xl, Ylh, and Xu are shared variables on the
    # GPU.  For Xu, we take random batch slices.
    # We assume for now that all (Xl,Yl) are used
    # in each batch.
    Xl2 = model.Xl[:Nl]
    Yl2 = model.Ylh[:Nl]

    bidxs = rng.uniform((Nu, )) * model.Xu.shape[0]
    bidxs = T.cast(bidxs, "int32")
    Xu2 = model.Xu[bidxs]
    #$

    #$ sampleX
    # If X is binary, then sample it on each
    # minibatch.  This idea borrowed from Maaloe's
    # code.  Not sure if it helps.
    #
    # Keep track of Xl2s, Yl2, and Xu2s so we can
    # do theano variable substitution later.
    if not gaussianX and sampleX:
        Xl2s = rng.binomial(n=1,
                            p=Xl2,
                            size=Xl2.shape,
                            dtype=theano.config.floatX)
        Xu2s = rng.binomial(n=1,
                            p=Xu2,
                            size=Xu2.shape,
                            dtype=theano.config.floatX)
    else:
        Xl2s = Xl2
        Xu2s = Xu2
    #$

    #$ dimshuffled
    # Reshape the labeled set matrices
    # to 5th-order tensors.
    Xl = Xl2s.dimshuffle([0, "x", "x", "x", 1])
    Yl = Yl2.dimshuffle([0, "x", "x", "x", 1])

    # Xu is known, but Yu is not known.
    # Create one possible Y per class.
    Xu = Xu2s.dimshuffle([0, "x", "x", "x", 1])
    Yu = T.eye(Y, Y).dimshuffle(["x", "x", "x", 0, 1])
    #$

    #$ noises
    # EZ and EA will be used to approximate
    # the integrals using L samples for Z and
    # K samples for A.
    #
    # Create shared variables for K and L so we
    # can do variable substitutions later.
    K = theano.shared(K, name="samplesA")
    L = theano.shared(L, name="samplesZ")
    EAl = rng.normal((Xl.shape[0], K, 1, 1, A))
    EAu = rng.normal((Xu.shape[0], K, 1, 1, A))
    EZl = rng.normal((Xl.shape[0], K, L, 1, Z))
    EZu = rng.normal((Xu.shape[0], K, L, Y, Z))
    #$

    # Assign inputs to the model.
    # We assume that all data is already on the GPU.
    # Furthermore, we create functions that
    # evaluate the objectives on the test data
    # directly.  Therefore, there are no inputs
    # needed for calling the training function.
    model.inputs = []

    #$ al_au
    # Find the latent variables.
    # Note that multiplying by E effectively tiles
    # all latent variables L or K times.
    #
    # Auxiliary A has to be found first
    # because latent Z is a function of it.
    muaxl, sdaxl = fax([Xl])
    muaxu, sdaxu = fax([Xu])
    Al = muaxl + T.exp(sdaxl) * EAl
    Au = muaxu + T.exp(sdaxu) * EAu
    #$

    #$ zl_zu
    # Compute Z.
    inputl = [Al, Xl, Yl] if AtoZ else [Xl, Yl]
    inputu = [Au, Xu, Yu] if AtoZ else [Xu, Yu]
    muzl, sdzl = fz(inputl)
    muzu, sdzu = fz(inputu)
    Zl = muzl + T.exp(sdzl) * EZl
    Zu = muzu + T.exp(sdzu) * EZu
    #$

    #$ muxl_muxu
    # Find the reconstruction means and
    # standard deviations.
    # Note: sdxl and sdxu are used only if
    #       gaussian is True.  The binary case
    #       ignores those.
    # If ADGM, then X is a function of YZ.
    # If SDGM, then X is a function of AYZ.
    inputl = [Yl, Zl] if ADGM else [Al, Yl, Zl]
    inputu = [Yu, Zu] if ADGM else [Au, Yu, Zu]
    if gaussianX:
        muxl, sdxl = fx(inputl)
        muxu, sdxu = fx(inputu)
    else:
        muxl = fx(inputl)
        muxu = fx(inputu)
    #$

    #$ mual_muau
    # Find mu and sd for A in the generative
    # (reconstruction) direction.
    # If ADGM, then A depends on XYZ.
    # If SDGM, then A depends on YZ.
    inputl = [Xl, Yl, Zl] if ADGM else [Yl, Zl]
    inputu = [Xu, Yu, Zu] if ADGM else [Yu, Zu]
    mual, sdal = fa(inputl)
    muau, sdau = fa(inputu)
    #$

    #$ JL_1
    # Find the component probabilities and the
    # labeled objective, JL.
    l_pz = loggauss(Zl)
    l_qz = loggauss(Zl, muzl, sdzl)

    l_py = T.log(1.0 / Y)

    if gaussianX:
        l_px = loggauss(Xl, muxl, sdxl)
    else:
        l_px = logbernoulli(Xl, muxl)
    #$

    #$ JL_2
    # In Maaloe's first revision, A is disconnected
    # in the generative model, so we assume it
    # to be standard normal.
    #
    # In the more updated version, A is fed into
    # by X, Y, and Z.
    # In SDGM, A is generated by Z and Y.
    normal = zero if Anormal else one
    l_pa = loggauss(Al, normal * mual, normal * sdal)
    l_qa = loggauss(Al, muaxl, sdaxl)
    #$

    #$ JL_3
    JL = l_qz + l_qa
    JL = JL - l_px - l_py - l_pz - l_pa
    JL = batchaverage(exA(exZ(JL)))
    JL = aJL * JL
    #$

    #$ JU_1
    # Find the component probabilities and the
    # unlabeled objective, JU.

    # The output of fy(Au, Xu) is pi.
    # (Nu, K, 1, 1, Y)
    # We need to relocate the last axis.
    # (Nu, K, 1, Y, 1)
    inputu = [Au, Xu]
    pi = fy(inputu).dimshuffle([0, 1, "x", 4, "x"])
    #$

    #$ JU_2
    u_pz = loggauss(Zu)
    u_qz = loggauss(Zu, muzu, sdzu)

    u_py = T.log(1.0 / Y)
    u_qy = T.log(pi)

    u_pa = loggauss(Au, normal * muau, normal * sdau)
    u_qa = loggauss(Au, muaxu, sdaxu)

    if gaussianX:
        u_px = loggauss(Xu, muxu, sdxu)
    else:
        u_px = logbernoulli(Xu, muxu)
    #$

    #$ JU_3
    JU = u_qz + u_qa + u_qy
    JU = JU - u_px - u_py - u_pz - u_pa
    JU = batchaverage(exA(classsum(exZ(JU), pi)))
    JU = aJU * JU
    #$

    #$ JA
    # Make sure that the known labels are correctly
    # assigned.
    # Yl has dimension (Nl, 1, 1, 1, Y)
    # Al,Xl has dimension (Nl, K, 1, 1, A+X)
    # fy(Al,Xl) is (Nl, K, 1, 1, Y)
    #
    # Yl is one-hot.
    # Multiply by Yl and perform a sum over
    # Y to get the one probability out, then neg
    # log it, average it over K, and
    # average it over N.
    inputl = [Al, Xl]
    JA = batchaverage(exA(-T.log(T.sum(fy(inputl) * Yl, axis=-1))))
    JA = aJA * JA
    #$

    # Regularize the weight matrices of the
    # networks so they do not stray far from zero.
    # Copied from Maaloe's github code.
    JW = zero
    for p in model.params:
        if 'W' not in str(p):
            continue
        JW += T.mean(p**two)
    JW = aJW * JW

    JCombined = JL + JU + JA + JW

    # Stick the objectives into the model.
    model.objective = JCombined

    #$ prediction_comments
    # Create a function for predictions!
    # We need to evaluate a bunch of values for A,
    # so Xt is an N by X dimensional matrix and
    # Et is a K by A dimensional matrix.
    # Reshape Xt to (N, 1, X) and
    #         Et to (1, K, A).
    #
    # Then, At = fmuax(Xt) + Et*fsdax(Xt)
    # and has a dimension of (N, K, A).
    #
    # Class probabilities pi are fy(AXt)
    # and have shape (N, K, Y).  Take their
    # log, average over K, then argmax over Y
    # to find class predictions.
    #$

    #$ prediction_function
    Xt2 = T.matrix("Xt")
    Et2 = rng.normal((Kt, A))
    Xt = Xt2.dimshuffle([0, "x", 1])
    Et = Et2.dimshuffle(["x", 0, 1])
    muat, sdat = fax([Xt])
    At = muat + T.exp(sdat) * Et

    inputt = [At, Xt]
    prediction = T.argmax(T.mean(T.log(fy(inputt)), axis=1), axis=-1)

    predict = theano.function(inputs=[Xt2],
                              outputs=prediction,
                              allow_input_downcast=True)
    model.predict = predict
    #$

    #$ classification
    Yt = T.ivector("Yt")
    accuracyT = T.eq(Yt, prediction).mean(dtype=theano.config.floatX)

    model.accuracyT = theano.function(inputs=[],
                                      outputs=accuracyT,
                                      givens={
                                          Xt2: model.Xt,
                                          Yt: model.Yt
                                      },
                                      allow_input_downcast=True)
    #$

    model.accuracyL = theano.function(inputs=[],
                                      outputs=accuracyT,
                                      givens={
                                          Xt2: model.Xl,
                                          Yt: model.Yl
                                      },
                                      allow_input_downcast=True)

    # Create a stats function that outputs
    # extra information.
    model.adds = [
        JL,
        JU,
        JA,
        JW,
        T.mean(l_qa),
        T.mean(u_qa),
        T.mean(u_qy),
        T.mean(l_qz),
        T.mean(u_qz),
        -T.mean(l_px.max(axis=AxisY)),
        -T.mean(u_px.max(axis=AxisY)),
        -T.mean(l_pa),
        -T.mean(u_pa),
    ]
    model.headings = [
        "J",
        "JL",
        "JU",
        "JA",
        "JW",
        "l q(a)",
        "u q(a)",
        "u q(y)",
        "l q(z)",
        "u q(z)",
        "l -p(x)",
        "u -p(x)",
        "l -p(a)",
        "u -p(a)",
    ]
    model.outputs = [model.objective] + model.adds
    model.stats = theano.function(inputs=[],
                                  outputs=model.outputs,
                                  givens={
                                      Xl2s: model.Xl[:1000],
                                      Yl2: model.Ylh[:1000],
                                      Xu2s: model.Xu[:1000],
                                      K: 1
                                  },
                                  allow_input_downcast=True)

    return model
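
# --- Illustrative sketch (not from the original code) of the broadcasting used
# in the noise blocks above: a mean whose sample axes are broadcastable (here
# shape (N, K, 1, 1, Z)) combined with noise of shape (N, K, L, 1, Z) yields
# L samples per (data point, A-sample) pair without explicit tiling.
import numpy as np

N, K, L, Z = 4, 3, 2, 5
mu = np.zeros((N, K, 1, 1, Z))
log_sd = np.zeros((N, K, 1, 1, Z))
eps = np.random.randn(N, K, L, 1, Z)
z = mu + np.exp(log_sd) * eps
print(z.shape)  # (4, 3, 2, 1, 5)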
Exemplo n.º 33
0

real_data_int = T.itensor4('images')
real_data = (T.cast(real_data_int, 'float32') * (2. / 255) - 1.).reshape(
    (-1, 64 * 64 * 3))

fake_data = Generator(BATCH_SIZE)

disc_out = Discriminator(T.concatenate([real_data, fake_data], axis=0))
disc_real = disc_out[:BATCH_SIZE]
disc_fake = disc_out[BATCH_SIZE:]

gen_cost = -T.mean(Discriminator(fake_data))
disc_cost = T.mean(disc_fake) - T.mean(disc_real)

alpha = srng.uniform(size=(BATCH_SIZE, 1), low=0., high=1.)
differences = fake_data - real_data
interpolates = real_data + (alpha * differences)
gradients = T.grad(T.sum(Discriminator(interpolates)), interpolates)
slopes = T.sqrt(T.sum(T.sqr(gradients), axis=1))
lipschitz_penalty = T.mean((slopes - 1.)**2)
disc_cost += 10 * lipschitz_penalty

gen_params = lib.search(
    gen_cost, lambda x: hasattr(x, 'param') and 'Generator' in x.name)
discrim_params = lib.search(
    disc_cost, lambda x: hasattr(x, 'param') and 'Discriminator' in x.name)

gen_grads = T.grad(gen_cost, gen_params)
discrim_grads = T.grad(disc_cost, discrim_params)
gen_grads = [T.clip(g, lib.floatX(-1.0), lib.floatX(1.0)) for g in gen_grads]
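
# --- A self-contained toy sketch of the gradient-penalty term above, using a
# linear critic on 2-D data.  The critic, batch size, and data here are
# illustrative assumptions, not part of the original code.
import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(42)
W = theano.shared(np.random.randn(2).astype(theano.config.floatX))

real = T.matrix('real')
fake = T.matrix('fake')
alpha = srng.uniform(size=(4, 1), low=0., high=1.)     # one alpha per sample
inter = real + alpha * (fake - real)                   # random interpolates
grads = T.grad(T.sum(T.dot(inter, W)), inter)          # critic gradient
slopes = T.sqrt(T.sum(T.sqr(grads), axis=1))
penalty = T.mean((slopes - 1.) ** 2)                   # push norms toward 1

f = theano.function([real, fake], penalty, allow_input_downcast=True)
print(f(np.random.randn(4, 2), np.random.randn(4, 2)))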
Exemplo n.º 34
0
def train(num_epochs,
          filename,
          gen_lr=5e-5,
          beta_1_gen=0.5,
          beta_1_disc=0.5,
          print_freq=50,
          disc_lr=5e-5,
          num_iter_gen=1,
          n_samples=20,
          image_dir=None,
          binary_dir=None,
          gt_image_dir=None):
    
    f = h5py.File('/home/devon/Data/basic/celeba_64.hdf5', 'r')
    arr = f['features'][:1000]
    arr = arr.transpose(0, 2, 3, 1)
    arr = arr.reshape((arr.shape[0] * arr.shape[1], arr.shape[2], arr.shape[3]))
    img = Image.fromarray(arr).convert('P', palette=Image.ADAPTIVE, colors=16)

    
    # Load the dataset
    log_file = open(filename, 'w')
    print("Loading data...")
    print("Testing RW_DCGAN ...")
    log_file.write("Testing RW_DCGAN...\n")
    log_file.write("Loading data...\n")
    log_file.write("Num_epochs: {}, disc_lr: {}, gen_lr: {}\n".format(num_epochs,
                                                                      disc_lr,
                                                                      gen_lr))
    log_file.flush()
    train_stream, test_stream = load_stream(img=img)
    # Prepare Theano variables for inputs and targets
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')
    #target_var = T.ivector('targets')

    # Create neural network model
    print("Building model and compiling GAN functions...")
    log_file.write("Building model and compiling GAN functions...\n")
    parameter = initial_parameters()
    generator = build_generator(parameter, noise_var)
    discriminator = build_discriminator(input_var)

    trng = RandomStreams(random.randint(1, 1000000))

    # Sample
    batch_size = noise_var.shape[0]
    dim_c = input_var.shape[1]
    dim_x = input_var.shape[2]
    dim_y = input_var.shape[3]
    
    R = trng.uniform(size=(n_samples, batch_size, dim_c, dim_x, dim_y), dtype=floatX)

    g_output = lasagne.layers.get_output(generator)
    samples = (R <= T.shape_padleft(g_output)).astype(floatX)

    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator)
    fake_out = lasagne.layers.get_output(
        discriminator, samples.reshape(
            (n_samples * batch_size, dim_c, dim_x, dim_y)))
    fake_out_ = fake_out.reshape((n_samples, batch_size))
    
    log_d1 = -T.nnet.softplus(-fake_out_)
    log_d0 = -(fake_out_ + T.nnet.softplus(-fake_out_))
    log_w = log_d1 - log_d0
    g_output_ = T.shape_padleft(T.clip(g_output, 1e-7, 1. - 1e-7))
    log_g = (samples * T.log(g_output_) + (1. - samples) * T.log(1. - g_output_)).sum(axis=(2, 3, 4))

    # Find normalized weights.
    log_N = T.log(log_w.shape[0]).astype(floatX)
    #log_Z_est = T.maximum(log_sum_exp(log_w - log_N, axis=0), -4)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_Z_est_ = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    w_tilde_ = theano.gradient.disconnected_grad(w_tilde)

    #Create gen_loss
    generator_loss = -(w_tilde_ * log_g).sum(0).mean()
    #generator_loss = (T.nnet.softplus(-fake_out)).mean() -- Original GAN loss

    # Create disc_loss
    discriminator_loss = (T.nnet.softplus(-real_out)).mean() + (T.nnet.softplus(-fake_out)).mean() + fake_out.mean()

    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)

    # Losses / updates
    
    generator_updates = lasagne.updates.adam(
        generator_loss, generator_params, learning_rate=gen_lr, beta1=beta_1_gen)
    discriminator_updates = lasagne.updates.adam(
        discriminator_loss, discriminator_params, learning_rate=disc_lr,
        beta1=beta_1_disc)

    '''
    generator_updates = lasagne.updates.rmsprop(
        generator_loss, generator_params, learning_rate=gen_lr)
    discriminator_updates = lasagne.updates.rmsprop(
        discriminator_loss, discriminator_params, learning_rate=disc_lr)
    '''

    train_discriminator = theano.function([noise_var, input_var],
                               [(real_out > 0.).mean(), discriminator_loss],
                                allow_input_downcast=True,
                               updates=discriminator_updates)

    train_generator = theano.function([noise_var, input_var],
                               [(fake_out < 0.).mean(),
                                generator_loss, log_Z_est_.mean()],
                                allow_input_downcast=True,
                               updates=generator_updates)

    # Compile another function generating some data
    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))

    # Finally, launch the training loop.
    print("Starting training of GAN...")
    log_file.write("Starting training of GAN...\n")
    log_file.flush()
    # We iterate over epochs:
    
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        print("Epoch: ", epoch)
        train_err = 0
        train_batches = 0
        start_time = time.time()
        prefix = "ep_{}".format(epoch)
        
        for batch in train_stream.get_epoch_iterator():
            inputs = np.array(batch[0], dtype=np.float32)
            noise = lasagne.utils.floatX(np.random.rand(len(inputs), 100))

            samples_print_gt = convert_to_rgb(inputs, img)
            print_images(samples_print_gt[:64], 8, 8, file=gt_image_dir + prefix + '_gt.png')
            
            train_discriminator(noise, inputs)
            disc_train_out = train_discriminator(noise, inputs)
            p_real, disc_loss = disc_train_out

            gen_loss_array = []
            p_fake_array = []
            z_est_array = []

            for i in range(num_iter_gen):
                gen_train_out = train_generator(noise, inputs)
                p_fake, gen_loss, z_est = gen_train_out
                gen_loss_array.append(gen_loss)
                p_fake_array.append(p_fake)
                z_est_array.append(z_est)

            gen_loss = np.mean(gen_loss_array)
            p_fake = np.mean(p_fake_array)
            z_est = np.mean(z_est_array)

            train_batches += 1
            if train_batches % print_freq == 0:
                print('-' * 80)
                print("Batch Number: {}, Epoch Number: {}".format(train_batches + 1, epoch + 1))
                print("Generator: p_fake: {}, gen_loss: {}, z_est: {}".format(p_fake, gen_loss, z_est))
                print("Discriminator: p_real: {}, disc_loss: {}".format(p_real, disc_loss))
                log_file.write('-' * 80 + '\n')
                log_file.write("Batch Number: {}".format(train_batches + 1, epoch + 1) + '\n')
                log_file.write("Generator: p_fake: {}, gen_loss: {} \n".format(p_fake, disc_loss))
                log_file.write("Discriminator: p_real: {}, disc_loss: {} \n".format(p_real, disc_loss))
                log_file.write('-' * 80 + '\n')
                samples = gen_fn(lasagne.utils.floatX(np.random.rand(5000, 100)))
                samples = (samples >= 0.5).astype('int')
                samples = samples[0:49]
                samples_print = convert_to_rgb(samples, img)
                print_images(samples_print, 7, 7, file=image_dir + prefix + "_{}".format(train_batches) +'_gen.png')
                
                samples_print_gt = convert_to_rgb(inputs, img)
                print_images(samples_print_gt[:64], 8, 8, file=gt_image_dir + prefix + '_gt.png')

        # Then we print the results for this epoch:
        print("Total Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        log_file.write("Total Epoch {} of {} took {:.3f}s\n".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{}".format(train_err / train_batches))
        log_file.write("  training loss:\t{}\n".format(train_err / train_batches))
        log_file.flush()

        # And finally, we plot some generated data
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(5000, 100)))
        samples = (samples >= 0.5).astype('int')
        samples = samples[0:49]
        samples_print = convert_to_rgb(samples, img)
        print_images(samples_print, 7, 7, file=image_dir + prefix + '_gen.png')
        #if epoch == num_epochs - 1: #save binary data for further calculation
        np.savez(binary_dir + prefix + '_celeba_gen_params.npz', *lasagne.layers.get_all_param_values(generator))


    log_file.flush()
    log_file.close()
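
# --- Hedged NumPy sketch (not from the original code) of the importance-weight
# normalisation used for the generator loss above: log_w holds log d/(1-d) for
# n_samples draws per batch element; the weights are normalised over samples.
import numpy as np

def normalized_weights(log_w):
    # log_w: (n_samples, batch_size)
    log_N = np.log(log_w.shape[0])
    log_Z = np.logaddexp.reduce(log_w - log_N, axis=0)  # log-sum-exp over samples
    log_w_tilde = log_w - log_Z[None, :] - log_N
    return np.exp(log_w_tilde)

w = normalized_weights(np.random.randn(20, 8))
print(w.sum(axis=0))  # ~1 for every batch element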
Exemplo n.º 35
0
class ConcreteSampleLayer(lasagne.layers.Layer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.

    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma using
        the nonlinearity function as described below. Effectively this means
        that the nonlinearity function controls what log_var parametrizes. A few
        common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma

    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.

    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.

    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to transform
        it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """
    def __init__(self,
                 logits,
                 eq_samples=1,
                 iw_samples=1,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(ConcreteSampleLayer, self).__init__(logits, **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, deterministic=False, **kwargs):
        logits = input
        batch_size, num_latent = logits.shape

        if deterministic:
            p = T.nnet.softmax(logits)
            z = p.dimshuffle(0, 'x', 'x', 1) * T.ones(
                (batch_size, self.eq_samples, self.iw_samples, num_latent))
            z_reshaped = z.reshape((-1, num_latent))
        else:
            shape = (batch_size, self.eq_samples, self.iw_samples, num_latent)
            U = self._srng.uniform(shape, dtype=theano.config.floatX)
            # Gumbel noise; the 1e-4 terms guard against log(0)
            gumbel_sample = -T.log(-T.log(U + 1e-4) + 1e-4)
            y = logits.dimshuffle(0, 'x', 'x', 1) + gumbel_sample
            y_reshaped = y.reshape((-1, num_latent))
            # softmax relaxation with temperature fixed at 1
            z_reshaped = T.nnet.softmax(y_reshaped / 1)

        return z_reshaped
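
# --- A minimal usage sketch of the layer above.  The network and shapes are
# illustrative assumptions; the Gumbel-softmax samples and the deterministic
# softmax output are compiled into one Theano function.
import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as LL

x = T.matrix('x')
l_in = LL.InputLayer(shape=(None, 20), input_var=x)
l_logits = LL.DenseLayer(l_in, num_units=10, nonlinearity=None)
l_z = ConcreteSampleLayer(l_logits, eq_samples=2, iw_samples=3)

z = LL.get_output(l_z)                           # stochastic samples
z_det = LL.get_output(l_z, deterministic=True)   # softmax probabilities
f = theano.function([x], [z, z_det], allow_input_downcast=True)
samples, probs = f(np.random.rand(5, 20))
print(samples.shape)  # (5 * 2 * 3, 10)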
Exemplo n.º 36
0
    def sampling(self, num_samples, epoch):

        sample_batch_size = 10
        self.args.batch_size = sample_batch_size

        rng = np.random.RandomState(self.args.seed)  # fixed random seeds
        theano_rng = MRG_RandomStreams(rng.randint(2**15))
        lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
        data_rng = np.random.RandomState(self.args.seed_data)
        self.G_weights_layer = nn.softmax_weights(
            self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))
        self.D_weights_layer = nn.softmax_weights(
            self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))

        self.G_weights = LL.get_output(self.G_weights_layer,
                                       None,
                                       deterministic=True)
        self.D_weights = LL.get_output(self.D_weights_layer,
                                       None,
                                       deterministic=True)

        Gen_x_list = []
        z = theano_rng.uniform(size=(sample_batch_size, 50))
        y_1hot = T.fmatrix()
        for i in range(self.args.ng):
            gen_layers_i, gen_x_i = self.get_generator(self.meanx, z, y_1hot)
            self.G_layers.append(gen_layers_i)
            Gen_x_list.append(gen_x_i)

        for i in range(self.args.nd):
            disc_layers_i, disc_layer_adv_i, disc_layer_z_recon_i = self.get_discriminator(
            )
            self.D_layers.append(disc_layers_i)
            self.D_layer_adv.append(disc_layer_adv_i)
            self.D_layer_z_recon.append(disc_layer_z_recon_i)

        self.load_model(epoch)

        samplefun_list = []
        for i in range(self.args.ng):
            samplefun_list.append(
                th.function(inputs=[self.meanx, y_1hot],
                            outputs=Gen_x_list[i]))

        mix_weights = th.function(inputs=[self.dummy_input],
                                  outputs=[self.G_weights])
        g_mix_weights = mix_weights(0.0)
        ''' load mean img '''
        meanimg = np.load('data/meanimg.npy')

        samples = []
        prob_list = g_mix_weights[0].tolist()[0]
        prob_list[-1] = 1 - sum(prob_list[:-1])

        refy = np.zeros((sample_batch_size, ), dtype=np.int)
        for i in range(sample_batch_size):
            refy[i] = i % 10

        refy_1hot = np.zeros((sample_batch_size, 10), dtype=np.float32)
        refy_1hot[np.arange(sample_batch_size), refy] = 1
        for k in range(num_samples // sample_batch_size):
            Gen_indx = np.random.choice(5, 1, p=prob_list)
            #Gen_indx = [4]
            #z = np.uniform(size=(sample_batch_size, 16), dtype=np.float32)
            imgs = samplefun_list[Gen_indx[0]](meanimg, refy_1hot)
            imgs = imgs + meanimg
            imgs = np.transpose(
                np.reshape(imgs, (sample_batch_size, 3, 32, 32)), (0, 2, 3, 1))
            samples.append(imgs)

        samples = np.concatenate(samples, 0)
        np.save(
            'sampled_imgs/MixGAN_samples_for_inception_epoch%d.npy' % epoch,
            samples)
        scipy.misc.imsave("cifar_samples_mixgan_%d.png" % epoch, samples[20])
Exemplo n.º 37
0
def random_uniform(shape, low=0.0, high=1.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    return rng.uniform(shape, low=low, high=high, dtype=dtype)
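
# --- Usage sketch for the helper above: draw a (2, 3) tensor of uniform noise
# in [-1, 1) and evaluate it (the shape, bounds, and seed are illustrative).
import theano
u = random_uniform((2, 3), low=-1.0, high=1.0, dtype='float32', seed=42)
f = theano.function([], u)
print(f().shape)  # (2, 3)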
Exemplo n.º 38
0
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--seed_data', type=int, default=1)
parser.add_argument('--unlabeled_weight', type=float, default=10)
parser.add_argument('--batch_size', type=int, default=5)
parser.add_argument('--count', type=int, default=10)
args = parser.parse_args()
print(args)

# fixed random seeds
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2**15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
data_rng = np.random.RandomState(args.seed_data)

# specify generative model
noise = theano_rng.uniform(size=(args.batch_size, 3000))
gen_layers = [LL.InputLayer(shape=(args.batch_size, 3000), input_var=noise)]
gen_layers.append(
    nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                num_units=500,
                                nonlinearity=T.nnet.softplus),
                  g=None))
gen_layers.append(
    nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                num_units=500,
                                nonlinearity=T.nnet.softplus),
                  g=None))
gen_layers.append(
    nn.l2normalize(
        LL.DenseLayer(gen_layers[-1],
                      num_units=28**2,
Exemplo n.º 39
0
def gpu_evaluate(gru,
                 test_data,
                 items=None,
                 session_key='SessionId',
                 item_key='ItemId',
                 time_key='Time',
                 cut_off=20,
                 batch_size=100,
                 mode='conservative'):
    if gru.error_during_train: raise Exception
    print('Measuring Recall@{} and MRR@{}'.format(cut_off, cut_off))
    srng = RandomStreams()
    X = T.ivector()
    Y = T.ivector()
    M = T.iscalar()
    C = []
    yhat, H, updatesH = gru.symbolic_predict(X, Y, M, items, batch_size)
    if mode == 'tiebreaking': yhat += srng.uniform(size=yhat.shape) * 1e-10
    if items is None:
        targets = T.diag(yhat.T[Y])
        others = yhat.T
    else:
        targets = T.diag(yhat.T[:M])
        others = yhat.T[M:]
    if mode == 'standard':
        ranks = (others > targets).sum(axis=0) + 1
    elif mode == 'conservative':
        ranks = (others >= targets).sum(axis=0)
    elif mode == 'median':
        ranks = (others > targets).sum(axis=0) + 0.5 * (
            (others == targets).sum(axis=0) - 1) + 1
    elif mode == 'tiebreaking':
        ranks = (others > targets).sum(axis=0) + 1
    else:
        raise NotImplementedError
    REC = (ranks <= cut_off).sum()
    MRR = ((ranks <= cut_off) / ranks).sum()
    evaluate = theano.function(inputs=[X, Y, M] + C,
                               outputs=[REC, MRR],
                               updates=updatesH,
                               allow_input_downcast=True,
                               on_unused_input='ignore')
    test_data = pd.merge(test_data,
                         pd.DataFrame({
                             'ItemIdx': gru.itemidmap.values,
                             item_key: gru.itemidmap.index
                         }),
                         on=item_key,
                         how='inner')
    test_data.sort_values([session_key, time_key, item_key], inplace=True)
    test_data_items = test_data.ItemIdx.values
    if items is not None:
        item_idxs = gru.itemidmap[items]
    recall, mrr, n = 0, 0, 0
    iters = np.arange(batch_size)
    maxiter = iters.max()
    offset_sessions = np.zeros(test_data[session_key].nunique() + 1,
                               dtype=np.int32)
    offset_sessions[1:] = test_data.groupby(session_key).size().cumsum()
    start = offset_sessions[iters]
    end = offset_sessions[iters + 1]
    finished = False
    cidxs = []
    while not finished:
        minlen = (end - start).min()
        out_idx = test_data_items[start]
        for i in range(minlen - 1):
            in_idx = out_idx
            out_idx = test_data_items[start + i + 1]
            if items is not None:
                y = np.hstack([out_idx, item_idxs])
            else:
                y = out_idx
            rec, m = evaluate(in_idx, y, len(iters), *cidxs)
            recall += rec
            mrr += m
            n += len(iters)
        start = start + minlen - 1
        finished_mask = (end - start <= 1)
        n_finished = finished_mask.sum()
        iters[finished_mask] = maxiter + np.arange(1, n_finished + 1)
        maxiter += n_finished
        valid_mask = (iters < len(offset_sessions) - 1)
        n_valid = valid_mask.sum()
        if n_valid == 0:
            finished = True
            break
        mask = finished_mask & valid_mask
        sessions = iters[mask]
        start[mask] = offset_sessions[sessions]
        end[mask] = offset_sessions[sessions + 1]
        iters = iters[valid_mask]
        start = start[valid_mask]
        end = end[valid_mask]
        if valid_mask.any():
            for i in range(len(H)):
                tmp = H[i].get_value(borrow=True)
                tmp[mask] = 0
                tmp = tmp[valid_mask]
                H[i].set_value(tmp, borrow=True)
    return recall / n, mrr / n
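
# --- Toy NumPy sketch (not part of the original function) of the ranking modes
# above.  `scores` plays the role of one column of yhat.T and includes the
# target's own score, as in the items=None case.
import numpy as np

scores = np.array([0.9, 0.5, 0.5, 0.1])   # one of the 0.5 entries is the target
target = 0.5
standard = (scores > target).sum() + 1                                  # optimistic about ties
conservative = (scores >= target).sum()                                 # pessimistic about ties
median = (scores > target).sum() + 0.5 * ((scores == target).sum() - 1) + 1
print(standard, conservative, median)  # 2 3 2.5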
Exemplo n.º 40
0
def build_model(d_params, g_params, s_params, options):

    trng = RandomStreams(SEED)
    x = tensor.matrix('x', dtype='int32')  # n_sample * n_emb  where is n_word
    if options['debug']:
        x.tag.test_value = np.random.randint(2, size=(64, 40)).astype(
            'int32')  # batchsize * sent_len(n_word)  item: 0-voc_size
    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    # generative model part
    z = tensor.matrix('z', dtype='float32')  # n_batch * n_feature
    n_z = z.shape[0]

    n_samples = options['batch_size']
    n_words = options['n_words']
    n_x = d_params['Wemb'].shape[1]  # embedding dim
    if options['shareLSTM']:
        h_decoder = decoder_g(g_params,
                              z,
                              options,
                              max_step=options['max_step'],
                              prefix='decoder_0')
    else:
        z_code = tensor.cast(z[:, 0], dtype='int32')
        h_decoder = tensor.zeros(
            [options['max_step'], n_samples, options['n_h']])

        h_temp = []
        for idx in range(options['n_codes']):
            temp_idx = tensor.eq(z_code, idx).nonzero()[0]
            if options['sharedEmb']:
                h_decoder_temp = decoder_emb_from_d(
                    g_params,
                    d_params,
                    z[:, 1:],
                    options,
                    max_step=options['max_step'],
                    prefix=_p('decoder', idx))
            else:
                h_decoder_temp = decoder_g(g_params,
                                           z[:, 1:],
                                           options,
                                           max_step=options['max_step'],
                                           prefix=_p('decoder', idx))
            h_temp.append(h_decoder_temp)
            h_decoder = tensor.inc_subtensor(h_decoder[:, temp_idx, :],
                                             h_temp[idx][:, temp_idx, :])

    #h_decoder = dropout(h_decoder, trng, use_noise)
    # reconstruct the original sentence
    shape_w = h_decoder.shape  # n_step, n_sample , n_h
    h_decoder = h_decoder.reshape((shape_w[0] * shape_w[1], shape_w[2]))

    # pred_w: (n_steps * n_samples) * n_words
    if options['sharedEmb']:
        Vhid = tensor.dot(g_params['Vhid'], d_params['Wemb'].T)
    else:
        Vhid = tensor.dot(g_params['Vhid'], g_params['Wemb'].T)
    pred_w = tensor.dot(h_decoder, Vhid) + g_params['bhid']
    n_steps = shape_w[0]

    #  nondifferentiable
    if options['delta'] > 1e-10:
        pred_w = tensor.switch(tensor.ge(pred_w, options['delta']), pred_w, 0)
    #pred_w = tensor.nnet.softmax(pred_w*options['L'])
    max_w = tensor.max(pred_w, axis=1, keepdims=True)
    e0 = tensor.exp((pred_w - max_w) * options['L'])
    pred_w = e0 / tensor.sum(e0, axis=1, keepdims=True)

    max_print = tensor.max(pred_w, axis=1)
    max_print = max_print.reshape((n_steps, n_samples)).dimshuffle(1, 0)

    pred_w = pred_w.reshape(
        (n_steps, n_samples,
         n_words)).dimshuffle(1, 0, 2)  # reshape need parenthesis

    if options['force_cut'] == 'cut':
        rng_temp = tensor.minimum(
            -tensor.sum(tensor.log(trng.uniform(
                (n_samples, 6))), axis=1) * 3.3, options['max_step'] - 5)
        rng_length = tensor.floor(rng_temp).astype('int32')  #gamma(6,3.3)
        # pred_mask = tensor.zeros(pred_w.shape)
        period = options['period']
        # should use set values
        for i in xrange(n_samples):
            pred_w = tensor.set_subtensor(pred_w[i, rng_length[i]:, :], 0)
            pred_w = tensor.set_subtensor(pred_w[i, rng_length[i], period], 1)
            pred_w = tensor.set_subtensor(pred_w[i, (rng_length[i] + 1):, 0],
                                          1)
    elif options['force_cut'] == 'strip':
        for i in xrange(n_samples):
            pred_w = tensor.set_subtensor(
                pred_w[i, options['max_step'] - 1, 0], 1)
            idx_end = theano.tensor.eq(tensor.argmax(pred_w[i, :, :], axis=1),
                                       0).nonzero()[0][0]
            pred_w = tensor.set_subtensor(pred_w[i, (idx_end + 1):, 0], 1)
            pred_w = tensor.set_subtensor(pred_w[i, (idx_end + 1):, 1:], 0)

    pad = max(options['filter_hs']) - 1
    end_mat = tensor.concatenate([
        tensor.ones([n_samples, pad, 1]),
        tensor.zeros([n_samples, pad, n_words - 1])
    ],
                                 axis=2)
    pred_w = tensor.concatenate([end_mat, pred_w, end_mat], axis=1)

    n_steps = n_steps + 2 * pad
    pred_w = pred_w.reshape((n_steps * n_samples, n_words))

    # should be d's embedding
    fake_input = tensor.dot(pred_w, d_params['Wemb'])

    #  real[ 64   1  68 300] fake[ 64   1  41 300]
    fake_input = fake_input.reshape(
        (n_samples, 1, n_steps, d_params['Wemb'].shape[1]))  #(64,1,  )
    use_noise2 = theano.shared(numpy_floatX(0.))
    fake_input = dropout(fake_input, trng, use_noise2)

    # fake feature output
    fake_outputs1 = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(d_params,
                             fake_input,
                             filter_shape,
                             pool_size,
                             options,
                             prefix=_p('cnn_d', i))
        fake_output1 = conv_layer
        fake_outputs1.append(fake_output1)

    fake_output1 = tensor.concatenate(fake_outputs1, 1)  # should be 64*900
    if options['batch_norm']:
        fake_output1 = batch_norm(d_params,
                                  fake_output1,
                                  options,
                                  prefix='fake')

    if options['cnn_activation'] == 'tanh':
        fake_pred = mlp_layer_linear(d_params, fake_output1, prefix='dis_d')
    elif options['cnn_activation'] == 'linear':
        fake_pred = mlp_layer_linear(d_params,
                                     tensor.tanh(fake_output1),
                                     prefix='dis_d')  #

    if not options['wgan']:
        fake_pred = tensor.nnet.sigmoid(fake_pred) * (
            1 - 2 * options['label_smoothing']) + options['label_smoothing']

    # for reverse model
    # if options['reverse']:
    fake_recon = mlp_layer_tanh(d_params, fake_output1, prefix='recon')
    r_t = fake_recon / 2.0 + .5
    z_t = z / 2.0 + .5
    r_cost = (-z_t * tensor.log(r_t + 0.0001) -
              (1. - z_t) * tensor.log(1.0001 - r_t)).sum() / n_samples / n_z

    # Proposal nets (for infogan)
    fake_outputs2 = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(g_params,
                             fake_input,
                             filter_shape,
                             pool_size,
                             options,
                             prefix=_p('cnn_d', i))
        fake_output2 = conv_layer
        fake_outputs2.append(fake_output2)
    fake_output2 = tensor.concatenate(
        fake_outputs2, 1)  # should be 64*900     # why it is 64*0???

    # check whether to use softmax or tanh
    fake_propose = mlp_layer_tanh(g_params, fake_output2, prefix='dis_q')
    fake_propose = (fake_propose + 1) / 2
    fake_propose = tensor.log(fake_propose)
    z_code = tensor.cast(z[:, 0], dtype='int32')
    z_index = tensor.arange(n_z)
    fake_logent = fake_propose[z_index, z_code]
    l_I = tensor.sum(fake_logent)

    # Wemb: voc_size(n_words) * n_emb       64* 1* 40 *48
    real_input = d_params['Wemb'][tensor.cast(
        x.flatten(), dtype='int32')].reshape(
            (x.shape[0], 1, x.shape[1],
             d_params['Wemb'].shape[1]))  # n_sample,1,n_length,n_emb
    real_input = dropout(real_input, trng, use_noise2)

    real_outputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer2 = encoder(d_params,
                              real_input,
                              filter_shape,
                              pool_size,
                              options,
                              prefix=_p('cnn_d', i))
        real_output = conv_layer2
        real_outputs.append(real_output)
    real_output = tensor.concatenate(real_outputs, 1)

    if options['batch_norm']:
        real_output = batch_norm(d_params, real_output, options, prefix='real')

    if options['cnn_activation'] == 'tanh':
        real_pred = mlp_layer_linear(d_params, real_output, prefix='dis_d')
    elif options['cnn_activation'] == 'linear':
        real_pred = mlp_layer_linear(d_params,
                                     tensor.tanh(real_output),
                                     prefix='dis_d')

    if not options['wgan']:
        real_pred = tensor.nnet.sigmoid(real_pred) * (
            1 - 2 * options['label_smoothing']) + options['label_smoothing']

    #Compute for KDE
    mu = real_output
    X = fake_output1
    KDE = cal_nkde(X, mu, options['kde_sigma'])

    #calculate KDE on real_input and fake_input
    X_i = fake_input.reshape((n_samples, n_steps * d_params['Wemb'].shape[1]))
    mu_i = real_input.reshape((n_samples, n_steps * d_params['Wemb'].shape[1]))
    KDE_input = cal_nkde(X_i, mu_i, options['kde_sigma'])

    # sufficient statistics
    cur_size = s_params['seen_size'] * 1.0
    identity = tensor.eye(options['n_z']) * options['diag']
    fake_mean = tensor.mean(fake_output1, axis=0)
    real_mean = tensor.mean(real_output, axis=0)
    fake_xx = tensor.dot(fake_output1.T, fake_output1)
    real_xx = tensor.dot(real_output.T, real_output)
    acc_fake_xx = (s_params['acc_fake_xx'] * cur_size + fake_xx) / (cur_size +
                                                                    n_samples)
    acc_real_xx = (s_params['acc_real_xx'] * cur_size + real_xx) / (cur_size +
                                                                    n_samples)
    acc_fake_mean = (s_params['acc_fake_mean'] * cur_size +
                     fake_mean * n_samples) / (cur_size + n_samples)
    acc_real_mean = (s_params['acc_real_mean'] * cur_size +
                     real_mean * n_samples) / (cur_size + n_samples)

    cov_fake = acc_fake_xx - tensor.dot(acc_fake_mean.dimshuffle(0, 'x'),
                                        acc_fake_mean.dimshuffle(
                                            0, 'x').T) + identity
    cov_real = acc_real_xx - tensor.dot(acc_real_mean.dimshuffle(0, 'x'),
                                        acc_real_mean.dimshuffle(
                                            0, 'x').T) + identity

    cov_fake_inv = tensor.nlinalg.matrix_inverse(cov_fake)
    cov_real_inv = tensor.nlinalg.matrix_inverse(cov_real)

    if options['feature_match'] == 'moment':
        temp1 = ((fake_mean - real_mean)**2).sum()
        fake_obj = temp1

    elif options['feature_match'] == 'JSD_acc':

        temp1 = tensor.nlinalg.trace(
            tensor.dot(cov_fake_inv, cov_real) +
            tensor.dot(cov_real_inv, cov_fake))
        temp2 = tensor.dot(
            tensor.dot((acc_fake_mean - acc_real_mean),
                       (cov_fake_inv + cov_real_inv)),
            (acc_fake_mean - acc_real_mean).T)

        fake_obj = temp1 + temp2

    elif options['feature_match'] == 'mmd':
        #### too many nodes, use scan ####
        kxx, kxy, kyy = 0, 0, 0
        dividend = 1
        dist_x, dist_y = fake_output1 / dividend, real_output / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))

        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_cov':
        kxx, kxy, kyy = 0, 0, 0
        cov_sum = (cov_fake + cov_real) / 2
        cov_sum_inv = tensor.nlinalg.matrix_inverse(cov_sum)

        dividend = 1
        dist_x, dist_y = fake_output1 / dividend, real_output / dividend
        cov_inv_mat = cov_sum_inv
        x_sq = tensor.sum(tensor.dot(dist_x, cov_inv_mat) * dist_x,
                          axis=1).dimshuffle(0, 'x')
        y_sq = tensor.sum(tensor.dot(dist_y, cov_inv_mat) * dist_y,
                          axis=1).dimshuffle(0, 'x')

        tempxx = -2 * tensor.dot(tensor.dot(dist_x, cov_inv_mat),
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(tensor.dot(dist_x, cov_inv_mat),
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(tensor.dot(dist_y, cov_inv_mat),
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))
        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_ld':

        kxx, kxy, kyy = 0, 0, 0
        real_mmd = mlp_layer_tanh(d_params, real_output, prefix='dis_mmd')
        fake_mmd = mlp_layer_tanh(d_params, fake_output1, prefix='dis_mmd')

        dividend = options['dim_mmd']  # for numerical stability & scale with
        dist_x, dist_y = fake_mmd / dividend, real_mmd / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.exp(-tempxx / 2 / sigma).sum()
            kxy += tensor.exp(-tempxy / 2 / sigma).sum()
            kyy += tensor.exp(-tempyy / 2 / sigma).sum()

        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_h':
        #### too many nodes, use scan ####

        kxx, kxy, kyy = 0, 0, 0

        if options['cnn_activation'] == 'tanh':
            fake_mmd = middle_layer(d_params, fake_output1, prefix='dis_d')
        elif options['cnn_activation'] == 'linear':
            fake_mmd = middle_layer(d_params,
                                    tensor.tanh(fake_output1),
                                    prefix='dis_d')  #

        if options['cnn_activation'] == 'tanh':
            real_mmd = middle_layer(d_params, real_output, prefix='dis_d')
        elif options['cnn_activation'] == 'linear':
            real_mmd = middle_layer(d_params,
                                    tensor.tanh(real_output),
                                    prefix='dis_d')  #

        dividend = 1
        dist_x, dist_y = fake_mmd / dividend, real_mmd / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))
        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    else:
        fake_obj = -tensor.log(fake_pred + 1e-6).sum() / n_z

    if options['wgan']:
        gan_cost_d = fake_pred.sum() / n_z - real_pred.sum() / n_samples
        gan_cost_g = -fake_pred.sum() / n_z + 0 * (
            (fake_mean - acc_real_mean)**2).sum()
    else:
        gan_cost_d = -tensor.log(1 - fake_pred + 1e-6).sum(
        ) / n_z - tensor.log(real_pred + 1e-6).sum() / n_samples
        gan_cost_g = fake_obj

    #result4 = fake_obj
    d_cost = gan_cost_d - options['lambda_fm'] * fake_obj + options[
        'lambda_recon'] * r_cost + options['lambda_q'] * l_I / n_z
    g_cost = gan_cost_g - options['lambda_q'] * l_I / n_z

    #result1, result2, result4, result5, result6 = x_sq, y_sq, tempxx, tempxy, tempyy

    result1 = tensor.mean(real_pred)  # goes to nan
    result2 = tensor.mean(fake_pred)  # goes to nan
    result3 = tensor.argmax(pred_w, axis=1).reshape([n_samples, n_steps])
    result4 = tensor.nlinalg.trace(
        tensor.dot(cov_fake_inv, cov_real) +
        tensor.dot(cov_real_inv, cov_fake))
    # Alternatives kept from the original: mu, or
    # tensor.dot(tensor.dot((acc_fake_mean - acc_real_mean), (cov_fake_inv + cov_real_inv)), (acc_fake_mean - acc_real_mean).T)
    result5 = max_print[0]
    result6 = ((fake_mean - real_mean)**2).sum()

    return use_noise, use_noise2, x, z, d_cost, g_cost, r_cost, fake_recon, acc_fake_xx, acc_real_xx, acc_fake_mean, acc_real_mean, result1, result2, result3, result4, result5, result6, KDE, KDE_input
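
# --- Hedged NumPy sketch (illustrative, not the original helper) of the
# multi-bandwidth RBF-kernel MMD used in the 'mmd' feature-matching branch
# above.
import numpy as np

def rbf_mmd(x, y, sigmas=(1.0, 2.0, 4.0)):
    def sq_dists(a, b):
        a_sq = (a ** 2).sum(axis=1)[:, None]
        b_sq = (b ** 2).sum(axis=1)[:, None]
        return -2 * a.dot(b.T) + a_sq + b_sq.T       # (a_i - b_j)**2
    dxx, dxy, dyy = sq_dists(x, x), sq_dists(x, y), sq_dists(y, y)
    kxx = kxy = kyy = 0.0
    for s in sigmas:
        kxx += np.exp(-dxx / 2 / (s ** 2)).mean()
        kxy += np.exp(-dxy / 2 / (s ** 2)).mean()
        kyy += np.exp(-dyy / 2 / (s ** 2)).mean()
    return np.sqrt(kxx + kyy - 2 * kxy)

print(rbf_mmd(np.random.randn(64, 10), np.random.randn(64, 10) + 1.0))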
Exemplo n.º 41
0
class RandomizedRectifierLayer(Layer):
    """
    A layer that applies a randomized leaky rectify nonlinearity to its input.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectifier linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.

    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    def __init__(self,
                 incoming,
                 lower=0.3,
                 upper=0.8,
                 shared_axes='auto',
                 **kwargs):
        super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.lower = lower
        self.upper = upper

        if not isinstance(lower > upper, theano.Variable) and lower > upper:
            raise ValueError("Upper bound for RandomizedRectifierLayer needs "
                             "to be higher than lower bound.")

        if shared_axes == 'auto':
            self.shared_axes = (0, ) + tuple(range(2, len(self.input_shape)))
        elif shared_axes == 'all':
            self.shared_axes = tuple(range(len(self.input_shape)))
        elif isinstance(shared_axes, int):
            self.shared_axes = (shared_axes, )
        else:
            self.shared_axes = shared_axes

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true, the arithmetic mean of lower and upper are used for the
            leaky slope.
        """
        if deterministic or self.upper == self.lower:
            return theano.tensor.nnet.relu(input,
                                           (self.upper + self.lower) / 2.0)
        else:
            shape = list(self.input_shape)
            if any(s is None for s in shape):
                shape = list(input.shape)
            for ax in self.shared_axes:
                shape[ax] = 1

            rnd = self._srng.uniform(tuple(shape),
                                     low=self.lower,
                                     high=self.upper,
                                     dtype=theano.config.floatX)
            rnd = theano.tensor.addbroadcast(rnd, *self.shared_axes)
            return theano.tensor.nnet.relu(input, rnd)
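
A minimal usage sketch, not from the original source: it assumes a standard Lasagne/Theano install, and the input size, unit counts, and variable names below are illustrative. It shows that the training expression re-samples the slope on every call, while the evaluation expression uses the fixed mean slope.

import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as LL

x = T.matrix('x')
l_in = LL.InputLayer(shape=(None, 100), input_var=x)
l_dense = LL.DenseLayer(l_in, num_units=200, nonlinearity=None)
l_rrelu = RandomizedRectifierLayer(l_dense, lower=0.3, upper=0.8)

# training expression: a fresh slope is sampled for every call
train_out = LL.get_output(l_rrelu, deterministic=False)
# evaluation expression: the slope is fixed to (lower + upper) / 2
eval_out = LL.get_output(l_rrelu, deterministic=True)

f_train = theano.function([x], train_out)
f_eval = theano.function([x], eval_out)

data = np.random.rand(4, 100).astype(theano.config.floatX)
print(f_eval(data).shape)  # (4, 200)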
Exemplo n.º 42
0
        assert np.abs(np.mean(l.avg_batch_mean.get_value()) - 0)>1e-7

# input variables
y = T.ivector()
y_1hot = T.matrix()
x = T.tensor4()
meanx = T.tensor3()
# real_fc3 = LL.get_output(enc_layer_fc3, x, deterministic=True)

#y_pred, real_pool3 = LL.get_output([fc8, poo5], x, deterministic=False)
# real_pool3 = LL.get_output(poo5, x, deterministic=False)
#enc_error = T.mean(T.neq(T.argmax(y_pred,axis=1),y)) # classification error of the encoder, to make sure the encoder is working properly


# specify generator, gen_x = G(z, real_pool3)
z = theano_rng.uniform(size=(args.batch_size, 50)) # uniform noise
# y_1hot = T.matrix()
gen_x_layer_z = LL.InputLayer(shape=(args.batch_size, 50), input_var=z) # z, 50
# gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None) # 20 -> 64

gen_x_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot) # conditioned on the one-hot class labels
gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y,gen_x_layer_z],axis=1) # 10 + 50 = 60
gen_x_layer_pool2 = LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256*5*5)), (args.batch_size,256,5,5))
gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_pool2, (args.batch_size,256,10,10), (5,5), stride=(2, 2), padding = 'half',
                 W=Normal(0.02),  nonlinearity=nn.relu))
gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_1, (args.batch_size,128,14,14), (5,5), stride=(1, 1), padding = 'valid',
                 W=Normal(0.02),  nonlinearity=nn.relu))

gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_2, (args.batch_size,128,28,28), (5,5), stride=(2, 2), padding = 'half',
                 W=Normal(0.02),  nonlinearity=nn.relu))
gen_x_layer_x = nn.Deconv2DLayer(gen_x_layer_dconv1_1, (args.batch_size,3,32,32), (5,5), stride=(1, 1), padding = 'valid',
Exemplo n.º 43
0
class Ensemble:
    """An ensemble is a collection of neurons representing a vector space.
    
    """
    def __init__(self,
                 neurons,
                 dimensions,
                 dt,
                 tau_ref=0.002,
                 tau_rc=0.02,
                 max_rate=(200, 300),
                 intercept=(-1.0, 1.0),
                 radius=1.0,
                 encoders=None,
                 seed=None,
                 neuron_type='lif',
                 array_size=1,
                 eval_points=None,
                 decoder_noise=0.1,
                 noise_type='uniform',
                 noise=None,
                 mode='spiking'):
        """Construct an ensemble composed of the specific neuron model,
        with the specified neural parameters.

        :param int neurons: number of neurons in this population
        :param int dimensions:
            number of dimensions in the vector space
            that these neurons represent
        :param float dt: simulation timestep
        :param float tau_ref: length of refractory period
        :param float tau_rc:
            RC constant; approximately how long until 2/3
            of the threshold voltage is accumulated
        :param tuple max_rate:
            lower and upper bounds on randomly generated
            firing rates for each neuron
        :param tuple intercept:
            lower and upper bounds on randomly generated
            x offsets for each neuron
        :param float radius:
            the range of input values (-radius:radius)
            per dimension this population is sensitive to
        :param list encoders: set of possible preferred directions
        :param int seed: seed value for random number generator
        :param string neuron_type:
            type of neuron model to use, options = {'lif'}
        :param int array_size: number of sub-populations for network arrays
        :param list eval_points:
            specific set of points to optimize decoders over by default
        :param float decoder_noise: amount of noise to assume when computing 
            decoder    
        :param string noise_type:
            the type of noise added to the input current.
            Possible options = {'uniform', 'gaussian'}.
            Default is 'uniform' to match the Nengo implementation.
        :param float noise:
            noise parameter for noise added to input current,
            sampled at every timestep.
            If noise_type = uniform, this is the lower and upper
            bound on the distribution.
            If noise_type = gaussian, this is the variance.

        """
        if seed is None:
            seed = np.random.randint(1000)
        self.seed = seed
        self.neurons_num = neurons
        self.dimensions = dimensions
        self.array_size = array_size
        self.radius = radius
        self.noise = noise
        self.noise_type = noise_type
        self.decoder_noise = decoder_noise
        self.mode = mode

        # make sure that eval_points is the right shape
        if eval_points is not None:
            eval_points = np.array(eval_points)
            if len(eval_points.shape) == 1:
                eval_points.shape = [1, eval_points.shape[0]]
        self.eval_points = eval_points

        # make sure intercept is the right shape
        if isinstance(intercept, (int, float)): intercept = [intercept, 1]
        elif len(intercept) == 1: intercept.append(1)

        self.cache_key = cache.generate_ensemble_key(
            neurons=neurons,
            dimensions=dimensions,
            tau_rc=tau_rc,
            tau_ref=tau_ref,
            max_rate=max_rate,
            intercept=intercept,
            radius=radius,
            encoders=encoders,
            decoder_noise=decoder_noise,
            eval_points=eval_points,
            noise=noise,
            seed=seed,
            dt=dt,
            array_size=array_size)

        # make dictionary for origins
        self.origin = {}
        # set up a dictionary for decoded_input
        self.decoded_input = {}

        # if we're creating a spiking ensemble
        if self.mode == 'spiking':

            # TODO: handle different neuron types,
            self.neurons = neuron.types[neuron_type](size=(array_size,
                                                           self.neurons_num),
                                                     tau_rc=tau_rc,
                                                     tau_ref=tau_ref)

            # compute alpha and bias
            self.srng = RandomStreams(seed=seed)
            self.max_rate = max_rate
            max_rates = np.random.uniform(size=(self.array_size,
                                                self.neurons_num),
                                          low=max_rate[0],
                                          high=max_rate[1])
            threshold = np.random.uniform(size=(self.array_size,
                                                self.neurons_num),
                                          low=intercept[0],
                                          high=intercept[1])
            self.alpha, self.bias = self.neurons.make_alpha_bias(
                max_rates, threshold)

            # compute encoders
            self.encoders = self.make_encoders(encoders=encoders)
            # combine encoders and gain for simplification
            self.encoders = (self.encoders.T * self.alpha.T).T
            self.shared_encoders = theano.shared(
                self.encoders, name='ensemble.shared_encoders')

            # set up a dictionary for encoded_input connections
            self.encoded_input = {}
            # list of learned terminations on ensemble
            self.learned_terminations = []

            # make default origin
            self.add_origin('X',
                            func=None,
                            dt=dt,
                            eval_points=self.eval_points)

        elif self.mode == 'direct':

            # make default origin
            self.add_origin('X',
                            func=None,
                            dimensions=self.dimensions * self.array_size)
            # reset neurons_num to 0
            self.neurons_num = 0

    def add_termination(self,
                        name,
                        pstc,
                        decoded_input=None,
                        encoded_input=None):
        """Accounts for a new termination that takes the given input
        (a theano object) and filters it with the given pstc.

        Adds its contributions to the set of decoded, encoded,
        or learn input with the same pstc. Decoded inputs
        are represented signals, encoded inputs are
        decoded_output * weight matrix, learn input is
        activities * weight_matrix.

        Can only have one of decoded OR encoded OR learn input != None.

        :param float pstc: post-synaptic time constant
        :param decoded_input:
            theano object representing the decoded output of
            the pre population multiplied by this termination's
            transform matrix
        :param encoded_input:
            theano object representing the encoded output of
            the pre population multiplied by a connection weight matrix
        :param learn_input:
            theano object representing the learned output of
            the pre population multiplied by a connection weight matrix
        
        """
        # make sure one and only one of
        # (decoded_input, encoded_input) is specified
        if decoded_input is not None: assert (encoded_input is None)
        elif encoded_input is not None: assert (decoded_input is None)
        else: assert False

        if decoded_input:
            if self.mode != 'direct':
                # rescale decoded_input by this neuron's radius
                source = TT.true_div(decoded_input, self.radius)
            # ignore radius in direct mode
            else:
                source = decoded_input
            name = helpers.get_unique_name(name, self.decoded_input)
            self.decoded_input[name] = filter.Filter(name=name,
                                                     pstc=pstc,
                                                     source=source,
                                                     shape=(self.array_size,
                                                            self.dimensions))
        elif encoded_input:
            name = helpers.get_unique_name(name, self.encoded_input)
            self.encoded_input[name] = filter.Filter(name=name,
                                                     pstc=pstc,
                                                     source=encoded_input,
                                                     shape=(self.array_size,
                                                            self.neurons_num))

    def add_learned_termination(self,
                                name,
                                pre,
                                error,
                                pstc,
                                dt,
                                learned_termination_class=hPESTermination,
                                **kwargs):
        """Adds a learned termination to the ensemble.

        Input added to encoded_input, and a learned_termination object
        is created to keep track of the pre and post
        (self) spike times, and adjust the weight matrix according
        to the specified learning rule.

        :param Ensemble pre: the pre-synaptic population
        :param Ensemble error: the Origin that provides the error signal
        :param float pstc:
        :param learned_termination_class:
        """
        #TODO: is there ever a case we wouldn't want this?
        assert error.dimensions == self.dimensions * self.array_size

        # generate an initial weight matrix if none provided,
        # random numbers between -.001 and .001
        if 'weight_matrix' not in kwargs.keys():
            weight_matrix = np.random.uniform(
                size=(self.array_size * pre.array_size, self.neurons_num,
                      pre.neurons_num),
                low=-1e-3,
                high=1e-3)

            kwargs['weight_matrix'] = weight_matrix.astype('float32')
        else:
            # make sure it's an np.array
            #TODO: error checking to make sure it's the right size
            kwargs['weight_matrix'] = np.array(kwargs['weight_matrix'],
                                               dtype='float32')

        learned_term = learned_termination_class(pre=pre,
                                                 post=self,
                                                 error=error,
                                                 **kwargs)

        learn_projections = [
            TT.dot(pre.neurons.output[learned_term.pre_index(i)],
                   learned_term.weight_matrix[i % self.array_size].T) / dt
            for i in range(self.array_size * pre.array_size)
        ]

        # now want to sum all the output to each of the post ensembles
        # going to reshape and sum along the 0 axis
        learn_output = TT.sum(
            TT.reshape(learn_projections,
                       (pre.array_size, self.array_size, self.neurons_num)),
            axis=0)
        # reshape to make it (array_size x neurons_num)
        learn_output = TT.reshape(learn_output,
                                  (self.array_size, self.neurons_num))

        # the input_current from this connection during simulation
        self.add_termination(name=name, pstc=pstc, encoded_input=learn_output)
        self.learned_terminations.append(learned_term)
        return learned_term

    def add_origin(self, name, func, **kwargs):
        """Create a new origin to perform a given function
        on the represented signal.

        :param string name: name of origin
        :param function func:
            desired transformation to perform over represented signal
        :param list eval_points:
            specific set of points to optimize decoders over for this origin
        """

        # if we're in spiking mode create an ensemble_origin with decoders
        # and the whole shebang for interpreting the neural activity
        if self.mode == 'spiking':
            if 'eval_points' not in kwargs.keys():
                kwargs['eval_points'] = self.eval_points
            self.origin[name] = ensemble_origin.EnsembleOrigin(ensemble=self,
                                                               func=func,
                                                               **kwargs)

        # if we're in direct mode then this population is just directly
        # performing the specified function, use a basic origin
        elif self.mode == 'direct':
            if func is not None:
                if 'initial_value' not in kwargs.keys():
                    # [func(np.zeros(self.dimensions)) for i in range(self.array_size)]
                    init = func(np.zeros(self.dimensions))
                    init = np.array([init for i in range(self.array_size)])
                    kwargs['initial_value'] = init.flatten()

            if 'dt' in kwargs: del kwargs['dt']
            self.origin[name] = origin.Origin(func=func, **kwargs)

    def make_encoders(self, encoders=None):
        """Generates a set of encoders.

        :param int neurons: number of neurons 
        :param int dimensions: number of dimensions
        :param theano.tensor.shared_randomstreams srng:
            theano random number generator function
        :param list encoders:
            set of possible preferred directions of neurons

        """
        if encoders is None:
            # if no encoders specified, generate randomly
            encoders = np.random.normal(size=(self.array_size,
                                              self.neurons_num,
                                              self.dimensions))
        else:
            # if encoders were specified, cast list as array
            encoders = np.array(encoders).T
            # repeat array until 'encoders' is the same length
            # as number of neurons in population
            encoders = np.tile(encoders,
                               (self.neurons_num / len(encoders) +
                                1)).T[:self.neurons_num, :self.dimensions]
            encoders = np.tile(encoders, (self.array_size, 1, 1))

        # normalize encoders across represented dimensions
        norm = np.sum(encoders * encoders, axis=2)[:, :, None]
        encoders = encoders / np.sqrt(norm)

        return encoders.astype('float32')

    def theano_tick(self):

        if self.mode == 'direct':
            # set up matrix to store accumulated decoded input
            X = np.zeros((self.array_size, self.dimensions), dtype='float32')
            # updates is an ordered dictionary of theano variables to update

            for di in self.decoded_input.values():
                # add its values to the total decoded input
                X += di.value.get_value()

            # if we're calculating a function on the decoded input
            for o in self.origin.values():
                if o.func is not None:
                    val = np.float32([o.func(X[i]) for i in range(len(X))])
                    o.decoded_output.set_value(val.flatten())

    def update(self, dt):
        """Compute the set of theano updates needed for this ensemble.

        Returns a dictionary with new neuron state,
        termination, and origin values.

        :param float dt: the timestep of the update
        """

        ### find the total input current to this population of neurons

        # set up matrix to store accumulated decoded input
        X = None
        # updates is an ordered dictionary of theano variables to update
        updates = OrderedDict()

        for ii, di in enumerate(self.decoded_input.values()):
            # add its values to the total decoded input
            if ii == 0: X = di.value
            else: X += di.value
            updates.update(di.update(dt))

        # if we're in spiking mode, then look at the input current and
        # calculate new neuron activities for output
        if self.mode == 'spiking':

            # apply respective biases to neurons in the population
            J = TT.as_tensor_variable(np.array(self.bias))

            for ei in self.encoded_input.values():
                # add its values directly to the input current
                J += (ei.value.T * self.alpha.T).T
                updates.update(ei.update(dt))

            # only do this if there is decoded_input
            if X is not None:
                # add to input current for each neuron as
                # represented input signal x preferred direction

                for i in range(self.array_size):  #len(self.bias)):
                    J = TT.inc_subtensor(
                        J[i], TT.dot(X[i], self.shared_encoders[i].T))

            # if noise has been specified for this neuron,
            if self.noise:
                # generate random noise values, one for each input_current element,
                # with standard deviation = sqrt(self.noise=std**2)
                # When simulating white noise, the noise process must be scaled by
                # sqrt(dt) instead of dt. Hence, we divide the std by sqrt(dt).
                if self.noise_type.lower() == 'gaussian':
                    J += self.srng.normal(size=self.bias.shape,
                                          std=np.sqrt(self.noise / dt))
                elif self.noise_type.lower() == 'uniform':
                    J += self.srng.uniform(size=self.bias.shape,
                                           low=-self.noise / np.sqrt(dt),
                                           high=self.noise / np.sqrt(dt))

            # pass that total into the neuron model to produce
            # the main theano computation
            updates.update(self.neurons.update(J, dt))

            for l in self.learned_terminations:
                # also update the weight matrices on learned terminations
                updates.update(l.update(dt))

            # and compute the decoded origin decoded_input from the neuron output
            for o in self.origin.values():
                updates.update(o.update(dt, updates[self.neurons.output]))

        if self.mode == 'direct':

            # if we're in direct mode then just directly pass the decoded_input
            # to the origins for decoded_output
            for o in self.origin.values():
                if o.func is None:
                    if len(self.decoded_input) > 0:
                        updates.update(
                            OrderedDict({
                                o.decoded_output:
                                TT.flatten(X).astype('float32')
                            }))
        return updates
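
A small self-contained NumPy check, not part of the Ensemble class and using assumed values for the standard deviation and the simulated time, of the sqrt(dt) scaling applied to the injected noise above: dividing the per-step standard deviation by sqrt(dt) keeps the variance of the integrated noise independent of the timestep.

import numpy as np

rng = np.random.RandomState(0)
std = 0.5        # assumed noise standard deviation
T_total = 1.0    # assumed total simulated time

for dt in (0.001, 0.01):
    n_steps = int(T_total / dt)
    # per-step noise with std divided by sqrt(dt), then integrated over the run
    noise = rng.normal(0.0, std / np.sqrt(dt), size=(10000, n_steps))
    integrated = (noise * dt).sum(axis=1)
    # the variance comes out near std**2 * T_total for both timesteps
    print("dt=%g  var=%.4f" % (dt, integrated.var()))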
Exemplo n.º 44
0
def random_uniform(shape, low=0.0, high=1.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    return rng.uniform(shape, low=low, high=high, dtype=dtype)
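
A short usage sketch, illustrative only: the helper returns a symbolic Theano expression, so it still has to be compiled with theano.function before it yields concrete samples. The shape, bounds, and seed below are arbitrary.

import theano

u = random_uniform((2, 3), low=-1.0, high=1.0, seed=42)
f = theano.function([], u)
sample = f()
assert sample.shape == (2, 3)
print("min=%.3f  max=%.3f" % (sample.min(), sample.max()))  # both within [-1, 1]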
Exemplo n.º 45
0
class HardGatedRecurrent(BaseRecurrent, Initializable):
    @lazy(allocation=['dim'])
    def __init__(self, dim, activation=None, mlp=None, **kwargs):
        super(HardGatedRecurrent, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()
        self.activation = activation

        # The activation of the mlp should be a Logistic function
        self.mlp = mlp

        # The random stream
        self.randomstream = MRG_RandomStreams()

        self.children = [activation, mlp]

    @property
    def state_to_state(self):
        return self.params[0]

    @property
    def matrix_gate(self):
        return self.params[1]

    def get_dim(self, name):
        if name == 'mask':
            return 0
        if name in ['inputs', 'states']:
            return self.dim
        return super(HardGatedRecurrent, self).get_dim(name)

    def _allocate(self):
        self.params.append(
            shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
        self.params.append(
            shared_floatx_zeros((self.dim, ), name="initial_state"))
        add_role(self.params[0], WEIGHT)
        add_role(self.params[1], INITIAL_STATE)

    def _initialize(self):
        self.weights_init.initialize(self.state_to_state, self.rng)

    @recurrent(sequences=['mask', 'inputs'],
               states=['states'],
               outputs=['states'],
               contexts=[])
    def apply(self, inputs, states, mask=None):
        """Apply the gated recurrent transition.
        Parameters
        ----------
        states : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current states in the shape
            (batch_size, dim). Required for `one_step` usage.
        inputs : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of inputs in the shape (batch_size,
            dim)
        mask : :class:`~tensor.TensorVariable`
            A 1D binary array in the shape (batch,) which is 1 if there is
            data available, 0 if not. Assumed to be 1-s only if not given.
        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            Next states of the network.
        """
        # Concatenate the inputs of the MLP
        mlp_input = tensor.concatenate((inputs, states), axis=1)

        # Compute the output of the MLP
        gate_value = self.mlp.apply(mlp_input)
        random = self.randomstream.uniform((1, ))

        # TODO: Find a way to remove the following "hack".
        # Simply removing the two next lines won't work
        gate_value = gate_value[:, 0]
        gate_value = gate_value[:, None]

        # Compute the next_states value, before gating
        next_states = self.activation.apply(
            states.dot(self.state_to_state) + inputs)

        # Apply the gating
        next_states = tensor.switch(tensor.le(random[0], gate_value),
                                    next_states, states)

        if mask:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
        return next_states

    @application(outputs=apply.states)
    def initial_states(self, batch_size, *args, **kwargs):
        return [tensor.repeat(self.params[2][None, :], batch_size, 0)]
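
A stripped-down sketch of the hard gating used in apply() above; it is hypothetical, not part of the brick, and the shapes and values are illustrative. A single uniform draw is compared against the gate value, and tensor.switch then keeps either the candidate state or the previous state, instead of the usual soft convex combination.

import numpy as np
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=42)

states = tensor.matrix('states')        # (batch, dim): previous hidden states
candidate = tensor.matrix('candidate')  # (batch, dim): proposed next states
gate = tensor.col('gate')               # (batch, 1): output of the logistic MLP

u = srng.uniform((1,))                  # one random threshold shared by the batch
next_states = tensor.switch(tensor.le(u[0], gate), candidate, states)

step = theano.function([states, candidate, gate], next_states)

s = np.zeros((2, 3), dtype=theano.config.floatX)
c = np.ones((2, 3), dtype=theano.config.floatX)
g = np.array([[0.9], [0.1]], dtype=theano.config.floatX)
print(step(s, c, g))  # each row is all ones if u <= its gate value, else all zeros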
Exemplo n.º 46
0
def test_uniform():
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (config.mode in ["DEBUG_MODE", "DebugMode", "FAST_COMPILE"]
            or config.mode == "Mode" and config.linker in ["py"]):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)

    x = tensor.matrix()
    for size, const_size, var_input, input in [
        (sample_size, sample_size, [], []),
        (x.shape, sample_size, [x],
         [np.zeros(sample_size, dtype=config.floatX)]),
        (
            (x.shape[0], sample_size[1]),
            sample_size,
            [x],
            [np.zeros(sample_size, dtype=config.floatX)],
        ),
            # test empty size (scalar)
        ((), (), [], []),
    ]:

        # TEST CPU IMPLEMENTATION
        # The python and C implementation are tested with DebugMode
        x = tensor.matrix()
        R = MRG_RandomStreams(234)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u)
        assert any([
            isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
            for node in f.maker.fgraph.toposort()
        ])
        f(*input)

        # Increase the number of steps if sizes implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        check_basics(f, steps_, const_size, prefix="mrg cpu", inputs=input)

        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

        uu = RR.uniform(size=size)
        ff = theano.function(var_input, uu)
        # It's not our problem if numpy generates 0 or 1
        check_basics(ff,
                     steps_,
                     const_size,
                     prefix="numpy",
                     allow_01=True,
                     inputs=input)
Exemplo n.º 47
0
def _pokemon_wgan_gp():
    import os
    os.environ["FUEL_DATA_PATH"] = os.getcwd() + "/data/"
    batch_size = 20
    data_train = PokemonGenYellowNormal(which_sets=['train'],
                                        sources=['features'])

    train_stream = Flatten(DataStream.default_stream(
        data_train, iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size)))

    features_size = 56 * 56 * 1

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    # print train_stream.get_epoch_iterator(as_dict=True).next()
    # raise

    inputs = T.matrix('features')
    inputs = ((inputs / 255.) * 2. - 1.)

    rng = MRG_RandomStreams(123)

    prior = Z_prior(dim=512)
    gen = Generator(input_dim=512, dims=[512, 512, 512, 512,
                                         features_size],
                    alpha=0.1, **inits)

    dis = Discriminator(dims=[features_size, 512, 512 , 512, 512],
                        alpha=0.1, **inits)

    gan = GAN(dis=dis, gen=gen, prior=prior)
    gan.initialize()

    # gradient penalty
    fake_samples, _ = gan.sampling(inputs.shape[0])
    e = rng.uniform(size=(inputs.shape[0], 1))

    mixed_input = (e * fake_samples) + (1 - e) * inputs

    output_d_mixed = gan._dis.apply(mixed_input)

    grad_mixed = T.grad(T.sum(output_d_mixed), mixed_input)

    norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=1))
    grad_penalty = T.mean(T.square(norm_grad_mixed -1))

    y_hat1, y_hat0, z = gan.apply(inputs)

    d_loss_real = y_hat1.mean()
    d_loss_fake = y_hat0.mean()
    d_loss = - d_loss_real + d_loss_fake + 10 * grad_penalty
    g_loss = - d_loss_fake


    dis_obj = d_loss
    gen_obj = g_loss

    model = Model([y_hat0, y_hat1])

    em_loss = -d_loss_real + d_loss_fake

    em_loss.name = "Earth Move loss"
    dis_obj.name = 'Discriminator loss'
    gen_obj.name = 'Generator loss'

    cg = ComputationGraph([gen_obj, dis_obj])

    gen_filter = VariableFilter(roles=[PARAMETER],
                                bricks=gen.linear_transformations)

    dis_filter = VariableFilter(roles=[PARAMETER],
                                bricks=dis.linear_transformations)

    gen_params = gen_filter(cg.variables)
    dis_params = dis_filter(cg.variables)

    # Prepare the dropout
    _inputs = []
    for brick_ in [gen]:
        _inputs.extend(VariableFilter(roles=[INPUT],
                    bricks=brick_.linear_transformations)(cg.variables))

    cg_dropout = apply_dropout(cg, _inputs, 0.02)

    gen_obj = cg_dropout.outputs[0]
    dis_obj = cg_dropout.outputs[1]

    gan.dis_params = dis_params
    gan.gen_params = gen_params

    # gradient penalty

    algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj,
                              model=gan, dis_iter=5, gradient_clip=None,
                              step_rule=RMSProp(learning_rate=1e-4),
                              gen_consider_constant=z)

    neg_sample = gan.sampling(size=25)

    from blocks.monitoring.aggregation import mean

    monitor = TrainingDataMonitoring(variables=[mean(gen_obj), mean(dis_obj),
                                                mean(em_loss)],
                                     prefix="train", after_batch=True)

    subdir = './exp/' + 'pokemon-wgan-gp' + "-" + time.strftime("%Y%m%d-%H%M%S")

    check_point = Checkpoint("{}/{}".format(subdir, 'CIFAR10'),
                                every_n_epochs=100,
                                save_separately=['log', 'model'])

    neg_sampling = GenerateNegtiveSample(neg_sample,
                                         img_size=(25, 56, 56),
                                         every_n_epochs=10)

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(algorithm=algo, model=model,
                         data_stream=train_stream,
                         extensions=[Printing(), ProgressBar(), monitor,
                                     check_point, neg_sampling])

    main_loop.run()
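
The gradient-penalty block in the function above follows the WGAN-GP recipe: interpolate randomly between real and generated samples, differentiate the critic output with respect to the interpolated points, and penalize the squared deviation of the gradient norm from 1. A minimal standalone sketch, with a toy linear critic standing in for gan._dis.apply and arbitrary shapes, is:

import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(123)
floatX = theano.config.floatX

real = T.matrix('real')   # (batch, features) real samples
fake = T.matrix('fake')   # (batch, features) generated samples
w = theano.shared(np.random.randn(4).astype(floatX), name='w')

def critic(v):
    # toy linear critic used only for this sketch
    return T.dot(v, w)

e = srng.uniform(size=(real.shape[0], 1))   # one epsilon per example, broadcastable over features
mixed = e * fake + (1 - e) * real           # random point between a real and a fake sample
grad_mixed = T.grad(T.sum(critic(mixed)), mixed)
norm = T.sqrt(T.sum(T.square(grad_mixed), axis=1))
grad_penalty = T.mean(T.square(norm - 1))   # pushes the critic toward being 1-Lipschitz

f = theano.function([real, fake], grad_penalty)
r = np.random.randn(8, 4).astype(floatX)
g = np.random.randn(8, 4).astype(floatX)
print(f(r, g))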
Exemplo n.º 48
0
class Graph:
    def __init__(self, seed=123):
        self.rng = MRG_RandomStreams(seed)
        self.y = self.rng.uniform(size=(1, ))
Exemplo n.º 49
0
class NNClassifier_theano:
    """
    NNClassifier_theano is a custom class for mass spectra classification. It is
    essentially a wrapper for nolearn and processes the hyperparameters
    given by the neuralNetInterface.

    :param architecture: lasagne.layers.Layer object
    :param hyperparameter: dictionary of hyperparameter-value-pairs
    """
    def __init__(self, architecture, hyperparameter={}):
        self.archi = architecture
        self.hyperp = hyperparameter
        self._srng = RandomStreams(get_rng().randint(
            1, 2147462579))  # for adaptive noise
        self._srng2 = rStream(2147462579)

        # Create nolearn ModifiedNeuralNet object
        self.classifier  = ModifiedNeuralNet(
            layers=self.archi,
            max_epochs=self.hyperp.setdefault('epochs',100),
            update=self.hyperp.setdefault('optimizer',lasagne.updates.adam),
            update_learning_rate=self.hyperp.setdefault('learningRate',0.001),
            objective = modifiedObjective,
            objective_logitSens = self.hyperp.setdefault('logitSens',0.),
            objective_probSens = self.hyperp.setdefault('probSens',0.),
            objective_lossSens = self.hyperp.setdefault('lossSens',0.),
            objective_std = self.hyperp.setdefault('trainingDataStd',None),
            objective_loss_function=categorical_crossentropy,
            verbose=0,
            batch_iterator_train = DataAugmentationBatchIterator(
                self.hyperp.setdefault('batchSize',64),
                disturbLabelRate=self.hyperp.setdefault('disturbLabelRate',0),
                sdWidth=self.hyperp.setdefault('sdWidth',0),
                sdNumber=self.hyperp.setdefault('sdNumber',0),
                shuffle=True),
            batch_iterator_test = nolearn.lasagne.BatchIterator(
                self.hyperp.setdefault('batchSize',64),shuffle=False),\
            train_split = TrainSplit(eval_size=self.hyperp.setdefault(
                'validationSetRatio',.1)),
            objective_l1 = self.hyperp.setdefault('l1',0.),
            objective_l2 = self.hyperp.setdefault('l2',0.01),
            on_training_started=[nolearn.lasagne.PrintLayerInfo()],
            on_epoch_finished=[getIndividualLosses,
                               printError,
                               addEndTimeToHistory,
                               printAdaptiveNoise,
                               saveBestValidNet])
        self.classifier.initialize()

    def trainModel(self, data, classes):
        validationSetRatio = self.hyperp.setdefault('validationSetRatio', .1)
        if validationSetRatio != 0:
            initialShuffleIndices = np.arange(len(data), dtype='int32')
            np.random.shuffle(initialShuffleIndices)
            data = data[initialShuffleIndices]
            classes = classes[initialShuffleIndices]

            numOfValPoints = np.floor(validationSetRatio * len(data))
            validationData = data[:np.int32(numOfValPoints)]
            validationClasses = classes[:np.int32(numOfValPoints)]

            data = data[np.int32(numOfValPoints):]
            classes = classes[np.int32(numOfValPoints):]

        y = T.ivector()

        verbose = self.hyperp.setdefault('verbose', 1.)

        maxEpochs = self.hyperp.setdefault('epochs', 100)
        optimizer = self.hyperp.setdefault('optimizer', lasagne.updates.adam)
        learningRate = self.hyperp.setdefault('learningRate', 0.001)

        if self.hyperp['useSensRegControl']:
            regMultiplier = theano.shared(0.)
        else:
            regMultiplier = theano.shared(1.)

        # RENAME THIS GLOBALLY
        std = self.hyperp.setdefault('trainingDataStd', None)

        biTrain = nolearn.lasagne.BatchIterator(self.hyperp.setdefault(
            'batchSize', 64),
                                                shuffle=True)
        biVal = nolearn.lasagne.BatchIterator(self.hyperp.setdefault(
            'batchSize', 64),
                                              shuffle=False)
        #biTest = nolearn.lasagne.BatchIterator(self.hyperp.setdefault('batchSize',64),shuffle=False)

        layers = self.classifier.layers_
        outputLayer = layers[-1]
        inputLayer = layers[0]
        networkInput = inputLayer.input_var

        ###### FOR DEBUGGING, DELETE LATER ########
        print(self.hyperp)

        # computation of losses for training (not deterministic)
        classificationLoss, totalLoss, l1Loss, l2Loss, logitSensLoss, logitDiffSensLoss, logitSqSensLoss, probSensLoss, lossSensLoss = self.computeLosses(
            y, std, regMultiplier, deterministic=False)

        # computation of losses for output and cleaning (deterministic)
        classificationLossVal, totalLossVal, l1LossVal, l2LossVal, logitSensLossVal, logitDiffSensLossVal, logitSqSensLossVal, probSensLossVal, lossSensLossVal = self.computeLosses(
            y, std, regMultiplier, deterministic=True)

        params = lasagne.layers.get_all_params(outputLayer, trainable=True)
        updates = optimizer(totalLoss, params, learning_rate=learningRate)

        #        testPrediction  = lasagne.layers.get_output(outputLayer,
        #                                                    deterministic = True)
        #        testAccuracy = T.mean(T.eq(T.argmax(testPrediction, axis=1), y),
        #                          dtype=theano.config.floatX)

        # adaptive noise regularization #
        advNoise = self.hyperp.setdefault('adversarialNoise', 0.)
        if advNoise > 0:
            bXAdap = self.__adversarialNoise(std, advNoise)
            print('Compiling DeepFool graph...')
            deepFoolFunction = theano.function([networkInput], bXAdap)

        print('Compiling neural network graph...')
        trainLoopFunction = theano.function([networkInput, y],
                                            totalLoss,
                                            updates=updates)

        print('Compiling output graph...')
        lossOutputs = theano.function([networkInput, y], [
            classificationLossVal, l1LossVal, l2LossVal, logitSensLossVal,
            logitSqSensLossVal, probSensLossVal, lossSensLossVal
        ])

        if verbose == 0:
            simpleOutput = theano.function([networkInput, y], totalLoss)

        print('Beginning training loop...')
        self.classifier.train_history_ = list()
        for epoch in range(maxEpochs):
            ########################
            #    TRAINING DATA     #
            ########################
            trainBatches = biTrain(data, classes)
            numOfTrainingPoints = len(data)
            out = ""
            for bX, by in trainBatches:
                if advNoise > 0:  # adaptive noise on the network inputs
                    sizeBatch = bX.shape[0]
                    # DeepFool
                    bXFool = np.copy(bX)
                    # create dataset with normal and adversarial examples
                    # Parameter: p
                    p = 1
                    randFool = np.random.binomial(1, p,
                                                  sizeBatch).astype('bool')
                    bXFool = bXFool[randFool, :, :, :]
                    # call deepFool on subset of samples
                    bXadap = deepFoolFunction(bXFool)
                    # stack noise samples and unchanged samples
                    bXNormal = bX[~randFool, :, :, :]
                    bXCombined = np.vstack((bXadap, bXNormal))
                    # stack labels
                    byFool = by[randFool]
                    byNormal = by[~randFool]
                    byCombined = np.hstack((byFool, byNormal))
                    # call training function
                    totalError = trainLoopFunction(bXCombined, byCombined)
                else:
                    totalError = trainLoopFunction(bX, by)

            cL = l1L = l2L = loL = loSqL = prL = lsL = 0

            if verbose == 1:
                for bX, by in trainBatches:
                    stdout.flush()
                    batchSize = bX.shape[0]
                    cLt, l1Lt, l2Lt, loLt, loSqLt, prLt, lsLt = lossOutputs(
                        bX, by)
                    cL += cLt * batchSize
                    l1L += l1Lt * batchSize
                    l2L += l2Lt * batchSize
                    loL += loLt * batchSize
                    loSqL += loSqLt * batchSize
                    prL += prLt * batchSize
                    lsL += lsLt * batchSize

                cL /= numOfTrainingPoints
                l1L /= numOfTrainingPoints
                l2L /= numOfTrainingPoints
                loL /= numOfTrainingPoints
                loSqL /= numOfTrainingPoints
                prL /= numOfTrainingPoints
                lsL /= numOfTrainingPoints

                out = "Training set  : cL: %5.5f, l1: %5.5f, l2: %5.5f, logitSens: %5.5f, logitSqSens: %5.5f, probSens: %5.5f, lossSens: %5.5f" % (
                    cL, l1L, l2L, loL, loSqL, prL, lsL)

            if verbose == 0:
                out = "Total loss (last batch): %5.5f" % (totalError)
                stdout.flush()

            logDict = dict()
            logDict['cL'] = cL
            logDict['l1'] = l1L
            logDict['l2'] = l2L
            logDict['loL'] = loL
            logDict['loSqL'] = loSqL
            logDict['prL'] = prL
            logDict['lsL'] = lsL

            print("Epoch ", epoch + 1, ":")
            print(out)

            if self.hyperp['useSensRegControl']:
                if epoch > .25 * maxEpochs and epoch < .75 * maxEpochs:
                    t = float(epoch - maxEpochs / 4.) / (maxEpochs / 2.)
                    regMultiplier.set_value(t)
                if epoch >= .75 * maxEpochs:
                    regMultiplier.set_value(1)

            ########################
            #    VALIDATION DATA   #
            ########################
            if validationSetRatio != 0:
                valBatches = biVal(validationData, validationClasses)
                numOfTrainingPoints = len(validationData)
                out = ""

                cL = l1L = l2L = loL = loSqL = prL = lsL = 0

                for bX, by in valBatches:
                    stdout.flush()
                    batchSize = bX.shape[0]
                    cLt, l1Lt, l2Lt, loLt, loSqLt, prLt, lsLt = lossOutputs(
                        bX, by)
                    cL += cLt * batchSize
                    l1L += l1Lt * batchSize
                    l2L += l2Lt * batchSize
                    loL += loLt * batchSize
                    loSqL += loSqLt * batchSize
                    prL += prLt * batchSize
                    lsL += lsLt * batchSize

                cL /= numOfTrainingPoints
                l1L /= numOfTrainingPoints
                l2L /= numOfTrainingPoints
                loL /= numOfTrainingPoints
                loSqL /= numOfTrainingPoints
                prL /= numOfTrainingPoints
                lsL /= numOfTrainingPoints

                logDict['val_cL'] = cL
                logDict['val_l1'] = l1L
                logDict['val_l2'] = l2L
                logDict['val_loL'] = loL
                logDict['val_loSqL'] = loSqL
                logDict['val_prL'] = prL
                logDict['val_lsL'] = lsL

                out = "Validation set: cL: %5.5f, l1: %5.5f, l2: %5.5f, logitSens: %5.5f, logitSqSens: %5.5f, probSens: %5.5f, lossSens: %5.5f" % (
                    cL, l1L, l2L, loL, loSqL, prL, lsL)
                print(out)

            self.classifier.train_history_.append(logDict)

            # Check if the current training epoch is the best model
            #getBestValidNet(self.classifier)

        return NNModel_theano(self.classifier)

    def __adversarialNoise(self, std, advNoise):
        """
        TODO: currently only works for 2 classes !!!
        """
        layers = self.classifier.layers_
        outputLayer = layers[-2]
        inputLayer = layers[0]
        networkInput = inputLayer.input_var
        networkOutput = get_output(outputLayer, deterministic=True)
        # difference of logits (only works for 2-classes !!!!)
        fX = networkOutput[:, 0] - networkOutput[:, 1]
        g_logit = T.grad(T.sum(fX), networkInput)
        # scale gradient by std
        if std is not None:
            g_logit = std * g_logit
        # l2-norm squared
        grad_l2 = T.sum(T.sqr(g_logit), axis=(1, 2, 3)) + 1e-12
        # scale the gradient by std again, because we divide by the squared
        # l2-norm of the (already scaled) gradient
        if std is not None:
            sqrG_logit = std * g_logit
        else:
            sqrG_logit = g_logit
        r = -(sqrG_logit.dimshuffle(1, 2, 3, 0) *
              (fX / grad_l2)).dimshuffle(3, 0, 1, 2)
        # random scaling of noise
        randTensor = self._srng.uniform(size=(networkInput.shape[0], ),
                                        low=-advNoise / 2.,
                                        high=advNoise)
        addedTerm = (r.dimshuffle(2, 3, 1, 0) * randTensor).dimshuffle(
            3, 2, 0, 1)
        # projection onto non-negative values
        bXFool = T.maximum(networkInput + addedTerm, 0)
        return bXFool

    def computeLosses(self, y, std, regMultiplier, deterministic):

        logitSens = self.hyperp.setdefault('logitSens', 0.)
        logitDiffSens = self.hyperp.setdefault('logitDiffSens', 0.)
        logitSqSens = self.hyperp.setdefault('logitSqSens', 0.)
        probSens = self.hyperp.setdefault('probSens', 0.)
        lossSens = self.hyperp.setdefault('lossSens', 0.)
        l1 = self.hyperp.setdefault('l1', 0.)
        l2 = self.hyperp.setdefault('l2', 0.)

        layers = self.classifier.layers_

        lossFunction = lasagne.objectives.categorical_crossentropy

        aggregate = T.mean  # otherwise lasagne.objectives.aggregate

        outputLayer = layers[-1]
        logitLayer = layers[-2]
        inputLayer = layers[0]
        networkInput = inputLayer.input_var
        networkOutput = get_output(outputLayer, deterministic=deterministic)
        logitOutput = get_output(logitLayer, deterministic=deterministic)

        ######################################################################
        # Very weird thing:
        # lossSensitivity gradients can only be computed if the one-hot encoded
        # version of the loss function is used. BUT that version lacks a
        # stability optimization in Theano that leads to NaNs during training.
        # This is why both versions need to be employed here.

        L = lossFunction(networkOutput, y)

        y_oneHot = lasagne.utils.one_hot(y, outputLayer.output_shape[1])
        L_oneHot = lossFunction(networkOutput, y_oneHot)

        #######################################################################

        classificationLoss = aggregate(L)

        l1Loss = regularization.regularize_layer_params(
            layers.values(), regularization.l1)

        l2Loss = regularization.regularize_layer_params(
            layers.values(), regularization.l2)

        # logit sensitivity
        logit = T.sum(logitOutput * y_oneHot, axis=1)
        G_logit = T.grad(T.sum(logit), networkInput)

        if std is not None:
            G_logit = std * G_logit

        # Sparse logit saliency regularization
        absG_logit = T.abs_(G_logit)
        sumAbsG_logit = T.sum(absG_logit, axis=(1, 2, 3))
        logitSensLoss = aggregate(sumAbsG_logit)

        # Squared logit saliency regularization
        sqG_logit = G_logit**2
        sumSqG_logit = T.sum(sqG_logit, axis=(1, 2, 3))
        logitSqSensLoss = aggregate(sumSqG_logit)

        # probability sensitivity
        prob = T.sum(networkOutput * y_oneHot, axis=1)
        G_prob = T.grad(T.sum(prob), networkInput)

        if std is not None:
            G_prob = std * G_prob

        # Sparse probability saliency regularization
        absG_prob = T.abs_(G_prob)
        sumAbsG_prob = T.sum(absG_prob, axis=(1, 2, 3))
        probSensLoss = aggregate(sumAbsG_prob)

        # Loss sensitivity
        G_loss = theano.grad(T.sum(L_oneHot), networkInput)
        if std is not None:
            G_loss = std * G_loss
        absG_loss = T.abs_(G_loss)
        sumAbsG_loss = T.sum(absG_loss, axis=(1, 2, 3))
        lossSensLoss = aggregate(sumAbsG_loss)

        ####### !!!!!!!!!!!!!!!!!!! EXPERIMENTAL !!!!!!!!!!!!!!!!!! ##########
        #### !!!! only makes sense for 2-class problems in this case !!!! ####

        # Clumsy way to regularize logit differences
        # It works by replacing the matrix of one-hot encoded labels by one
        # whose first column is ones and the rest is minus ones. After summing
        # over each row, we are left with the difference of the logit of the
        # first class and the (sum of the) other class(es).

        plusMinusOneMatrix = 2 * lasagne.utils.one_hot(
            1, outputLayer.output_shape[1]) - T.ones_like(y_oneHot)
        logitDiff = T.sum(logitOutput * plusMinusOneMatrix, axis=1)
        G_logitDiff = T.grad(T.sum(logitDiff), networkInput)

        if std is not None:
            G_logitDiff = std * G_logitDiff

        absG_logitDiff = T.abs_(G_logitDiff)
        sumAbsG_logitDiff = T.sum(absG_logitDiff, axis=(1, 2, 3))
        logitDiffSensLoss = aggregate(sumAbsG_logitDiff)

        # Sum up
        totalLoss = classificationLoss
        if l1: totalLoss += regMultiplier * l1 * l1Loss
        if l2: totalLoss += regMultiplier * l2 * l2Loss
        if logitSens: totalLoss += regMultiplier * logitSens * logitSensLoss
        if logitDiffSens:
            totalLoss += regMultiplier * logitDiffSens * logitDiffSensLoss
        if logitSqSens:
            totalLoss += regMultiplier * logitSqSens * logitSqSensLoss
        if probSens: totalLoss += regMultiplier * probSens * probSensLoss
        if lossSens: totalLoss += regMultiplier * lossSens * lossSensLoss

        return classificationLoss, totalLoss, l1Loss, l2Loss, logitSensLoss, logitDiffSensLoss, logitSqSensLoss, probSensLoss, lossSensLoss
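
All of the saliency terms in computeLosses follow the same pattern: differentiate a scalar (logit, probability, or loss) with respect to the network input and penalize a norm of that gradient. A minimal standalone sketch on a toy softmax model, not the NNClassifier_theano API, with an assumed weight of 0.1 on the penalty:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')      # (batch, features)
y = T.ivector('y')     # integer class labels
W = theano.shared(np.zeros((5, 3), dtype=theano.config.floatX), name='W')

logits = T.dot(x, W)
probs = T.nnet.softmax(logits)
data_loss = T.mean(T.nnet.categorical_crossentropy(probs, y))

# gradient of the true-class logit with respect to the input, per example
true_logit = logits[T.arange(y.shape[0]), y]
g = T.grad(T.sum(true_logit), x)
sens_loss = T.mean(T.sum(T.abs_(g), axis=1))   # sparse "logit sensitivity" penalty

total_loss = data_loss + 0.1 * sens_loss       # 0.1 is an assumed regularization weight
f = theano.function([x, y], [data_loss, sens_loss, total_loss])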
Exemplo n.º 50
0
def test_undefined_grad():
    srng = MRG_RandomStreams(seed=1234)

    # checking uniform distribution
    low = tensor.scalar()
    out = srng.uniform((), low=low)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, low)

    high = tensor.scalar()
    out = srng.uniform((), low=0, high=high)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, high)

    out = srng.uniform((), low=low, high=high)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, (low, high))

    # checking binomial distribution
    prob = tensor.scalar()
    out = srng.binomial((), p=prob)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, prob)

    # checking multinomial distribution
    prob1 = tensor.scalar()
    prob2 = tensor.scalar()
    p = [theano.tensor.as_tensor_variable([prob1, 0.5, 0.25])]
    out = srng.multinomial(size=None, pvals=p, n=4)[0]
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(theano.tensor.sum(out), prob1)

    p = [theano.tensor.as_tensor_variable([prob1, prob2])]
    out = srng.multinomial(size=None, pvals=p, n=4)[0]
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(theano.tensor.sum(out), (prob1, prob2))

    # checking choice
    p = [theano.tensor.as_tensor_variable([prob1, prob2, 0.1, 0.2])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out[0], (prob1, prob2))

    p = [theano.tensor.as_tensor_variable([prob1, prob2])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out[0], (prob1, prob2))

    p = [theano.tensor.as_tensor_variable([prob1, 0.2, 0.3])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out[0], prob1)

    # checking normal distribution
    avg = tensor.scalar()
    out = srng.normal((), avg=avg)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, avg)

    std = tensor.scalar()
    out = srng.normal((), avg=0, std=std)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, std)

    out = srng.normal((), avg=avg, std=std)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, (avg, std))

    # checking truncated normal distribution
    avg = tensor.scalar()
    out = srng.truncated_normal((), avg=avg)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, avg)

    std = tensor.scalar()
    out = srng.truncated_normal((), avg=0, std=std)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, std)

    out = srng.truncated_normal((), avg=avg, std=std)
    with pytest.raises(theano.gradient.NullTypeGradError):
        theano.grad(out, (avg, std))
Exemplo n.º 51
0
def gamma_dist(self, alpha, beta, num_MC):
    # Draw num_MC samples from Gamma(shape=alpha, rate=beta) for integer alpha:
    # -log(U) with U ~ Uniform(0, 1) is Exp(1), the sum of alpha independent
    # Exp(1) draws is Gamma(alpha, 1), and dividing by beta rescales the rate.
    srng = RandomStreams(seed=234)
    N = srng.uniform(size=(alpha, num_MC), low=1e-10, high=1.0)
    return T.sum(T.log(N), 0) * (-1) / beta
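
A quick NumPy cross-check of the construction above, assuming alpha is a positive integer shape parameter and beta a rate: minus the log of a uniform draw is Exp(1), so summing alpha of them and dividing by beta should match np.random.gamma with shape alpha and scale 1/beta.

import numpy as np

rng = np.random.RandomState(0)
alpha, beta, num_MC = 3, 2.0, 100000   # assumed test values

U = rng.uniform(1e-10, 1.0, size=(alpha, num_MC))
samples = -np.log(U).sum(axis=0) / beta                    # same construction as gamma_dist
reference = rng.gamma(shape=alpha, scale=1.0 / beta, size=num_MC)
print("mean=%.3f  reference=%.3f" % (samples.mean(), reference.mean()))  # both ~ alpha / beta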
Exemplo n.º 52
0
        colorImg=colorImg,
        scale=generation_scale,
        save_path=os.path.join(outfolder,
                               'x_l_' + str(ssl_para_seed) + '_AT-JD.png'))

n_batches_train_u_c = int(x_unlabelled.shape[0] / batch_size_u_c)
n_batches_train_l_c = int(x_labelled.shape[0] / batch_size_l_c)
n_batches_train_u_d = int(x_unlabelled.shape[0] / batch_size_u_d)
n_batches_train_l_d = int(x_labelled.shape[0] / batch_size_l_d)
n_batches_train_g = int(x_unlabelled.shape[0] / batch_size_g)
n_batches_eval = int(eval_x.shape[0] / batch_size_eval)
'''
models
'''
# symbols
sym_z_image = T.tile(theano_rng.uniform((z_generated, n_z)), (num_classes, 1))
sym_z_rand = theano_rng.uniform(size=(batch_size_g, n_z))
sym_x_u = T.tensor4()
sym_x_u_d = T.tensor4()
sym_x_u_g = T.tensor4()
sym_x_l = T.tensor4()
sym_y = T.ivector()
sym_y_g = T.ivector()
sym_x_eval = T.tensor4()
sym_lr = T.scalar()
sym_alpha_cla_g = T.scalar()
sym_alpha_unlabel_entropy = T.scalar()
sym_alpha_unlabel_average = T.scalar()

# te
sym_lr_cla = T.scalar('separate_lr')
Exemplo n.º 53
0
class OptionCritic_Network():
    def __init__(self,
                 model_network=None,
                 gamma=0.99,
                 learning_method="rmsprop",
                 actor_lr=0.00025,
                 batch_size=32,
                 input_size=None,
                 learning_params=None,
                 dnn_type=True,
                 clip_delta=0,
                 scale=255.,
                 freeze_interval=100,
                 grad_clip=0,
                 termination_reg=0,
                 num_options=8,
                 double_q=False,
                 temp=1,
                 entropy_reg=0,
                 BASELINE=False,
                 **kwargs):
        x = T.ftensor4()
        next_x = T.ftensor4()
        a = T.ivector()
        o = T.ivector()
        r = T.fvector()
        terminal = T.ivector()
        self.freeze_interval = freeze_interval

        self.theano_rng = MRG_RandomStreams(1000)

        self.x_shared = theano.shared(
            np.zeros(tuple([batch_size] + input_size[1:]), dtype='float32'))
        self.next_x_shared = theano.shared(
            np.zeros(tuple([batch_size] + input_size[1:]), dtype='float32'))
        self.a_shared = theano.shared(np.zeros((batch_size), dtype='int32'))
        self.o_shared = theano.shared(np.zeros((batch_size), dtype='int32'))
        self.terminal_shared = theano.shared(
            np.zeros((batch_size), dtype='int32'))
        self.r_shared = theano.shared(np.zeros((batch_size), dtype='float32'))

        state_network = model_network[:-1]
        termination_network = copy.deepcopy([model_network[-1]])
        termination_network[0]["activation"] = "sigmoid"
        print "NUM OPTIONS --->", num_options
        termination_network[0]["out_size"] = num_options
        option_network = copy.deepcopy([model_network[-1]])
        option_network[0]["activation"] = "softmax"
        Q_network = copy.deepcopy([model_network[-1]])
        Q_network[0]["out_size"] = num_options

        self.state_model = Model(state_network,
                                 input_size=input_size,
                                 dnn_type=dnn_type)
        self.state_model_prime = Model(state_network,
                                       input_size=input_size,
                                       dnn_type=dnn_type)
        output_size = [None, model_network[-2]["out_size"]]
        self.Q_model = Model(Q_network,
                             input_size=output_size,
                             dnn_type=dnn_type)
        self.Q_model_prime = Model(Q_network,
                                   input_size=output_size,
                                   dnn_type=dnn_type)
        self.termination_model = Model(termination_network,
                                       input_size=output_size,
                                       dnn_type=dnn_type)
        self.options_model = MLP3D(num_options, model_network, temp=temp)

        s = self.state_model.apply(x / scale)
        next_s = self.state_model.apply(next_x / scale)
        next_s_prime = self.state_model_prime.apply(next_x / scale)

        termination_probs = self.termination_model.apply(
            theano.gradient.disconnected_grad(s))
        option_term_prob = termination_probs[T.arange(o.shape[0]), o]
        next_termination_probs = self.termination_model.apply(
            theano.gradient.disconnected_grad(next_s))
        next_option_term_prob = next_termination_probs[T.arange(o.shape[0]), o]
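        # sample option-termination events by thresholding MRG uniform noise with the termination probability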
        termination_sample = T.gt(option_term_prob,
                                  self.theano_rng.uniform(size=o.shape))

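        # option values: the online Q network at s and next_s, and the frozen
        # target ('prime') network at next_s_prime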
        Q = self.Q_model.apply(s)
        next_Q = self.Q_model.apply(next_s)
        next_Q_prime = theano.gradient.disconnected_grad(
            self.Q_model_prime.apply(next_s_prime))

        disc_option_term_prob = theano.gradient.disconnected_grad(
            next_option_term_prob)

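        # intra-option policy: sample an action per state from the softmax via the MRG multinomial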
        action_probs = self.options_model.apply(s, o)
        sampled_actions = T.argmax(self.theano_rng.multinomial(
            pvals=action_probs, n=1),
                                   axis=1).astype("int32")

        if double_q:
            print "TRAINING DOUBLE_Q"
            y = r + (1 - terminal) * gamma * (
                (1 - disc_option_term_prob) *
                next_Q_prime[T.arange(o.shape[0]), o] +
                disc_option_term_prob * next_Q_prime[T.arange(next_Q.shape[0]),
                                                     T.argmax(next_Q, axis=1)])
        else:
            y = r + (1 - terminal) * gamma * (
                (1 - disc_option_term_prob) *
                next_Q_prime[T.arange(o.shape[0]), o] +
                disc_option_term_prob * T.max(next_Q_prime, axis=1))

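        # treat the bootstrapped target as a constant when differentiating the critic loss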
        y = theano.gradient.disconnected_grad(y)

        option_Q = Q[T.arange(o.shape[0]), o]
        td_errors = y - option_Q

        if clip_delta > 0:
            quadratic_part = T.minimum(abs(td_errors), clip_delta)
            linear_part = abs(td_errors) - quadratic_part
            td_cost = 0.5 * quadratic_part**2 + clip_delta * linear_part
        else:
            td_cost = 0.5 * td_errors**2

        # critic updates
        critic_cost = T.sum(td_cost)
        critic_params = self.Q_model.params + self.state_model.params
        learning_algo = self.Q_model.get_learning_method(
            learning_method, **learning_params)
        grads = T.grad(critic_cost, critic_params)
        critic_updates = learning_algo.apply(critic_params,
                                             grads,
                                             grad_clip=grad_clip)

        # actor updates
        actor_params = self.termination_model.params + self.options_model.params
        learning_algo = self.termination_model.get_learning_method("sgd",
                                                                   lr=actor_lr)
        disc_Q = theano.gradient.disconnected_grad(option_Q)
        disc_V = theano.gradient.disconnected_grad(T.max(Q, axis=1))
        term_grad = T.sum(option_term_prob *
                          (disc_Q - disc_V + termination_reg))
        entropy = -T.sum(action_probs * T.log(action_probs))
        if not BASELINE:
            policy_grad = - \
                T.sum(
                    T.log(action_probs[T.arange(a.shape[0]), a]) * y) - entropy_reg*entropy
        else:
            policy_grad = - \
                T.sum(T.log(action_probs[T.arange(a.shape[0]), a])
                      * (y-disc_Q)) - entropy_reg*entropy
        grads = T.grad(term_grad + policy_grad, actor_params)
        actor_updates = learning_algo.apply(actor_params,
                                            grads,
                                            grad_clip=grad_clip)

        if self.freeze_interval > 1:
            target_updates = OrderedDict()
            for t, b in zip(
                    self.Q_model_prime.params + self.state_model_prime.params,
                    self.Q_model.params + self.state_model.params):
                target_updates[t] = b
            self._update_target_params = theano.function(
                [], [], updates=target_updates)
            self.update_target_params()
            print "freeze interval:", self.freeze_interval
        else:
            print "freeze interval: None"

        critic_givens = {
            x: self.x_shared,
            o: self.o_shared,
            r: self.r_shared,
            terminal: self.terminal_shared,
            next_x: self.next_x_shared
        }

        actor_givens = {
            a: self.a_shared,
            r: self.r_shared,
            terminal: self.terminal_shared,
            o: self.o_shared,
            next_x: self.next_x_shared
        }

        print "compiling...",
        self.train_critic = theano.function([], [critic_cost],
                                            updates=critic_updates,
                                            givens=critic_givens)
        self.train_actor = theano.function([s], [],
                                           updates=actor_updates,
                                           givens=actor_givens)
        self.pred_score = theano.function([],
                                          T.max(Q, axis=1),
                                          givens={x: self.x_shared})
        self.sample_termination = theano.function(
            [s], [termination_sample, T.argmax(Q, axis=1)],
            givens={o: self.o_shared})
        self.sample_options = theano.function([s], T.argmax(Q, axis=1))
        self.sample_actions = theano.function([s],
                                              sampled_actions,
                                              givens={o: self.o_shared})
        self.get_action_dist = theano.function([s, o], action_probs)
        self.get_s = theano.function([], s, givens={x: self.x_shared})
        print "complete"

    def update_target_params(self):
        if self.freeze_interval > 1:
            self._update_target_params()
        return

    def predict_move(self, s):
        return self.sample_options(s)

    def predict_termination(self, s, a):
        self.a_shared.set_value(a)
        return tuple(self.sample_termination(s))

    def get_q_vals(self, x):
        self.x_shared.set_value(x)
        return self.pred_score()[:, np.newaxis]

    def get_state(self, x):
        self.x_shared.set_value(x)
        return self.get_s()

    def get_action(self, s, o):
        self.o_shared.set_value(o)
        return self.sample_actions(s)

    def train_conv_net(self,
                       train_set_x,
                       next_x,
                       options,
                       r,
                       terminal,
                       actions=None,
                       model=""):
        self.next_x_shared.set_value(next_x)
        self.o_shared.set_value(options)
        self.r_shared.set_value(r)
        self.terminal_shared.set_value(terminal)
        if model == "critic":
            self.x_shared.set_value(train_set_x)
            return self.train_critic()
        elif model == "actor":
            self.a_shared.set_value(actions)
            return self.train_actor(train_set_x)
        else:
            print "WRONG MODEL NAME"
            raise NotImplementedError

    def save_params(self):
        return [
            self.state_model.save_params(),
            self.Q_model.save_params(),
            self.termination_model.save_params(),
            self.options_model.save_params()
        ]

    def load_params(self, values):
        self.state_model.load_params(values[0])
        self.Q_model.load_params(values[1])
        self.termination_model.load_params(values[2])
        self.options_model.load_params(values[3])
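
The freeze_interval branch above synchronises the target ('prime') networks by compiling a parameter-free Theano function whose updates copy every online parameter into its target counterpart. A minimal sketch of that copy pattern in isolation, using hypothetical shared variables rather than the class's models:

import numpy as np
import theano
from collections import OrderedDict

w_online = theano.shared(np.random.randn(4, 4).astype('float32'), name='w_online')
w_target = theano.shared(np.zeros((4, 4), dtype='float32'), name='w_target')

# one entry per (target, online) pair: assign the online value to the target
target_updates = OrderedDict([(w_target, w_online)])
update_target = theano.function([], [], updates=target_updates)

update_target()  # hard copy, invoked every freeze_interval training steps
assert np.allclose(w_target.get_value(), w_online.get_value())
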
Exemplo n.º 54
0
    def __init__(self, args):

        self.args = args

        rng = np.random.RandomState(self.args.seed)  # fixed random seeds
        theano_rng = MRG_RandomStreams(rng.randint(2**15))
        lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
        data_rng = np.random.RandomState(self.args.seed_data)
        ''' specify pre-trained generator E '''
        self.enc_layers = [
            LL.InputLayer(shape=(None, 3, 32, 32), input_var=None)
        ]
        enc_layer_conv1 = dnn.Conv2DDNNLayer(self.enc_layers[-1],
                                             64, (5, 5),
                                             pad=0,
                                             stride=1,
                                             W=Normal(0.01),
                                             nonlinearity=nn.relu)
        self.enc_layers.append(enc_layer_conv1)
        enc_layer_pool1 = LL.MaxPool2DLayer(self.enc_layers[-1],
                                            pool_size=(2, 2))
        self.enc_layers.append(enc_layer_pool1)
        enc_layer_conv2 = dnn.Conv2DDNNLayer(self.enc_layers[-1],
                                             128, (5, 5),
                                             pad=0,
                                             stride=1,
                                             W=Normal(0.01),
                                             nonlinearity=nn.relu)
        self.enc_layers.append(enc_layer_conv2)
        enc_layer_pool2 = LL.MaxPool2DLayer(self.enc_layers[-1],
                                            pool_size=(2, 2))
        self.enc_layers.append(enc_layer_pool2)
        self.enc_layer_fc3 = LL.DenseLayer(self.enc_layers[-1],
                                           num_units=256,
                                           nonlinearity=T.nnet.relu)
        self.enc_layers.append(self.enc_layer_fc3)
        self.enc_layer_fc4 = LL.DenseLayer(self.enc_layers[-1],
                                           num_units=10,
                                           nonlinearity=T.nnet.softmax)
        self.enc_layers.append(self.enc_layer_fc4)
        ''' load pretrained weights for encoder '''
        weights_toload = np.load('pretrained/encoder.npz')
        weights_list_toload = [
            weights_toload['arr_{}'.format(k)]
            for k in range(len(weights_toload.files))
        ]
        LL.set_all_param_values(self.enc_layers[-1], weights_list_toload)
        ''' input tensor variables '''
        #self.G_weights
        #self.D_weights
        self.dummy_input = T.scalar()
        self.G_layers = []
        self.z = theano_rng.uniform(size=(self.args.batch_size,
                                          self.args.z0dim))
        self.x = T.tensor4()
        self.meanx = T.tensor3()
        self.Gen_x = T.tensor4()
        self.D_layers = []
        self.D_layer_adv = []
        self.D_layer_z_recon = []
        self.gen_lr = T.scalar()  # learning rate
        self.disc_lr = T.scalar()  # learning rate
        self.y = T.ivector()
        self.y_1hot = T.matrix()
        self.Gen_x_list = []
        self.y_recon_list = []
        self.mincost = T.scalar()
        #self.enc_layer_fc3 = self.get_enc_layer_fc3()

        self.real_fc3 = LL.get_output(self.enc_layer_fc3,
                                      self.x,
                                      deterministic=True)
Exemplo n.º 55
0
class GumbelSoftmax(BaseLayer):
    """
    This class implements gumbel softmax activation.
    See "Categorical Reparameterization with Gumbel-softmax".
    (Eric Jang, Shixiang Gu, Ben Poole, 2016.)
    """
    def __init__(self, input_shape, temperature_init=0.1, seed=9683):
        """
        This function initializes the class.

        Parameters
        ----------
        input_shape: tuple
            a tuple giving the shape of the gumbel random noise.
            a fixed shape is required so that use inside scan is not affected.
        temperature_init: float, default: 0.1
            a positive float value.
            if T > 1 the distribution becomes softer, and if T < 1 it becomes sharper.
            if the temperature is 1, this is the same as a normal softmax.
        """
        super(GumbelSoftmax, self).__init__()
        # check asserts
        assert isinstance(input_shape, tuple) and len(
            input_shape) == 1, '"input_shape" should be a tuple of shape.'
        assert temperature_init > 0, '"temperature_init" should be a positive float.'

        # set members
        self.input_shape = input_shape
        self.temperature_init = temperature_init
        self.rng = MRG(seed)

    def set_shared(self):
        """
        This function overrides the parents' one.
        Set shared variables.

        Shared Variables
        ----------------
        temperature: scalar
        """
        temperature = np.array(self.temperature_init).astype('float32')
        self.temperature = theano.shared(temperature,
                                         self.name + '_temperature')
        self.temperature.tags = ['temperature', self.name]

    def change_temperature(self, new_temperature):
        """
        This function changes the temperature for softmax.

        Parameters
        ----------
        new_temperature: float
            a positive float value which will be a new temperature.
        """
        # check asserts
        assert new_temperature > 0, '"new_temperature" should be a positive float.'
        self.temperature.set_value(float(new_temperature))

    def get_output(self, input_):
        """
        This function overrides the parents' one.
        Softmax converts output energy to a probability distribution.

        Math Expression
        -------------------
        g_k ~ Gumbel(0, 1)
        y_k = exp((x_k + g_k) / T) / \sum_i exp((x_i + g_i) / T)

        Parameters
        ----------
        input_: TensorVariable

        Returns
        -------
        TensorVariable
        """
        # generate random gumbel distribution
        uniform_random = self.rng.uniform(
            (self.batch_size, self.input_shape[0]), 0, 1)
        gumbel_random = -T.log(-T.log(uniform_random + 1e-7) + 1e-7)
        return T.nnet.softmax((input_ + gumbel_random) /
                              self.temperature)  # divide by temperature
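
For reference, the same sampling math can be written directly against MRG_RandomStreams outside the layer class. This is a minimal sketch; the seed, shapes and temperature below are illustrative only:

import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

rng = MRG_RandomStreams(9683)
logits = T.matrix('logits')
temperature = 0.5

u = rng.uniform(size=logits.shape)               # u ~ Uniform(0, 1)
g = -T.log(-T.log(u + 1e-7) + 1e-7)              # g ~ Gumbel(0, 1)
y = T.nnet.softmax((logits + g) / temperature)   # relaxed one-hot sample

sample = theano.function([logits], y)
soft_onehot = sample(np.zeros((2, 3), dtype=theano.config.floatX))
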
Exemplo n.º 56
0
class LSTMGenerator:
    """
  A multimodal long short-term memory (LSTM) generator
  """

    # ========================================================================================
    def __init__(self, params):

        image_encoding_size = params.get('image_encoding_size', 128)
        word_encoding_size = params.get('word_encoding_size', 128)

        hidden_size = params.get('hidden_size', 128)
        hidden_depth = params.get('hidden_depth', 1)
        generator = params.get('generator', 'lstm')
        vocabulary_size = params.get('vocabulary_size', -1)
        output_size = params.get('output_size', -1)
        image_feat_size = params.get('image_feat_size',
                                     -1)  # size of CNN vectors hardcoded here

        aux_inp_size = params.get('aux_inp_size', -1)

        model = OrderedDict()
        # Recurrent weights: take x_t, h_{t-1}, and bias unit
        # and produce the 3 gates and the input to cell signal

        encoder = params.get('feat_encoder', None)
        use_feat_enc = params.get('use_encoder_for', 0)

        if not (use_feat_enc & 1):
            model['WIemb'] = initwTh(image_feat_size,
                                     word_encoding_size)  # image encoder
            model['b_Img'] = np.zeros(
                (word_encoding_size)).astype(config.floatX)

        model['Wemb'] = initwTh(vocabulary_size,
                                word_encoding_size)  # word encoder
        model['lstm_W_hid'] = initwTh(hidden_size, 4 * hidden_size)
        model['lstm_W_inp'] = initwTh(word_encoding_size, 4 * hidden_size)

        for i in xrange(1, hidden_depth):
            model['lstm_W_hid_' + str(i)] = initwTh(hidden_size,
                                                    4 * hidden_size)
            model['lstm_W_inp_' + str(i)] = initwTh(hidden_size,
                                                    4 * hidden_size)

        model['lstm_b'] = np.zeros((4 * hidden_size, )).astype(config.floatX)
        # Decoder weights (e.g. mapping to vocabulary)

        if params.get('class_out_factoring', 0) == 0:
            model['Wd'] = initwTh(hidden_size, output_size)  # decoder
            model['bd'] = np.zeros((output_size, )).astype(config.floatX)
        else:
            clsinfo = params['ixtoclsinfo']
            self.clsinfo = clsinfo
            clsSizes = clsinfo[:, 2] - clsinfo[:, 1]
            self.clsSize = np.zeros(params['nClasses'])
            self.clsOffset = np.zeros(params['nClasses'], dtype=np.int32)
            self.clsSize[clsinfo[:, 0]] = clsSizes
            self.clsOffset[clsinfo[:, 0]] = np.int32(clsinfo[:, 1])
            max_cls_size = np.max(clsSizes)
            self.max_cls_size = max_cls_size
            Wd = np.zeros(
                (params['hidden_size'], params['nClasses'], max_cls_size),
                dtype=config.floatX)
            model['bd'] = np.zeros((1, params['nClasses'], max_cls_size),
                                   dtype=config.floatX)
            for cix in clsinfo[:, 0]:
                Wd[:, cix, :clsSizes[cix]] = initwTh(params['hidden_size'],
                                                     clsSizes[cix])
                model['bd'][0, cix, clsSizes[cix]:] = -100
            model['Wd'] = Wd

        update_list = [
            'lstm_W_hid', 'lstm_W_inp', 'lstm_b', 'Wd', 'bd', 'Wemb'
        ]
        self.regularize = ['lstm_W_hid', 'lstm_W_inp', 'Wd', 'Wemb']
        if not (use_feat_enc & 1):
            update_list.extend(['WIemb', 'b_Img'])
            self.regularize.extend(['WIemb'])

        if params.get('class_out_factoring', 0) == 1:
            model['WdCls'] = initwTh(hidden_size,
                                     params['nClasses'])  # decoder
            model['bdCls'] = np.zeros(
                (params['nClasses'], )).astype(config.floatX)
            update_list.extend(['WdCls', 'bdCls'])
            self.regularize.extend(['WdCls'])

        for i in xrange(1, hidden_depth):
            # register both the hidden-to-hidden and input weights of every extra layer
            update_list.append('lstm_W_hid_' + str(i))
            update_list.append('lstm_W_inp_' + str(i))
            self.regularize.append('lstm_W_hid_' + str(i))
            self.regularize.append('lstm_W_inp_' + str(i))

        if params.get('en_aux_inp', 0):
            if params.get('swap_aux', 1) == 1:
                if not (use_feat_enc & 2) or params.get(
                        'encode_gt_sentences', 0):
                    model['WIemb_aux'] = initwTh(
                        aux_inp_size, image_encoding_size)  # image encoder
                    model['b_Img_aux'] = np.zeros(
                        (image_encoding_size)).astype(config.floatX)
                    update_list.append('WIemb_aux')
                    self.regularize.append('WIemb_aux')
                    update_list.append('b_Img_aux')
                model['lstm_W_aux'] = initwTh(image_encoding_size,
                                              4 * hidden_size, 0.00005)
            else:
                model['lstm_W_aux'] = initwTh(aux_inp_size, 4 * hidden_size,
                                              0.001)
            update_list.append('lstm_W_aux')
            self.regularize.append('lstm_W_aux')

        if params.get('gen_input_noise', 0):
            self.noise_dim = params.get('gen_inp_noise_dim', 50)
            model['lstm_W_noise'] = initwTh(self.noise_dim, 4 * hidden_size,
                                            0.001)

        self.model_th = self.init_tparams(model)
        del model
        if params.get('use_gumbel_mse', 0):
            self.usegumbel = theano.shared(1)
            self.gumb_temp = theano.shared(
                numpy_floatX(params.get('gumbel_temp_init', 0.5)))
            #self.model_th['gumb_temp'] = self.gumb_temp
            self.softmax_smooth_factor = theano.shared(
                numpy_floatX(params.get('softmax_smooth_factor', 1.0)))
        else:
            self.usegumbel = theano.shared(0)
        self.update_list = update_list

# ========================================================================================

    def init_tparams(self, params):
        tparams = OrderedDict()
        for kk, pp in params.iteritems():
            tparams[kk] = theano.shared(params[kk], name=kk)
        return tparams

# ========================================================================================
# BUILD LSTM forward propagation model

    def build_model(self, tparams, options, xI=None, xAux=None, attn_nw=None):
        self.trng = RandomStreams(int(time.time()))

        # Used for dropout.
        use_noise = theano.shared(numpy_floatX(0.))

        xW = tensor.matrix('xW', dtype='int64')

        mask = tensor.matrix('mask', dtype=config.floatX)
        n_timesteps = xW.shape[0]
        n_samples = xW.shape[1]

        embW = tparams['Wemb'][xW.flatten()].reshape(
            [n_timesteps, n_samples, options['word_encoding_size']])
        if xI == None:
            xI = tensor.matrix('xI', dtype=config.floatX)
            embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img'])
            xI_is_inp = True
        else:
            embImg = xI
            xI_is_inp = False

        if xAux == None:
            xAux = tensor.matrix(
                'xAux',
                dtype=config.floatX) if attn_nw == None else tensor.tensor3(
                    'xAux', dtype=config.floatX)
            if (options.get('swap_aux', 1)) and (attn_nw == None):
                xAuxEmb = tensor.dot(
                    xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
            else:
                xAuxEmb = xAux
            xA_is_inp = True
        else:
            xA_is_inp = False
            if options.get('encode_gt_sentences', 0):
                xAuxEmb = tensor.dot(
                    xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
            else:
                xAuxEmb = xAux

        embImg = embImg.reshape([1, n_samples, options['image_encoding_size']])
        emb = tensor.concatenate([embImg, embW], axis=0)

        #This is implementation of input dropout !!
        if options['use_dropout']:
            emb = dropout_layer(emb,
                                use_noise,
                                self.trng,
                                options['drop_prob_encoder'],
                                shp=emb.shape)
            if (options.get('en_aux_inp', 0)) and (attn_nw == None):
                xAuxEmb = dropout_layer(xAuxEmb,
                                        use_noise,
                                        self.trng,
                                        options['drop_prob_aux'],
                                        shp=xAuxEmb.shape)

        # Implement scheduled sampling!
        if options.get('sched_sampling_mode', None) != None:
            curr_epoch = tensor.scalar(name='curr_epoch', dtype=config.floatX)

            # Assign the probabilities according to the scheduling mode
            if options['sched_sampling_mode'] == 'linear':
                prob = tensor.maximum(
                    options['sslin_min'], options['sched_sampling_const'] -
                    options['sslin_slope'] * curr_epoch)
            elif options['sched_sampling_mode'] == 'exp':
                raise ValueError(
                    'ERROR: %s --> This solver type is not yet supported' %
                    (options['sched_sampling_mode']))
            elif options['sched_sampling_mode'] == 'invsig':
                raise ValueError(
                    'ERROR: %s --> This solver type is not yet supported' %
                    (options['sched_sampling_mode']))
            else:
                raise ValueError(
                    'ERROR: %s --> This scheduling type is unknown' %
                    (options['sched_sampling_mode']))

            # Now build the mask. We don't want to do this coin toss when
            # feeding in the image feature and the start symbol.
            sched_mask = self.trng.binomial((n_timesteps - 2, n_samples),
                                            p=prob,
                                            n=1,
                                            dtype='int64')
            sched_mask = tensor.concatenate(
                [sched_mask, tensor.alloc(1, 2, n_samples)], axis=0)
        else:
            sched_mask = []

        #############################################################################################################################
        # This implements core lstm
        rval, updatesLSTM = basic_lstm_layer(tparams,
                                             emb[:n_timesteps, :, :],
                                             xAuxEmb,
                                             use_noise,
                                             options,
                                             prefix=options['generator'],
                                             sched_prob_mask=sched_mask,
                                             attn_nw=attn_nw)
        #############################################################################################################################

        # NOTE1: we are leaving out the first prediction, which was made for the image and is meaningless.
        if options['use_dropout']:
            # XXX : Size given to dropout is missing one dimension. This keeps the dropped units consistent across time!?.
            # ###   Is this a good bug ?
            p = dropout_layer(
                sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                       options['hidden_size']), use_noise, self.trng,
                options['drop_prob_decoder'],
                (n_samples, options['hidden_size']))
        else:
            p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                       options['hidden_size'])

        if options.get('class_out_factoring', 0) == 1:
            if options.get('cls_diff_layer', 0) == 1:
                pC_inp = dropout_layer(
                    sliceT(rval[0][1:, :, :],
                           options.get('hidden_depth', 1) - 2,
                           options['hidden_size']), use_noise, self.trng,
                    options['drop_prob_decoder'],
                    (n_samples, options['hidden_size']))
            else:
                pC_inp = p

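        # collapse the (time - 1, batch) axes so the decoder is applied to every step at once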
        n_out_samps = (n_timesteps - 1) * n_samples
        if options.get('class_out_factoring', 0) == 0:
            pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
                [n_out_samps, options['output_size']])
            if options.get('use_gumbel_mse', 0) == 0:
                pWSft = tensor.nnet.softmax(pW)
            else:
                w_out = ifelse(
                    self.usegumbel,
                    gumbel_softmax_sample(self.trng,
                                          pW,
                                          self.gumb_temp,
                                          hard=options.get(
                                              'use_gumbel_hard', False)),
                    tensor.nnet.softmax(pW))
                # This is not exactly right, but just testing
                pWSft = w_out

            totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]
            out_list = [pWSft, totProb, pW]
        else:
            ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
            xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
            if options.get('cls_zmean', 1):
                pW = ((tparams['Wd'][:, xC, :].T *
                       ((p.reshape([1, n_out_samps, options['hidden_size']]) -
                         tparams['WdCls'][:, xC].T))).sum(axis=-1).T +
                      tparams['bd'][:, xC, :])
            else:
                pW = ((tparams['Wd'][:, xC, :].T *
                       (p.reshape([1, n_out_samps, options['hidden_size']]))
                       ).sum(axis=-1).T + tparams['bd'][:, xC, :])
            pWSft = tensor.nnet.softmax(pW[0, :, :])

            pC = (tensor.dot(pC_inp, tparams['WdCls']) +
                  tparams['bdCls']).reshape([n_out_samps, options['nClasses']])
            pCSft = tensor.nnet.softmax(pC)

            totProb = pWSft[tensor.arange(n_out_samps), ixtoclsinfo_t[xW[1:,:].flatten(),3]] * \
                      pCSft[tensor.arange(n_out_samps), xC]
            out_list = [pWSft, pCSft, totProb, p]

        tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()).sum()
        tot_pplx = -(tensor.log2(totProb + 1e-10) *
                     mask[1:, :].flatten()).sum()
        cost = [
            tot_cost / tensor.cast(n_samples, dtype=config.floatX), tot_pplx
        ]

        inp_list = [xW, mask]
        if xI_is_inp:
            inp_list.append(xI)

        if options.get('en_aux_inp', 0) and xA_is_inp:
            inp_list.append(xAux)

        if options.get('sched_sampling_mode', None) != None:
            inp_list.append(curr_epoch)

        f_pred_prob = theano.function([xW, xI, xAux],
                                      out_list,
                                      name='f_pred_prob',
                                      updates=updatesLSTM)

        return use_noise, inp_list, f_pred_prob, cost, out_list, updatesLSTM

# ========================================================================================
# Predictor Related Stuff!!

    def prepPredictor(self,
                      model_npy=None,
                      checkpoint_params=None,
                      beam_size=5,
                      xI=None,
                      xAux=None,
                      inp_list_prev=[],
                      per_word_logweight=None):
        if model_npy != None:
            if type(model_npy[model_npy.keys()[0]]) == np.ndarray:
                zipp(model_npy, self.model_th)
            else:
                self.model_th = model_npy

        #theano.config.exception_verbosity = 'high'
        self.beam_size = beam_size

        # Now we build a predictor model
        if checkpoint_params.get('advers_gen', 0) == 1:
            checkpoint_params['n_gen_samples'] = beam_size
        (inp_list_gen, predLogProb, predIdx, predCand, wOut_emb, updates,
         seq_lengths) = self.build_prediction_model(
             self.model_th,
             checkpoint_params,
             xI,
             xAux,
             per_word_logweight=per_word_logweight)
        self.f_pred_th = theano.function(inp_list_prev + inp_list_gen,
                                         [predLogProb, predIdx, predCand],
                                         name='f_pred')

        # Now we build a training model which evaluates cost. This is for the evaluation part in the end
        if checkpoint_params.get('advers_gen', 0) == 0:
            (self.use_dropout, inp_list_gen2, f_pred_prob, cost, predTh,
             updatesLSTM) = self.build_model(self.model_th, checkpoint_params,
                                             xI, xAux)
            self.f_eval = theano.function(inp_list_prev + inp_list_gen2,
                                          cost,
                                          name='f_eval')

# ========================================================================================

    def predict(self, batch, checkpoint_params, ext_inp=[]):

        inp_list = ext_inp
        if not checkpoint_params.get('use_encoder_for', 0) & 1:
            inp_list.extend([
                batch[0]['image']['feat'].reshape(
                    1,
                    checkpoint_params['image_feat_size']).astype(config.floatX)
            ])

        if not checkpoint_params.get('use_encoder_for', 0) & 2:
            if checkpoint_params.get('en_aux_inp', 0):
                inp_list.append(batch[0]['image']['aux_inp'].reshape(
                    1,
                    checkpoint_params['aux_inp_size']).astype(config.floatX))

        Ax = self.f_pred_th(*inp_list)

        # Backtracking to decode the correct sequence of candidates
        Ys = []
        for i in xrange(self.beam_size):
            candI = []
            curr_cand = Ax[2][-1][i]
            for j in reversed(xrange(Ax[1].shape[0] - 1)):
                candI.insert(0, Ax[1][j][curr_cand])
                curr_cand = Ax[2][j][curr_cand]

            Ys.append([Ax[0][i], candI])
        return [Ys], Ax

    def build_prediction_model(self,
                               tparams,
                               options,
                               xI=None,
                               xAux=None,
                               per_word_logweight=None):
        # Initialize random streams for others to use.
        self.trng = RandomStreams(int(time.time()))

        if xI == None:
            xI = tensor.matrix('xI', dtype=config.floatX)
            embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img'])
            xI_is_inp = True
        else:
            xI_is_inp = False
            embImg = xI
        if xAux == None and options.get('en_aux_inp', 0):
            xAux = tensor.matrix('xAux', dtype=config.floatX)
            xA_is_inp = True
            if options.get('swap_aux', 1):
                xAuxEmb = tensor.dot(
                    xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
            else:
                xAuxEmb = xAux
        else:
            xA_is_inp = False
            if options.get('encode_gt_sentences', 0):
                xAuxEmb = tensor.dot(
                    xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
            else:
                xAuxEmb = xAux

        if options.get('advers_gen', 0) == 1:
            accLogProb, Idx, wOut_emb, updates, seq_lengths = self.lstm_advers_gen_layer(
                tparams, embImg, xAuxEmb, options, prefix=options['generator'])
            Cand = tensor.tile(tensor.arange(Idx.shape[1]), [Idx.shape[0], 1])
        else:
            accLogProb, Idx, Cand, wOut_emb, updates = self.lstm_predict_layer(
                tparams,
                embImg,
                xAuxEmb,
                options,
                self.beam_size,
                prefix=options['generator'],
                per_word_logweight=per_word_logweight)
            seq_lengths = []

        inp_list = []
        if xI_is_inp:
            inp_list.append(xI)
        if options.get('en_aux_inp', 0) and xA_is_inp:
            inp_list.append(xAux)

        return inp_list, accLogProb, Idx, Cand, wOut_emb, updates, seq_lengths

# ========================================================================================
# LSTM LAYER in prediction mode. Here we don't provide the word sequences, just the image feature vector.
# The network first forward-propagates the image feature vector, then the start-word feature
# (i.e. the zeroth word vector). From then on, the network's output word (i.e. the most likely word) is fed
# back as the input to the next time step. With beam_size > 1 a time step may be repeated multiple times, once per beam.

    def lstm_predict_layer(self,
                           tparams,
                           Xi,
                           aux_input,
                           options,
                           beam_size,
                           prefix='lstm',
                           per_word_logweight=None):

        nMaxsteps = options.get('maxlen', 30)

        if nMaxsteps is None:
            nMaxsteps = 30
        n_samples = 1
        h_depth = options.get('hidden_depth', 1)
        h_sz = options['hidden_size']

        # ----------------------  STEP FUNCTION  ---------------------- #
        def _stepP(x_, h_, c_, lP_, dV_, xAux):
            preact = tensor.dot(sliceT(h_, 0, h_sz),
                                tparams[_p(prefix, 'W_hid')])
            preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
                       tparams[_p(prefix, 'b')])
            if options.get('en_aux_inp', 0):
                preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])

            hL = [[]] * h_depth
            cL = [[]] * h_depth
            outp = [[]] * h_depth
            for di in xrange(h_depth):
                i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
                f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
                o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
                cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
                cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
                hL[di] = o * tensor.tanh(cL[di])
                outp[di] = hL[di]
                if options.get('en_residual_conn', 1):
                    if (di > 0):
                        outp[di] += outp[di - 1]
                        print "Connecting residual at %d" % (di)
                if di < (h_depth - 1):
                    preact = tensor.dot(sliceT(h_, di+1, h_sz), tparams[_p(prefix, ('W_hid_' + str(di+1)))]) + \
                            tensor.dot(outp[di], tparams[_p(prefix, ('W_inp_' + str(di+1)))])

            c = tensor.concatenate(cL, axis=1)
            h = tensor.concatenate(hL, axis=1)

            if options.get('class_out_factoring', 0) == 1:
                if options.get('cls_diff_layer', 0) == 1:
                    pC = tensor.dot(hL[-2],
                                    tparams['WdCls']) + tparams['bdCls']
                else:
                    pC = tensor.dot(outp[-1],
                                    tparams['WdCls']) + tparams['bdCls']

                pCSft = tensor.nnet.softmax(pC)
                xCIdx = tensor.argmax(pCSft, axis=-1)
                #pW = tensor.dot(outp[-1],tparams['Wd'][:,xCIdx,:]) + tparams['bd'][:,xCIdx,:]
                #smooth_factor = tensor.as_tensor_variable(numpy_floatX(options.get('softmax_smooth_factor',1.0)), name='sm_f')
                #pWSft = tensor.nnet.softmax(pW*smooth_factor)
                #lProb = tensor.log(pWSft + 1e-20) + tensor.log(pCSft[0,xCIdx] + 1e-20)
                #########################################################
                # pW is now of size (beam_size, n_classes, class_size)
                if options.get('cls_zmean', 0):
                    pW = tensor.dot(
                        (outp[-1] - tparams['WdCls'][:, xCIdx].T),
                        tparams['Wd'].swapaxes(0, 1)) + tparams['bd'][0, :, :]
                else:
                    pW = tensor.dot((outp[-1]), tparams['Wd'].swapaxes(
                        0, 1)) + tparams['bd'][0, :, :]
                #smooth_factor = tensor.as_tensor_variable(numpy_floatX(options.get('softmax_smooth_factor',1.0)), name='sm_f')
                pWSft = tensor.nnet.softmax(
                    pW.reshape([pW.shape[0] * pW.shape[1],
                                pW.shape[2]])).reshape(
                                    [pW.shape[0], pW.shape[1] * pW.shape[2]])
                ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
                lProb = tensor.log(pWSft[:,ixtoclsinfo_t[:,0]*tparams['Wd'].shape[2]+ixtoclsinfo_t[:,3]] + 1e-20) + \
                        tensor.log(pCSft[0,ixtoclsinfo_t[:,0]] + 1e-20)
            else:
                p = tensor.dot(outp[-1], tparams['Wd']) + tparams['bd']
                smooth_factor = tensor.as_tensor_variable(numpy_floatX(
                    options.get('softmax_smooth_factor', 1.0)),
                                                          name='sm_f')
                p = tensor.nnet.softmax(p * smooth_factor)
                lProb = tensor.log(p + 1e-20)
                if per_word_logweight is not None:
                    log_w = theano.shared(
                        per_word_logweight)  #, dtype= theano.config.floatX)
                    lProb = log_w + lProb

            if beam_size > 1:

                def _FindB_best(lPLcl, lPprev, dVLcl):
                    srtLcl = tensor.argsort(-lPLcl)
                    srtLcl = srtLcl[:beam_size]
                    deltaVec = tensor.fill(lPLcl[srtLcl],
                                           numpy_floatX(-10000.))
                    deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
                    lProbBest = ifelse(
                        tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                        lPLcl[srtLcl] + lPprev, deltaVec)
                    xWIdxBest = ifelse(
                        tensor.eq(dVLcl, tensor.zeros_like(dVLcl)), srtLcl,
                        tensor.zeros_like(srtLcl))
                    return lProbBest, xWIdxBest

                rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                                  sequences=[lProb, lP_, dV_],
                                                  name=_p(prefix, 'FindBest'),
                                                  n_steps=x_.shape[0])
                xWIdxBest = rvalLcl[1]
                lProbBest = rvalLcl[0]

                xWIdxBest = xWIdxBest.flatten()
                lProb = lProbBest.flatten()
                # Now sort and find the best among these best extensions for the current beams
                srtIdx = tensor.argsort(-lProb)
                srtIdx = srtIdx[:beam_size]
                xCandIdx = srtIdx // beam_size  # Floor division
                h = h.take(xCandIdx.flatten(), axis=0)
                c = c.take(xCandIdx.flatten(), axis=0)
                xWlogProb = lProb[srtIdx]
                xWIdx = xWIdxBest[srtIdx]
                if options.get('class_out_factoring', 0) == 1:
                    clsoffset = tensor.as_tensor_variable(self.clsOffset)
            else:
                xCandIdx = tensor.as_tensor_variable([0])
                lProb = lProb.flatten()
                xWIdx = tensor.argmax(lProb, keepdims=True)
                xWlogProb = lProb[xWIdx] + lP_
                #if options.get('class_out_factoring',0) == 1:
                #    clsoffset = tensor.as_tensor_variable(self.clsOffset)
                #    xWIdx += clsoffset[xCIdx]
                h = h.take(xCandIdx.flatten(), axis=0)
                c = c.take(xCandIdx.flatten(), axis=0)

            if options.get('softmax_propogate', 0) == 0:
                xW = tparams['Wemb'][xWIdx.flatten()]
            else:
                xW = p.dot(tparams['Wemb'])
            doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

            return [xW, h, c, xWlogProb, doneVec, xWIdx,
                    xCandIdx], theano.scan_module.until(doneVec.all())

        # ------------------- END of STEP FUNCTION  -------------------- #

        if options.get('en_aux_inp', 0) == 0:
            aux_input = []

        h = tensor.alloc(numpy_floatX(0.), beam_size, h_sz * h_depth)
        c = tensor.alloc(numpy_floatX(0.), beam_size, h_sz * h_depth)

        lP = tensor.alloc(numpy_floatX(0.), beam_size)
        dV = tensor.alloc(np.int8(0.), beam_size)

        # Propagate the image feature vector
        [xW, h, c, _, _, _, _], _ = _stepP(Xi, h[:1, :], c[:1, :], lP, dV,
                                           aux_input)

        xWStart = tparams['Wemb'][[0]]
        [xW, h, c, lP, dV, idx0,
         cand0], _ = _stepP(xWStart, h[:1, :], c[:1, :], lP, dV, aux_input)

        if options.get('en_aux_inp', 0) == 1:
            aux_input = tensor.extra_ops.repeat(aux_input, beam_size, axis=0)

        # Now let's do the loop.
        rval, updates = theano.scan(
            _stepP,
            outputs_info=[xW, h, c, lP, dV, None, None],
            non_sequences=[aux_input],
            name=_p(prefix, 'predict_layers'),
            n_steps=nMaxsteps)

        return rval[3][-1], tensor.concatenate(
            [idx0.reshape([1, beam_size]), rval[5]],
            axis=0), tensor.concatenate(
                [cand0.reshape([1, beam_size]), rval[6]],
                axis=0), tensor.concatenate(
                    [tensor.shape_padleft(xW, n_ones=1), rval[0]],
                    axis=0), updates

#================================================================================================================

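    # Adversarial generation mode: the LSTM is unrolled with scan and, at each step, the next word is either
    # taken greedily from the softmax or drawn as a Gumbel-softmax relaxed sample driven by the pre-sampled noise U.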
    def lstm_advers_gen_layer(self, tparams, xI, xAux, options, prefix='lstm'):
        nBatchSamps = xI.shape[0]
        nMaxsteps = options.get('maxlen', 15)
        if nMaxsteps is None:
            nMaxsteps = 30
        n_samp = options.get('n_gen_samples', 1)

        h_depth = options.get('hidden_depth', 1)
        h_sz = options['hidden_size']

        # ----------------------  STEP FUNCTION  ---------------------- #
        def _stepP(U, xW_, h_, c_, lP_, dV_, xAux, xNoise):
            preact = tensor.dot(sliceT(h_, 0, h_sz),
                                tparams[_p(prefix, 'W_hid')])
            preact += (tensor.dot(xW_, tparams[_p(prefix, 'W_inp')]) +
                       tparams[_p(prefix, 'b')])
            preact += xAux
            if options.get('gen_input_noise', 0):
                preact += xNoise

            hL = [[]] * h_depth
            cL = [[]] * h_depth
            outp = [[]] * h_depth
            for di in xrange(h_depth):
                i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
                f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
                o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
                cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
                cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
                hL[di] = o * tensor.tanh(cL[di])
                outp[di] = hL[di]
                if options.get('en_residual_conn', 1):
                    if (di > 0):
                        outp[di] += outp[di - 1]
                        print "Connecting residual at %d" % (di)
                if di < (h_depth - 1):
                    preact = tensor.dot(sliceT(h_, di+1, h_sz), tparams[_p(prefix, ('W_hid_' + str(di+1)))]) + \
                            tensor.dot(outp[di], tparams[_p(prefix, ('W_inp_' + str(di+1)))])

            c = tensor.concatenate(cL, axis=1)
            h = tensor.concatenate(hL, axis=1)

            logits = tensor.dot(outp[-1], tparams['Wd']) + tparams['bd']
            #p = tensor.dot(outp[-1],l2norm(tparams['Wd'],axis=0))# + tparams['bd']
            if options.get('use_gumbel_mse', 0) == 0 or options.get(
                    'greedy', 0):
                p = tensor.nnet.softmax(logits)
            else:
                p = gumbel_softmax_sample(
                    self.trng, logits * self.softmax_smooth_factor,
                    self.gumb_temp, U, options.get('use_gumbel_hard', False))

            if options.get('computelogprob', 0):
                lProb = tensor.log(
                    tensor.nnet.softmax(logits * self.softmax_smooth_factor) +
                    1e-20)
            else:
                lProb = logits

            # Idx of the correct word should come from the
            xWIdx = ~dV_ * tensor.argmax(p, axis=-1)

            xWlogProb = ~dV_ * lProb[tensor.arange(nBatchSamps * n_samp),
                                     xWIdx] + lP_
            #xW = tparams['Wemb'][xWIdx.flatten()]
            if options.get('use_gumbel_hard', 0) and options.get(
                    'use_gumbel_mse', 0) and not options.get('greedy', 0):
                xW = p.dot(tparams['Wemb'])
            else:
                xW = theano.gradient.disconnected_grad(
                    tparams['Wemb'][xWIdx.flatten()].reshape(
                        [xWIdx.shape[0], -1]))

            doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

            return [xW, h, c, xWlogProb, doneVec, xWIdx,
                    p], theano.scan_module.until(doneVec.all())

        # ------------------- END of STEP FUNCTION  -------------------- #

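        # pre-sample all uniform noise consumed inside scan: one value per step in the greedy/softmax
        # path, or a (steps, samples, vocabulary) block when Gumbel-softmax sampling is used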
        if options.get('use_gumbel_mse', 0) == 0:
            U = self.trng.uniform((nMaxsteps, 1),
                                  low=0.,
                                  high=1.,
                                  dtype=theano.config.floatX)
        else:
            U = self.trng.uniform((nMaxsteps + 1, nBatchSamps * n_samp,
                                   options['vocabulary_size']),
                                  low=0.,
                                  high=1.,
                                  dtype=theano.config.floatX)

        xI = tensor.extra_ops.repeat(xI, n_samp, axis=0)
        xAux = tensor.extra_ops.repeat(tensor.dot(xAux,
                                                  tparams[_p(prefix,
                                                             'W_aux')]),
                                       n_samp,
                                       axis=0)

        if options.get('gen_input_noise', 0):
            xNoise = tensor.dot(
                self.trng.normal([nBatchSamps * n_samp, self.noise_dim]),
                tparams[_p(prefix, 'W_noise')])
        else:
            xNoise = []

        if options.get('gen_use_rand_init',
                       0) and not options.get('gen_input_noise', 0):
            h = tensor.unbroadcast(
                self.trng.uniform([nBatchSamps * n_samp, h_sz * h_depth],
                                  low=-0.1,
                                  high=0.1), 0, 1)
            c = tensor.unbroadcast(
                self.trng.uniform([nBatchSamps * n_samp, h_sz * h_depth],
                                  low=-0.1,
                                  high=0.1), 0, 1)
        else:
            h = tensor.zeros([nBatchSamps * n_samp, h_sz * h_depth])
            c = tensor.zeros([nBatchSamps * n_samp, h_sz * h_depth])

        lP = tensor.alloc(numpy_floatX(0.), nBatchSamps * n_samp)
        dV = tensor.alloc(np.bool_(0.), nBatchSamps * n_samp)

        # Propagate the image feature vector
        [_, h, c, _, _, _, _], _ = _stepP(U[0, :], xI, h, c, lP, dV, xAux,
                                          xNoise)

        xWStart = tensor.unbroadcast(
            tensor.tile(tparams['Wemb'][[0]], [nBatchSamps * n_samp, 1]), 0, 1)

        # Now let's do the loop.
        rval, updates = theano.scan(
            _stepP,
            sequences=[U[1:, :]],
            outputs_info=[xWStart, h, c, lP, dV, None, None],
            non_sequences=[xAux, xNoise],
            name=_p(prefix, 'adv_predict_layers'),
            n_steps=nMaxsteps)

        seq_lengths = theano.gradient.disconnected_grad(
            tensor.argmax(tensor.concatenate(
                [rval[4][:-1, :],
                 tensor.ones((1, nBatchSamps * n_samp))],
                axis=0),
                          axis=0) + 1)

        return rval[3][-1], rval[5], rval[6], updates, seq_lengths

# ========================================================================================

    def build_eval_other_sent(self, tparams, options, model_npy):

        zipp(model_npy, self.model_th)

        # Used for dropout.
        use_noise = theano.shared(numpy_floatX(0.))

        xW = tensor.matrix('xW', dtype='int64')
        mask = tensor.matrix('mask', dtype=config.floatX)
        n_timesteps = xW.shape[0]
        n_samples = xW.shape[1]
        n_out_samps = (n_timesteps - 1) * n_samples

        embW = tparams['Wemb'][xW.flatten()].reshape(
            [n_timesteps, n_samples, options['word_encoding_size']])
        xI = tensor.matrix('xI', dtype=config.floatX)
        xAux = tensor.matrix('xAux', dtype=config.floatX)

        if options.get('swap_aux', 0):
            xAuxEmb = tensor.dot(xAux,
                                 tparams['WIemb_aux']) + tparams['b_Img_aux']
        else:
            xAuxEmb = xAux

        embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
            [1, n_samples, options['image_encoding_size']])
        emb = tensor.concatenate([embImg, embW], axis=0)

        rval, updatesLSTM = basic_lstm_layer(tparams,
                                             emb[:n_timesteps, :, :],
                                             xAuxEmb,
                                             use_noise,
                                             options,
                                             prefix=options['generator'])
        p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size'])

        if options.get('class_out_factoring', 0) == 0:
            pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
                [n_out_samps, options['output_size']])
            pWSft = tensor.nnet.softmax(pW)
            totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]
            out_list = [pWSft, totProb, p]
        else:
            ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
            xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
            pW = ((tparams['Wd'][:, xC, :].T *
                   ((p.reshape([1, n_out_samps, options['hidden_size']]) -
                     tparams['WdCls'][:, xC].T))).sum(axis=-1).T +
                  tparams['bd'][:, xC, :])
            pWSft = tensor.nnet.softmax(pW[0, :, :])
            pC = (tensor.dot(p, tparams['WdCls']) + tparams['bdCls']).reshape(
                [n_out_samps, options['nClasses']])
            pCSft = tensor.nnet.softmax(pC)

            totProb = pWSft[tensor.arange(n_out_samps), ixtoclsinfo_t[xW[1:,:].flatten(),3]] * \
                      pCSft[tensor.arange(n_out_samps), xC]

        tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()
                     ).reshape([n_timesteps - 1, n_samples])
        cost = tot_cost.sum(axis=0)

        inp_list = [xW, mask, xI]

        if options.get('en_aux_inp', 0):
            inp_list.append(xAux)

        self.f_pred_prob_other = theano.function([xW, xI, xAux],
                                                 pWSft,
                                                 name='f_pred_prob',
                                                 updates=updatesLSTM)
        #f_pred = theano.function([xW, mask], pred.argmax(axis=1), name='f_pred')

        #cost = -tensor.log(pred[tensor.arange(n_timesteps),tensor.arange(n_samples), xW] + 1e-8).mean()

        self.f_eval_other = theano.function(inp_list, cost, name='f_eval')

        return use_noise, inp_list, self.f_pred_prob_other, cost, pW, updatesLSTM
Exemplo n.º 57
0
class InputLayer(lasagne.layers.InputLayer):
    def __init__(self,
                 shape,
                 input_var=None,
                 name=None,
                 binary=True,
                 deterministic=False,
                 threshold=0.5,
                 batch_size=100,
                 n_bits=-1,
                 **kwargs):
        self.rng_mrg = RandomStreams(lasagne.random.get_rng().randint(
            1, 2394349593))
        if binary == False:
            if n_bits == -1:  # no quantization at all
                super(InputLayer, self).__init__(shape=shape,
                                                 input_var=input_var,
                                                 name=name,
                                                 **kwargs)
            else:
                # Normalize to [0, 1 - 2^(-n_bits)]
                input_var_normed = input_var * (1 - 2**(-n_bits))
                if deterministic == False:
                    shape_rand = list(shape)
                    if shape_rand[0] is None:
                        shape_rand[0] = batch_size
                    shape_rand = tuple(shape_rand)
                    input_var_ceil = T.ceil(
                        input_var_normed * 2**n_bits) / 2**n_bits
                    input_var_floor = T.floor(
                        input_var_normed * 2**n_bits) / 2**n_bits
                    input_var_above_floor = input_var - input_var_floor
                    input_var_stochastic_quantized = T.cast(
                        T.switch(
                            T.ge(
                                input_var_above_floor,
                                self.rng_mrg.uniform(
                                    shape_rand,
                                    low=0.0,
                                    high=2**(-n_bits),
                                    dtype=theano.config.floatX)),
                            input_var_ceil, input_var_floor),
                        theano.config.floatX)
                    super(InputLayer, self).__init__(
                        shape=shape,
                        input_var=input_var_stochastic_quantized,
                        name=name,
                        **kwargs)
                else:
                    input_var_deterministic_quantized = T.cast(
                        T.round(input_var_normed * 2**n_bits) / 2**n_bits,
                        theano.config.floatX)
                    super(InputLayer, self).__init__(
                        shape=shape,
                        input_var=input_var_deterministic_quantized,
                        name=name,
                        **kwargs)
        else:
            if deterministic == False:
                shape_rand = list(shape)
                if shape_rand[0] is None:
                    shape_rand[0] = batch_size
                shape_rand = tuple(shape_rand)
                # Bernoulli spikes
                input_var_stochastic_binarized = T.cast(
                    T.gt(
                        input_var,
                        self.rng_mrg.uniform(shape_rand,
                                             low=0.0,
                                             high=1.0,
                                             dtype=theano.config.floatX)),
                    theano.config.floatX)
                super(InputLayer,
                      self).__init__(shape=shape,
                                     input_var=input_var_stochastic_binarized,
                                     name=name,
                                     **kwargs)
            else:
                input_var_deterministic_binarized = T.cast(
                    T.switch(T.ge(input_var, threshold), 1.0, 0.),
                    theano.config.floatX)
                super(InputLayer, self).__init__(
                    shape=shape,
                    input_var=input_var_deterministic_binarized,
                    name=name,
                    **kwargs)
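The stochastic branches above implement stochastic rounding: a normalized input is rounded up to the next multiple of 2**(-n_bits) with probability proportional to its distance from the floor (or, in the binary case, thresholded against a uniform sample, i.e. Bernoulli spikes), so the quantized value equals the input in expectation. A minimal NumPy sketch of the n-bit rule, assuming the input is already normalized to [0, 1 - 2**(-n_bits)] (the function name is illustrative):

import numpy as np

def stochastic_quantize(x, n_bits, rng=np.random):
    step = 2.0 ** (-n_bits)
    floor = np.floor(x / step) * step
    ceil = np.ceil(x / step) * step
    # round up with probability (x - floor) / step, otherwise keep the floor
    return np.where(x - floor >= rng.uniform(0.0, step, size=x.shape), ceil, floor)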
Exemplo n.º 58
0
tys = np.concatenate(tys, axis=0)

# assert txs.shape[0] == 6
# assert tys.shape[0] == 6
# trainx = txs.copy()
# trainy = tys.copy()

trainx_unl = trainx.copy()
trainx_unl2 = trainx.copy()
nr_batches_train = int(trainx.shape[0] / args.batch_size)
nr_batches_test = int(testx.shape[0] / args.batch_size)

# specify generative model
sf = args.net_scale_factor
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(
    nn.batch_norm(ll.DenseLayer(gen_layers[-1],
                                num_units=13 * 1 * 512 // sf,
                                W=Normal(0.05),
                                nonlinearity=nn.relu),
                  g=None))
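# the dense layer has 13 * 1 * (512 // sf) units so its output can be reshaped
# to (args.batch_size, 512 // sf, 13, 1) by the ReshapeLayer below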
gen_layers.append(
    ll.ReshapeLayer(gen_layers[-1], (args.batch_size, 512 // sf, 13, 1)))
gen_layers.append(
    nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                   (args.batch_size, 256 // sf, 25, 1), (5, 1),
                                   stride=(2, 1),
                                   W=Normal(0.05),
                                   nonlinearity=nn.relu),
                  g=None))
Exemplo n.º 59
0
class RandomizedRectifierLayer(Layer):
    """
    A layer that applies a randomized leaky rectify nonlinearity to its input.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectified linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.

    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    def __init__(self, incoming, lower=0.3, upper=0.8, shared_axes='auto',
                 **kwargs):
        super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.lower = lower
        self.upper = upper

        if not isinstance(lower > upper, theano.Variable) and lower > upper:
            raise ValueError("Upper bound for RandomizedRectifierLayer needs "
                             "to be higher than lower bound.")

        if shared_axes == 'auto':
            self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif shared_axes == 'all':
            self.shared_axes = tuple(range(len(self.input_shape)))
        elif isinstance(shared_axes, int):
            self.shared_axes = (shared_axes,)
        else:
            self.shared_axes = shared_axes

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If True, the arithmetic mean of `lower` and `upper` is used for
            the leaky slope.
        """
        if deterministic or self.upper == self.lower:
            return theano.tensor.nnet.relu(input, (self.upper+self.lower)/2.0)
        else:
            shape = list(self.input_shape)
            if any(s is None for s in shape):
                shape = list(input.shape)
            for ax in self.shared_axes:
                shape[ax] = 1

            rnd = self._srng.uniform(tuple(shape),
                                     low=self.lower,
                                     high=self.upper,
                                     dtype=theano.config.floatX)
            rnd = theano.tensor.addbroadcast(rnd, *self.shared_axes)
            return theano.tensor.nnet.relu(input, rnd)
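During training each unit (up to the sharing implied by `shared_axes`) multiplies negative inputs by a slope drawn from U(lower, upper); with `deterministic=True` the fixed mean slope (lower + upper) / 2 is used instead. A short usage sketch, assuming the class above is importable alongside Lasagne (layer sizes are illustrative):

import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 100), input_var=x)
l_hid = lasagne.layers.DenseLayer(l_in, num_units=50,
                                  nonlinearity=lasagne.nonlinearities.identity)
l_rrelu = RandomizedRectifierLayer(l_hid, lower=0.3, upper=0.8)

train_out = lasagne.layers.get_output(l_rrelu)                     # random slopes
test_out = lasagne.layers.get_output(l_rrelu, deterministic=True)  # mean slope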
Exemplo n.º 60
0
def test_uniform():
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE']
            or mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)

    x = tensor.matrix()
    for size, const_size, var_input, input in [
        (sample_size, sample_size, [], []),
        (x.shape, sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)]),
        ((x.shape[0], sample_size[1]), sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
        ((), (), [], []),
    ]:

        #### TEST CPU IMPLEMENTATION ####
        # The python and C implementation are tested with DebugMode
        # print ''
        # print 'ON CPU with size=(%s):' % str(size)
        x = tensor.matrix()
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u, mode=mode)
        assert any([
            isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
            for node in f.maker.fgraph.toposort()
        ])
        # theano.printing.debugprint(f)
        cpu_out = f(*input)

        # print 'CPU: random?[:10], random?[-10:]'
        # print cpu_out[0, 0:10]
        # print cpu_out[-1, -10:]

        # Increase the number of steps if the size implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        basictest(f, steps_, const_size, prefix='mrg cpu', inputs=input)

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU with size=(%s):' % str(size)
            R = MRG_RandomStreams(234, use_cuda=True)
            u = R.uniform(size=size,
                          dtype='float32',
                          nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # float32 is required: otherwise this test on the GPU does not make sense
            assert u.dtype == 'float32'
            f = theano.function(
                var_input,
                theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                           borrow=True),
                mode=mode_with_gpu)
            assert any([
                isinstance(node.op, theano.sandbox.rng_mrg.GPU_mrg_uniform)
                for node in f.maker.fgraph.toposort()
            ])
            # theano.printing.debugprint(f)
            gpu_out = numpy.asarray(f(*input))

            # print 'GPU: random?[:10], random?[-10:]'
            # print gpu_out[0, 0:10]
            # print gpu_out[-1, -10:]
            basictest(f, steps_, const_size, prefix='mrg  gpu', inputs=input)

            numpy.testing.assert_array_almost_equal(cpu_out,
                                                    gpu_out,
                                                    decimal=6)

        # print ''
        # print 'ON CPU w Numpy with size=(%s):' % str(size)
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

        uu = RR.uniform(size=size)
        ff = theano.function(var_input, uu, mode=mode)
        # It's not our problem if numpy generates 0 or 1
        basictest(ff,
                  steps_,
                  const_size,
                  prefix='numpy',
                  allow_01=True,
                  inputs=input)