Example #1
import theano

# param_init_gru and param_init_fflayer are assumed to come from the
# project's own layers module (not shown in this excerpt).

def get_generator_params(config):

    params = {}

    params = param_init_gru(options=None, param=params, prefix='gru1',
                            nin=1, dim=config['num_hidden'])

    params = param_init_gru(options=None, param=params, prefix='gru2',
                            nin=config['num_hidden'], dim=config['num_hidden'])

    params = param_init_fflayer(options=None, param=params, prefix='ff_1',
                                nin=config['num_hidden'], nout=1, ortho=False)

    params = param_init_fflayer(options=None, param=params, prefix='ff_h1',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'], ortho=False)

    params = param_init_fflayer(options=None, param=params, prefix='ff_h2',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'], ortho=False)

    # wrap every raw numpy parameter in a theano shared variable
    for paramKey in params:
        params[paramKey] = theano.shared(params[paramKey])

    return params
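A minimal usage sketch; the config dict only needs the 'num_hidden' key read above, and the value here is illustrative:

    config = {'num_hidden': 512}  # hypothetical size
    params = get_generator_params(config)
    # params maps prefixed names (gru1_*, gru2_*, ff_*) to shared variables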
Example #2
    def __init__(self, num_hidden, num_features, seq_length, mb_size,
                 tf_states, rf_states):

        tf_states = T.specify_shape(tf_states,
                                    (seq_length, mb_size, num_features))
        rf_states = T.specify_shape(rf_states,
                                    (seq_length, mb_size, num_features))

        hidden_state_features = T.specify_shape(
            T.concatenate([tf_states, rf_states], axis=1),
            (seq_length, mb_size * 2, num_features))

        gru_params_1 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru1",
                           dim=num_hidden,
                           nin=num_features))
        #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix="gru2", dim=num_hidden, nin=num_hidden + num_features))
        #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix="gru3", dim=num_hidden, nin=num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1,
                              hidden_state_features,
                              None,
                              prefix='gru1')[0]
        #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis=2), None, prefix='gru2', backwards=True)[0]
        #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis=2), None, prefix='gru3')[0]

        final_out_recc = T.specify_shape(T.mean(gru_1_out, axis=0),
                                         (mb_size * 2, num_hidden))

        h_out_1 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=1,
                             nonlinearity=None)

        h_out_1_value = h_out_1.get_output_for(final_out_recc)
        h_out_4_value = h_out_4.get_output_for(h_out_1_value)

        raw_y = h_out_4_value
        #raw_y = T.clip(h_out_4_value, -10.0, 10.0)
        classification = T.nnet.sigmoid(raw_y)

        #tf comes before rf.
        p_real = classification[:mb_size]
        p_gen = classification[mb_size:]

        #bce = lambda r,t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))
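        # bce (defined elsewhere in this codebase) is assumed to be binary
        # cross-entropy on probabilities, e.g. T.nnet.binary_crossentropy;
        # the commented lambda above is the logit-space equivalent.
        # The 0.9 / 0.1 targets below are label-smoothed real/fake labels.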

        self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
        self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()
        self.d_cost = self.d_cost_real + self.d_cost_gen
        self.g_cost = self.g_cost_d


        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_3, trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)

        self.params += gru_params_1.values()
        #self.params += gru_params_2.values()
        #self.params += gru_params_3.values()

        # sum of per-class accuracies (real and generated), so in [0, 2]
        self.accuracy = T.mean(
            T.eq(T.ones(p_real.shape).flatten(),
                 T.gt(p_real, 0.5).flatten())) + T.mean(
                     T.eq(
                         T.ones(p_gen.shape).flatten(),
                         T.lt(p_gen, 0.5).flatten()))
Example #3

from collections import OrderedDict

# norm_weight, param_init_gru, param_init_gru_cond and param_init_fflayer
# are assumed to come from the surrounding codebase (not shown here).

def init_params(options):
    params = OrderedDict()

    #embedding
    params['Wemb'] = norm_weight(options['n_words_src'],
                                 options['dim_word'])
    params['Wemb_dec'] = norm_weight(options['n_words_tgt'],
                                     options['dim_word'])
    #encoder: bidirectional RNN
    params = param_init_gru(options, params,
                            prefix='encoder',
                            nin=options['dim_word'],
                            dim=options['dim'])
    params = param_init_gru(options, params,
                            prefix='encoder_r',
                            nin=options['dim_word'],
                            dim=options['dim'])
    ctxdim = 2 * options['dim']
    #init state, init cell
    params = param_init_fflayer(options, params, prefix='ff_state',
                                nin=ctxdim, nout=options['dim'])
    #decoder
    params = param_init_gru_cond(options, params,
                                 prefix='decoder',
                                 nin=options['dim_word'],
                                 dim=options['dim'],
                                 dimctx=ctxdim)
    #readout
    params = param_init_fflayer(options, params, prefix='ff_logit_lstm',
                                nin=options['dim'], nout=options['dim_word'],
                                ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'],
                                nout=options['dim_word'], ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit_ctx',
                                nin=ctxdim, nout=options['dim_word'],
                                ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words_tgt'])

    return params
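A sketch of the options dict this initializer expects; the key names come from the function body above, the values are illustrative only:

    options = {
        'n_words_src': 30000,  # source vocabulary size
        'n_words_tgt': 30000,  # target vocabulary size
        'dim_word': 512,       # word embedding dimension
        'dim': 1024,           # GRU hidden state dimension
    }
    params = init_params(options)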
Example #4
def get_generator_params(config):

    params = {}

    params = param_init_gru(options=None,
                            param=params,
                            prefix='gru1',
                            nin=1,
                            dim=config['num_hidden'])

    params = param_init_gru(options=None,
                            param=params,
                            prefix='gru2',
                            nin=config['num_hidden'],
                            dim=config['num_hidden'])

    params = param_init_fflayer(options=None,
                                param=params,
                                prefix='ff_1',
                                nin=config['num_hidden'],
                                nout=1,
                                ortho=False)

    params = param_init_fflayer(options=None,
                                param=params,
                                prefix='ff_h1',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'],
                                ortho=False)

    params = param_init_fflayer(options=None,
                                param=params,
                                prefix='ff_h2',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'],
                                ortho=False)

    for paramKey in params:
        params[paramKey] = theano.shared(params[paramKey])

    return params
Example #5
    def __init__(self, num_hidden, num_features, mb_size,
                 hidden_state_features, target):
        self.mb_size = mb_size
        #self.seq_length = seq_length

        # dropout() is the project's own helper; the original note says 0.8
        # was used at some point, but the rate passed here is 1.0
        hidden_state_features = dropout(hidden_state_features, 1.0)

        gru_params_1 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru1",
                           dim=num_hidden,
                           nin=num_features))
        gru_params_2 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru2",
                           dim=num_hidden,
                           nin=num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1,
                              hidden_state_features,
                              None,
                              prefix='gru1',
                              gradient_steps=100)[0]
        gru_2_out = gru_layer(gru_params_2,
                              T.concatenate([gru_1_out, hidden_state_features],
                                            axis=2),
                              None,
                              prefix='gru2',
                              backwards=True,
                              gradient_steps=100)[0]

        self.gru_1_out = gru_1_out

        final_out_recc = T.mean(gru_2_out, axis=0)

        h_out_1 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        h_out_2 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=1,
                             nonlinearity=None)

        h_out_1_value = dropout(h_out_1.get_output_for(final_out_recc), 1.0)
        h_out_2_value = dropout(h_out_2.get_output_for(h_out_1_value), 1.0)
        h_out_4_value = h_out_4.get_output_for(h_out_2_value)

        raw_y = T.clip(h_out_4_value, -10.0, 10.0)

        classification = T.nnet.sigmoid(raw_y)

        self.accuracy = T.mean(
            T.eq(target,
                 T.gt(classification, 0.5).flatten()))

        p_real = classification[0:mb_size]
        p_gen = classification[mb_size:mb_size * 2]

        self.d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()

        # the generator is trained to make the two halves indistinguishable
        # in both directions: push p_real toward 0 and p_gen toward 1
        self.g_cost_real = bce(p_real, T.zeros(p_real.shape)).mean()
        self.g_cost_gen = bce(p_gen, T.ones(p_gen.shape)).mean()

        #self.g_cost = self.g_cost_gen
        self.g_cost = self.g_cost_real + self.g_cost_gen

        print "pulling both TF and PF together"

        self.d_cost = self.d_cost_real + self.d_cost_gen
        #if d_cost < 1.0, use g cost.

        # zero out the discriminator cost once it is nearly perfect, so the
        # generator gets a chance to catch up
        self.d_cost = T.switch(
            T.gt(self.accuracy, 0.95) * T.gt(p_real.mean(), 0.99) *
            T.lt(p_gen.mean(), 0.01), 0.0, self.d_cost)
        '''
        gX = gen(Z, *gen_params)

        p_real = discrim(X, *discrim_params)
        p_gen = discrim(gX, *discrim_params)

        d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
        d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
        g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

        d_cost = d_cost_real + d_cost_gen
        g_cost = g_cost_d

        cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
        d_updates = d_updater(discrim_params, d_cost)
        g_updates = g_updater(gen_params, g_cost)

        '''

        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)

        #self.params += h_out_1.getParams() + h_out_2.getParams() + h_out_3.getParams()

        #        self.params += lasagne.layers.get_all_params(h_initial_1,trainable=True)
        #        self.params += lasagne.layers.get_all_params(h_initial_2,trainable=True)

        self.params += gru_params_1.values()
        self.params += gru_params_2.values()
        '''
        layerParams = c1.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]
        layerParams = c2.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]
        layerParams = c3.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]

        '''

        #all_grads = T.grad(self.loss, self.params)
        #for j in range(0, len(all_grads)):
        #    all_grads[j] = T.switch(T.isnan(all_grads[j]), T.zeros_like(all_grads[j]), all_grads[j])
        #self.updates = lasagne.updates.adam(all_grads, self.params, learning_rate = 0.0001, beta1 = 0.5)
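The commented-out update rule above can be wired up as sketched below (inside __init__). This is a sketch, not the author's code: it assumes self.d_cost is the loss being minimized (the original references an undefined self.loss) and reuses the NaN-guarding trick from the commented lines:

    grads = T.grad(self.d_cost, self.params)
    # replace any NaN gradient with zeros before the Adam step
    grads = [T.switch(T.isnan(g), T.zeros_like(g), g) for g in grads]
    self.updates = lasagne.updates.adam(grads, self.params,
                                        learning_rate=0.0001, beta1=0.5)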
Example #6
    def __init__(self, num_hidden, num_features, seq_length, mb_size,
                 tf_states, rf_states):

        tf_states = T.specify_shape(tf_states,
                                    (seq_length, mb_size, num_features))
        rf_states = T.specify_shape(rf_states,
                                    (seq_length, mb_size, num_features))

        hidden_state_features = T.specify_shape(
            T.concatenate([tf_states, rf_states], axis=1),
            (seq_length, mb_size * 2, num_features))

        gru_params_1 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru1",
                           dim=num_hidden,
                           nin=num_features))
        #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix = "gru2", dim = num_hidden, nin = num_hidden + num_features))
        #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix = "gru3", dim = num_hidden, nin = num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1,
                              hidden_state_features,
                              None,
                              prefix='gru1')[0]
        #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis = 2), None, prefix = 'gru2', backwards = True)[0]
        #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis = 2), None, prefix = 'gru3')[0]

        final_out_recc = T.specify_shape(T.mean(gru_1_out, axis=0),
                                         (mb_size * 2, num_hidden))

        h_out_1 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=1,
                             nonlinearity=None)

        h_out_1_value = h_out_1.get_output_for(final_out_recc)
        h_out_4_value = h_out_4.get_output_for(h_out_1_value)

        raw_y = h_out_4_value
        #raw_y = T.clip(h_out_4_value, -10.0, 10.0)
        classification = T.nnet.sigmoid(raw_y)

        #tf comes before rf.
        p_real = classification[:mb_size]
        p_gen = classification[mb_size:]

        #bce = lambda r,t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))

        self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
        self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()
        self.d_cost = self.d_cost_real + self.d_cost_gen
        self.g_cost = self.g_cost_d

        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_3,trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_2,trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)

        self.params += gru_params_1.values()
        #self.params += gru_params_2.values()
        #self.params += gru_params_3.values()

        self.accuracy = T.mean(
            T.eq(T.ones(p_real.shape).flatten(),
                 T.gt(p_real, 0.5).flatten())) + T.mean(
                     T.eq(
                         T.ones(p_gen.shape).flatten(),
                         T.lt(p_gen, 0.5).flatten()))
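A hedged usage sketch for this discriminator; the class name Discriminator and all sizes are assumptions, since the example only shows the __init__ body:

    tf_states = T.tensor3('tf_states')  # teacher-forced hidden states
    rf_states = T.tensor3('rf_states')  # free-running hidden states
    d = Discriminator(num_hidden=512, num_features=1024,
                      seq_length=30, mb_size=64,
                      tf_states=tf_states, rf_states=rf_states)
    # d.d_cost and d.g_cost can then be handed to an optimizer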