Example #1
    def forward_prop():
        for l, l_info in enumerate(ConvModel.layer_info):
            func_name, func_args = l_info[0], l_info[1:]
            act_in = ConvModel.act_tensor[l]
            act_out, dz_out, maxpool_mask = None, None, None

            # perform layer-wise convolution operation
            if func_name == 'convolve3d':
                z_out_3d = Layers.convolve3d(act_in, ConvModel.w_tensor[l], ConvModel.b_tensor[l], func_args[0],
                                             func_args[1])
                dz_out = Layers.drelu(np.array(z_out_3d))
                act_out = Layers.relu(np.array(z_out_3d))
            # perform mlp layer-wise forward pass operation
            if func_name == 'fc_layer':
                act_in = np.ravel(act_in)
                z_out = np.dot(act_in, ConvModel.w_tensor[l]) + ConvModel.b_tensor[l]
                act_out = getattr(Layers, func_args[0])(np.array(z_out))
                dz_out = getattr(Layers, 'd' + func_args[0])(np.array(z_out))
            # perform layer-wise pooling operation
            if func_name == 'maxpool3d':
                pool_outputs = Layers.maxpool3d(input=act_in, filter_dim=func_args[0], stride=func_args[1])
                act_out = pool_outputs[0]
                maxpool_mask = pool_outputs[1]
                dz_out = Layers.drelu(np.array(act_out))

            ConvModel.act_tensor[l + 1] = np.array(act_out)
            ConvModel.dz_tensor[l] = np.array(dz_out)
            ConvModel.maxpool_masks[l] = np.array(maxpool_mask)
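The Layers.relu and Layers.drelu helpers called above are not shown in this example; a minimal sketch of what elementwise NumPy implementations might look like (the function bodies are assumptions, not the original code):

    import numpy as np

    def relu(z):
        # elementwise max(0, z)
        return np.maximum(z, 0.0)

    def drelu(z):
        # derivative of ReLU: 1 where z > 0, else 0
        return (z > 0).astype(z.dtype)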
Example #2
def ssd_multibox_layer(net,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    if normalization > 0:
        net = Layers.l2_normalization(net, scaling=True)

    # Number of anchors
    num_anchors = len(sizes) + len(ratios)

    # Location
    num_loc_pred = num_anchors * 4
    loc_pred = Layers.conv2d(net,
                             net.get_shape()[-1],
                             num_loc_pred,
                             3,
                             1,
                             'SAME',
                             'conv_loc',
                             activation_fn=False)
    loc_pred = Layers.channel_to_last(loc_pred)
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4])

    # Class prediction
    pass
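The class-prediction branch is left as a stub above. In typical SSD implementations it mirrors the location branch, predicting num_classes scores per anchor; a hedged sketch under that assumption, reusing the same custom Layers.conv2d signature as the location branch (the return convention is also an assumption):

    # hypothetical class-prediction branch, mirroring the location branch above
    num_cls_pred = num_anchors * num_classes
    cls_pred = Layers.conv2d(net,
                             net.get_shape()[-1],
                             num_cls_pred,
                             3,
                             1,
                             'SAME',
                             'conv_cls',
                             activation_fn=False)
    cls_pred = Layers.channel_to_last(cls_pred)
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1] + [num_anchors, num_classes])
    return cls_pred, loc_pred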
Example #3
    def __init__(self, d, lr, lambda_z_wu, read_attn, write_attn, do_classify,
                 do_reconst):

        self.do_classify = do_classify
        """ flags for each regularizor """
        self.do_reconst = do_reconst
        self.read_attn = read_attn
        self.write_attn = write_attn
        """ dataset information """
        self.set_datainfo(d)
        """ external toolkits """
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, self.d, self.encoder)
        self.ii = ImageInterface(_is_3d, self.read_attn, self.write_attn,
                                 GLIMPSE_SIZE_READ, GLIMPSE_SIZE_WRITE, _h, _w,
                                 _c)
        # for reference from get_loss_kl_draw()
        self.T = T
        self.L = L
        self.Z_SIZES = Z_SIZES
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_z_wu = lambda_z_wu
        """sequence of canvases """
        self.cs = [0] * T
        """ initialization """
        self.init_lstms()
        self.init_time_zero()
        """ workaround for variable_scope(reuse=True) """
        self.DO_SHARE = None
Example #4
    def _build_model(self):
        w_initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        b_initializer = tf.constant_initializer(0.1)

        with tf.variable_scope('rnn_block'):
            gru_cell_fw = tf.nn.rnn_cell.MultiRNNCell(
                [Layers.dropout_wrapped_gru_cell(self._dropout_keep_prob,
                                                 num_units=self.opt["rnn_node_num"],
                                                 activation=tf.nn.relu)
                 for _ in range(self.opt["rnn_layer_num"])])
            gru_cell_bw = tf.nn.rnn_cell.MultiRNNCell(
                [Layers.dropout_wrapped_gru_cell(self._dropout_keep_prob,
                                                 num_units=self.opt["rnn_node_num"],
                                                 activation=tf.nn.relu)
                 for _ in range(self.opt["rnn_layer_num"])])

            _, h_state = tf.nn.bidirectional_dynamic_rnn(cell_fw=gru_cell_fw,
                                                         cell_bw=gru_cell_bw,
                                                         inputs=self.emb_sent,
                                                         dtype=tf.float32)
            rnn_output = tf.concat([h_state[0][-1], h_state[1][-1]], axis=1)

        with tf.variable_scope('fully_connected_block'):
            fully_connected_layer_1 = tf.layers.dense(rnn_output,
                                                      self.opt["fully_connected_layer_1_node_num"],
                                                      kernel_initializer=w_initializer,
                                                      bias_initializer=b_initializer,
                                                      activation=tf.nn.relu)
            fully_connected_layer_1 = tf.nn.dropout(fully_connected_layer_1, self._dropout_keep_prob)

            fully_connected_layer_2 = tf.layers.dense(fully_connected_layer_1,
                                                      self.opt["fully_connected_layer_2_node_num"],
                                                      kernel_initializer=w_initializer,
                                                      bias_initializer=b_initializer,
                                                      activation=tf.nn.relu)
            fully_connected_layer_2 = tf.nn.dropout(fully_connected_layer_2, self._dropout_keep_prob)

            fully_connected_layer_3 = tf.layers.dense(fully_connected_layer_2,
                                                      self.opt["fully_connected_layer_3_node_num"],
                                                      kernel_initializer=w_initializer,
                                                      bias_initializer=b_initializer,
                                                      activation=tf.nn.relu)
            fully_connected_layer_3 = tf.nn.dropout(fully_connected_layer_3, self._dropout_keep_prob)

        with tf.name_scope('output'):
            output_layer = tf.layers.dense(fully_connected_layer_3,
                                           self.opt["label_dim"],
                                           kernel_initializer=w_initializer,
                                           bias_initializer=b_initializer,
                                           activation=tf.nn.relu)
            tf.summary.histogram('logits', output_layer)
            self.output = tf.argmax(output_layer, 1)

        with tf.name_scope('loss'):
            self.loss = \
                tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self._label, logits=output_layer))
            tf.summary.scalar("training_loss", self.loss)

        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.loss)
Example #5
    def __init__(self, d, lr, lambda_z_wu, do_classify, use_kl=True):
        """ model architecture """
        self.MLP_SIZES = [512, 256, 256, 128, 128]
        self.Z_SIZES = [64, 32, 32, 32, 32]
        self.L = L = len(self.MLP_SIZES)

        self.do_classify = do_classify
        """ flags for regularizers """
        self.use_kl = use_kl
        """ data and external toolkits """
        self.d = d  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, d, self.encoder)
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_z_wu = lambda_z_wu
        """ cache for mu and sigma """
        self.e_mus, self.e_logsigmas = [0] * L, [0] * L  # q(z_i+1 | z_i), bottom-up inference as Eq.7-9
        self.p_mus, self.p_logsigmas = [0] * L, [0] * L  # p(z_i | z_i+1), top-down prior as Eq.1-3
        self.d_mus, self.d_logsigmas = [0] * L, [0] * L  # q(z_i | .), bidirectional inference as Eq.17-19
Example #6
 def __init__(self, resource):
     """ data and external toolkits """
     self.d = resource.dh  # dataset manager
     self.ls = Layers()
     self.lf = LossFunctions(self.ls, self.d, self.encoder)
     """ placeholders defined outside"""
     if c.DO_TRAIN:
         self.lr = resource.ph['lr']
Example #7
    def residual_net(inputs, scope, reuse=False):
        with tf.variable_scope(scope, reuse=reuse):
            res1 = Layers.residual(inputs, 128, 3, 1, 'SAME', 'res1')
            res2 = Layers.residual(res1, 128, 3, 1, 'SAME', 'res2')
            res3 = Layers.residual(res2, 128, 3, 1, 'SAME', 'res3')
            res4 = Layers.residual(res3, 128, 3, 1, 'SAME', 'res4')
            res5 = Layers.residual(res4, 128, 3, 1, 'SAME', 'res5')
            # res6 = Layers.residual(res5, 128, 3, 1, 'SAME', 'res6')

        return res5
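Layers.residual is not defined in this example. A minimal sketch of a typical residual block with the same (inputs, filters, kernel, stride, padding, name) argument order, built from the conv2d / instance_norm / relu helpers used in the other examples (an assumption, not the original implementation):

    def residual(inputs, filters, kernel, stride, padding, name):
        with tf.variable_scope(name):
            h = Layers.conv2d(inputs, inputs.get_shape()[-1], filters, kernel, stride, padding, 'conv1')
            h = Layers.relu(Layers.instance_norm(h))
            h = Layers.conv2d(h, filters, filters, kernel, stride, padding, 'conv2')
            h = Layers.instance_norm(h)
        # identity skip connection; assumes the input already has `filters` channels
        return inputs + h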
Example #8
 def __init__(self, d, lr, lambda_pi_usl, use_pi):
     """ flags for each regularizor """
     self.use_pi = use_pi
     """ data and external toolkits """
     self.d = d  # dataset manager
     self.ls = Layers()
     self.lf = LossFunctions(self.ls, d, self.encoder)
     """ placeholders defined outside"""
     self.lr = lr
     self.lambda_pi_usl = lambda_pi_usl
Example #9
    def __init__(self,
                 num_inputs,
                 num_hidden,
                 num_outputs,
                 learn_rate,
                 h_w=None,
                 h_b=None,
                 o_w=None,
                 o_b=None):
        self.num_inputs = num_inputs
        self.hidden_layer = Layers(num_hidden, h_b)
        self.output_layer = Layers(num_outputs, o_b)

        self.init_weights_i2h(h_w)
        self.init_weights_h2o(o_w)
        self.learn_rate = learn_rate
Example #10
    def subsampled(inputs, reuse=False):
        # Less border effect
        inputs = Layers.pad(inputs)

        with tf.variable_scope('subsampled', reuse=reuse):
            conv1 = Layers.conv2d(inputs, 3, 32, 9, 1, 'SAME', 'conv1')
            norm1 = Layers.instance_norm(conv1)
            relu1 = Layers.relu(norm1)

            conv2 = Layers.conv2d(relu1, 32, 64, 3, 2, 'SAME', 'conv2')
            norm2 = Layers.instance_norm(conv2)
            relu2 = Layers.relu(norm2)

            conv3 = Layers.conv2d(relu2, 64, 128, 3, 2, 'SAME', 'conv3')
            norm3 = Layers.instance_norm(conv3)
            relu3 = Layers.relu(norm3)

        return relu3
Example #11
    def __init__(self, is_3d, is_read_attention, is_write_attention, read_n, write_n, h, w, c):
    
        """ to manage do_share flag inside Layers object, ImageInterface has Layers as its own property """
        self.do_share = False
        self.ls       = Layers()
        self.is_3d    = is_3d
        self.read_n   = read_n
        self.write_n  = write_n
        self.h = h
        self.w = w
        self.c = c

        if is_read_attention:
            self.read = self._read_attention
        else:
            self.read = self._read_no_attention
    
        if is_write_attention:
            self.write = self._write_attention
        else:
            self.write = self._write_no_attention
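The no-attention read path selected above is not shown. In DRAW-style models it usually just concatenates the flattened input image with the current error image; a sketch under that assumption (the method body is hypothetical):

    def _read_no_attention(self, x, x_hat, h_dec_prev):
        # concatenate input image and error image along the feature axis
        return tf.concat([x, x_hat], 1)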
Example #12
    def __init__(self, variable_list, fully_index, sparse_index, sparse_num,
                 input_shape):

        self._variable_list = copy.deepcopy(variable_list)
        self.layers = Layers(
            weight_init=tf.contrib.layers.xavier_initializer(),
            regularizer=tf.contrib.layers.l2_regularizer(
                FLAGS.regularizer_factor),
            bias_init=tf.constant_initializer(0.0))
        self.fully_index = fully_index
        self.sparse_index = sparse_index
        self.sparse_num = sparse_num
        self.index_fix = [0, self.fully_index, self.sparse_index]
        self._input_shape = input_shape
Example #13
    def upsampling(inputs, reuse=False):
        with tf.variable_scope('upsampling', reuse=reuse):
            deconv1 = Layers.resize_conv2d(inputs, 128, 64, 3, 2, 'SAME', 'deconv1')
            denorm1 = Layers.instance_norm(deconv1)
            derelu1 = Layers.relu(denorm1)

            deconv2 = Layers.resize_conv2d(derelu1, 64, 32, 3, 2, 'SAME', 'deconv2')
            denorm2 = Layers.instance_norm(deconv2)
            derelu2 = Layers.relu(denorm2)

            deconv3 = Layers.resize_conv2d(derelu2, 32, 3, 9, 1, 'SAME', 'deconv3')
            denorm3 = Layers.instance_norm(deconv3)
            detanh3 = tf.nn.tanh(denorm3)

            y = (detanh3 + 1) * 127.5

            # Remove the border effect
            y = Layers.remove_pad(y)

        return y
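Taken together with the subsampled (Example #10) and residual_net (Example #7) snippets, this upsampling stage suggests a fast-style-transfer style image transform network. A hedged sketch of how the three pieces might be composed (the function name transform_net is an assumption):

    def transform_net(image, reuse=False):
        h = subsampled(image, reuse=reuse)            # downsample: 3 -> 128 channels
        h = residual_net(h, 'residual', reuse=reuse)  # five residual blocks at 128 channels
        return upsampling(h, reuse=reuse)             # upsample back to an RGB image in [0, 255]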
Example #14
    def backprop():
        for l in range(len(ConvModel.layer_info) - 1, -1, -1):
            # retrieve dcdact for final layer
            if l == len(ConvModel.layer_info) - 1:
                dact_l = Layers.dact_fc_layer_outer(ConvModel.act_tensor[l + 1], ConvModel.target)
            # retrieve dcdact for layers that link to fc layers
            elif ConvModel.layer_info[l + 1][0] == 'fc_layer':
                dact_l = Layers.dact_fc_layer_inner(ConvModel.w_tensor[l + 1], ConvModel.dz_tensor[l + 1],
                                                    ConvModel.dact_tensor[l + 1]).reshape(ConvModel.dz_tensor[l].shape)
            # retrieve dcdact for layers that link to convolutional layers
            elif ConvModel.layer_info[l + 1][0] == 'convolve3d':
                dact_l = Layers.dact_conv_l(ConvModel.dz_tensor[l], ConvModel.layer_info[l + 1][2],
                                            ConvModel.layer_info[l + 1][1], ConvModel.w_tensor[l + 1],
                                            ConvModel.dz_tensor[l + 1], ConvModel.dact_tensor[l + 1])
            # no backprop operations for layers that link to pooling layers
            else:
                continue

            # retrieve cost derivatives for fully connected layer
            if ConvModel.layer_info[l][0] == 'fc_layer':
                cost_diff = Layers.dcdw_fc(ConvModel.dz_tensor[l], ConvModel.act_tensor[l], dact_l)
            # retrieve cost derivatives for convolutional layer without maxpooling
            elif ConvModel.layer_info[l][0] == 'convolve3d':
                cost_diff = Layers.dcdw_conv_l(ConvModel.act_tensor[l], ConvModel.layer_info[l][2],
                                               ConvModel.layer_info[l][1], ConvModel.w_tensor[l].shape[3],
                                               ConvModel.dz_tensor[l], dact_l)
            # retrieve cost derivatives for convolutional layer with maxpooling
            else:
                dact_l = Layers.apply_maxpool_mask(dact_l, ConvModel.layer_info[l][1], ConvModel.layer_info[l][2],
                                                   ConvModel.maxpool_masks[l])
                cost_diff = Layers.dcdw_conv_l(ConvModel.act_tensor[l - 1], ConvModel.layer_info[l - 1][2],
                                               ConvModel.layer_info[l - 1][1], ConvModel.w_tensor[l - 1].shape[3],
                                               ConvModel.dz_tensor[l - 1], dact_l)

            # store the cost derivatives
            if ConvModel.layer_info[l][0] == 'maxpool3d':
                ConvModel.dw_tensor[l - 1] = cost_diff[0]
                ConvModel.db_tensor[l - 1] = cost_diff[1]
                ConvModel.dact_tensor[l - 1] = cost_diff[2]
            else:
                ConvModel.dw_tensor[l] = cost_diff[0]
                ConvModel.db_tensor[l] = cost_diff[1]
                ConvModel.dact_tensor[l] = cost_diff[2]
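The backward pass above only stores the derivatives. A minimal sketch of the plain-SGD parameter update that could consume the stored gradients afterwards (the update rule and learning-rate argument are assumptions, and dw_tensor / db_tensor are assumed to be dicts keyed by layer index, matching how they are written above):

    def update_params(learning_rate):
        # apply a vanilla gradient-descent step to every layer that stored a weight gradient
        for l, dw in ConvModel.dw_tensor.items():
            ConvModel.w_tensor[l] -= learning_rate * dw
            ConvModel.b_tensor[l] -= learning_rate * ConvModel.db_tensor[l]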
Example #15
class PI(object):
    def __init__(self, d, lr, lambda_pi_usl, use_pi):
        """ flags for each regularizor """
        self.use_pi = use_pi
        """ data and external toolkits """
        self.d = d  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, d, self.encoder)
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_pi_usl = lambda_pi_usl

    def encoder(self, x, is_train=True, do_update_bn=True):
        """ https://arxiv.org/pdf/1610.02242.pdf """

        if is_train:
            h = self.distort(x)
            h = self.ls.get_corrupted(x, 0.15)
        else:
            h = x

        scope = '1'
        h = self.ls.conv2d(scope + '_1', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 128, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '2'
        h = self.ls.conv2d(scope + '_1', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 256, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '3'
        h = self.ls.conv2d(scope + '_1', h, 512, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2',
                           h,
                           256,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = self.ls.conv2d(scope + '_3',
                           h,
                           128,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = tf.reduce_mean(h, reduction_indices=[1, 2])  # Global average pooling
        h = self.ls.dense(scope, h, self.d.l)

        return h

    def build_graph_train(self, x_l, y_l, x, is_supervised=True):

        o = dict()  # output
        loss = 0

        logit = self.encoder(x)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logit_l = self.encoder(
                x_l, is_train=True,
                do_update_bn=False)  # for pyx and vat loss computation
        """ Classification Loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ PI Model Loss """
        if self.use_pi:
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                _, _, o['Lp'] = self.lf.get_loss_pi(x, logit, is_train=True)
                loss += self.lambda_pi_usl * o['Lp']
        else:
            o['Lp'] = tf.constant(0)
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        #self.op = optimizer.minimize(loss)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)
        self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        logit_l = self.encoder(
            x_l, is_train=False,
            do_update_bn=False)  # for pyx and vat loss computation
        """ classification loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ set losses """
        o['loss'] = loss
        self.o_test = o

    def distort(self, x):

        _d = self.d

        def _distort(a_image):
            """
            bounding_boxes: A Tensor of type float32.
                3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. 
            Bounding boxes are supplied and returned as [y_min, x_min, y_max, x_max]
            """
            # shape: [1, 1, 4]
            bounding_boxes = tf.constant([[[1 / 10, 1 / 10, 9 / 10, 9 / 10]]],
                                         dtype=tf.float32)

            begin, size, _ = tf.image.sample_distorted_bounding_box(
                (_d.h, _d.w, _d.c),
                bounding_boxes,
                min_object_covered=(8.5 / 10.0),
                aspect_ratio_range=[7.0 / 10.0, 10.0 / 7.0])

            a_image = tf.slice(a_image, begin, size)
            """ for the purpose of distorting not use tf.image.resize_image_with_crop_or_pad under """
            a_image = tf.image.resize_images(a_image, [_d.h, _d.w])
            """ due to the size of channel returned from tf.image.resize_images is not being given,
                specify it manually. """
            a_image = tf.reshape(a_image, [_d.h, _d.w, _d.c])
            return a_image

        """ process batch times in parallel """
        return tf.map_fn(_distort, x)
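get_loss_pi itself is not shown in this example. In the Pi-model paper referenced above (arXiv:1610.02242), the unsupervised loss is the mean squared difference between two stochastically perturbed forward passes of the same batch; a sketch under that assumption (the function name and body are hypothetical):

    def pi_consistency_loss(encoder, x, logit_first_pass):
        # second forward pass with its own dropout/augmentation noise, sharing variables
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logit_second_pass = encoder(x, is_train=True)
        return tf.reduce_mean(tf.squared_difference(logit_first_pass, logit_second_pass))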
Example #16
 def __init__(self, channel=None):
     self.rng_numpy, self.rng_theano = get_two_rngs()
     self.layers = Layers()
     self.predict = Predict()
     self.channel = channel
Example #17
class LVAE(object):
    def __init__(self, d, lr, lambda_z_wu, do_classify, use_kl=True):
        """ model architecture """
        self.MLP_SIZES = [512, 256, 256, 128, 128]
        self.Z_SIZES = [64, 32, 32, 32, 32]
        self.L = L = len(self.MLP_SIZES)

        self.do_classify = do_classify
        """ flags for regularizers """
        self.use_kl = use_kl
        """ data and external toolkits """
        self.d = d  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, d, self.encoder)
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_z_wu = lambda_z_wu
        """ cache for mu and sigma """
        self.e_mus, self.e_logsigmas = [0] * L, [0] * L  # q(z_i+1 | z_i), bottom-up inference as Eq.7-9
        self.p_mus, self.p_logsigmas = [0] * L, [0] * L  # p(z_i | z_i+1), top-down prior as Eq.1-3
        self.d_mus, self.d_logsigmas = [0] * L, [0] * L  # q(z_i | .), bidirectional inference as Eq.17-19

    def encoder(self, x, is_train=True, do_update_bn=True):

        h = x
        for l in range(self.L):
            scope = 'Encode_L' + str(l)
            h = self.ls.dense(scope, h, self.MLP_SIZES[l])
            h = self.ls.bn(scope, h, is_train, do_update_bn, name=scope)
            h = tf.nn.elu(h)
            """ prepare for bidirectional inference """
            _, self.e_mus[l], self.e_logsigmas[l] = self.ls.vae_sampler(
                scope, h, self.Z_SIZES[l], tf.nn.softplus)  # Eq.13-15
        #return h
        return self.e_mus[-1]

    def decoder(self, is_train=True, do_update_bn=True):

        for l in range(self.L - 1, -1, -1):
            scope = 'Decoder_L' + str(l)

            if l == self.L - 1:
                """ At the highest latent layer, mu & sigma are identical to those outputed from encoer.
                    And making actual z is not necessary for the highest layer."""
                mu, logsigma = self.e_mus[l], self.e_logsigmas[l]
                self.d_mus[l], self.d_logsigmas[l] = mu, logsigma

                z = self.ls.sampler(self.d_mus[l], tf.exp(self.d_logsigmas[l]))
                """ prior of z_L is set as standard Gaussian, N(0,I). """
                self.p_mus[l], self.p_logsigmas[l] = tf.zeros(
                    (mu.get_shape())), tf.zeros((logsigma.get_shape()))

            else:
                """ prior is developed from z of the above layer """
                _, self.p_mus[l], self.p_logsigmas[l] = self.ls.vae_sampler(
                    scope, z, self.Z_SIZES[l], tf.nn.softplus)  # Eq.13-15

                z, self.d_mus[l], self.d_logsigmas[
                    l] = self.ls.precision_weighted_sampler(
                        scope, (self.e_mus[l], tf.exp(self.e_logsigmas[l])),
                        (self.p_mus[l], tf.exp(
                            self.p_logsigmas[l])))  # Eq.17-19
        """ go out to the input space """
        _d = self.d
        x = self.ls.dense('bottom', z, _d.img_size,
                          tf.nn.elu)  # reconstructed input

        if _d.is_3d: x = tf.reshape(x, (-1, _d.h, _d.w, _d.c))

        return x

    def build_graph_train(self, x_l, y_l, x):

        o = dict()  # output
        loss = 0

        logit = self.encoder(x)
        x_reconst = self.decoder()

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logit_l = self.encoder(
                x_l, is_train=True,
                do_update_bn=False)  # for pyx and vat loss computation
        """ Classification Loss """
        if self.do_classify:
            o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
            loss += o['Ly']
        """ for visualizationc """
        o['z'], o['y'] = logit, y_l
        """ p(x|z) Reconstruction Loss """
        o['Lr'] = self.lf.get_loss_pxz(x_reconst, x, 'DiscretizedLogistic')
        loss += o['Lr']
        o['x'] = x
        o['cs'] = x_reconst
        """ VAE KL-Divergence Loss """
        if self.use_kl:
            o['KL1'], o['KL2'], o['Lz'] = self.lf.get_loss_kl(self,
                                                              _lambda=10.0)
            loss += self.lambda_z_wu * o['Lz']
        else:
            o['KL1'], o['KL2'], o['Lz'] = tf.constant(0), tf.constant(
                0), tf.constant(0)
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        #self.op = optimizer.minimize(loss)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)
        #for v in tf.all_variables(): print("%s : %s" % (v.name,v.get_shape()))
        self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        logit_l = self.encoder(
            x_l, is_train=False,
            do_update_bn=False)  # for pyx and vat loss computation
        """ classification loss """
        if self.do_classify:
            o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
            loss += o['Ly']
        """ for visualizationc """
        o['z'], o['y'] = logit_l, y_l
        """ set losses """
        o['loss'] = loss
        self.o_test = o
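precision_weighted_sampler is not shown in this example. In the ladder-VAE formulation referenced by the Eq.17-19 comments, the bottom-up and top-down Gaussians are merged by precision weighting; a sketch of that combination (the helper name and signature are assumptions):

    def precision_weighted(mu_e, sigma_e, mu_p, sigma_p):
        # precision = 1 / variance; the merged mean is the precision-weighted average
        prec_e, prec_p = 1.0 / tf.square(sigma_e), 1.0 / tf.square(sigma_p)
        var = 1.0 / (prec_e + prec_p)
        mu = (mu_e * prec_e + mu_p * prec_p) * var
        return mu, tf.sqrt(var)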
Example #18
MAXLEN = 10
HIDDEN_SIZE = 100
EPOCHS = 500
MIN_COUNT = 1
lr = 0.5
pretrained = False
activation = "linear"

helper = DataHelper()
emb_matrix, inputs, targets = helper.trainset_preparation(
    path_data, EMB_DIM, MAXLEN, BATCH_SIZE, MIN_COUNT, pretrained)
VOCAB_SIZE = len(helper.stoi)
helper.save("./binaries/vocab.pkl")

itos = {i: t for t, i in helper.stoi.items()}
lm = Layers(VOCAB_SIZE, EMB_DIM, HIDDEN_SIZE, emb_matrix, activation)


def generate(sent_input, len_sent):
    sent_input = helper.preprocessing(sent_input)
    token = sent_input.split()
    sequence = helper.transform(token)
    for i in range(len_sent):
        inputs = np.array([sequence])
        prob = lm.forward(inputs)[-1][-1]
        index = np.argmax(prob)
        sequence.append(index)
    print(" ".join([itos[idx] for idx in sequence]))


for e in range(EPOCHS):
Example #19
class Attention(object):
    def __init__(self, channel=None):
        self.rng_numpy, self.rng_theano = get_two_rngs()
        self.layers = Layers()
        self.predict = Predict()
        self.channel = channel

    def load_params(self, path, params):
        # load params from disk
        pp = np.load(path)
        for kk, vv in params.iteritems():
            if kk not in pp:
                raise Warning('%s is not in the archive'%kk)
            params[kk] = pp[kk]

        return params

    def init_params(self, options):
        # all parameters
        params = OrderedDict()
        # embedding
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

        ctx_dim = options['ctx_dim']

        # init_state, init_cell
        params = self.layers.get_layer('ff')[0](params, nin=ctx_dim, nout=options['mu_dim'],
                                                prefix='ff_state')
        params = self.layers.get_layer('ff')[0](params, nin=ctx_dim, nout=options['mu_dim'],
                                                prefix='ff_memory')

        # decoder: LSTM
        params = self.layers.get_layer('lstm')[0](params, nin=options['dim_word'],
                                                  dim=options['tu_dim'], prefix='tu_lstm')
        params = self.layers.get_layer('attend')[0](params, nin=options['tu_dim'],
                                                    dimctx=ctx_dim, prefix='attend')
        params = self.layers.get_layer('lstm_concat')[0](options, params, nin=options['tu_dim'],
                                                         dim=options['mu_dim'], dimctx=ctx_dim,
                                                         prefix='mu_lstm')

        # readout
        params = self.layers.get_layer('ff')[0](params, nin=options['mu_dim'], nout=options['dim_word'],
                                                prefix='ff_logit_lstm')
        if options['ctx2out']:
            params = self.layers.get_layer('ff')[0](params, nin=ctx_dim, nout=options['dim_word'],
                                                    prefix='ff_logit_ctx')

        params = self.layers.get_layer('ff')[0](params, nin=options['dim_word'], nout=options['n_words'],
                                                prefix='ff_logit')
        return params

    def build_model(self, tparams, options):
        trng = RandomStreams(1234)
        use_noise = theano.shared(np.float32(0.))
        # description string: #words x #samples
        x = tensor.matrix('x', dtype='int64')
        mask = tensor.matrix('mask', dtype='float32')
        # context: #samples x #annotations x dim
        ctx = tensor.tensor3('ctx', dtype='float32')
        mask_ctx = tensor.matrix('mask_ctx', dtype='float32')

        n_timesteps = x.shape[0]
        n_samples = x.shape[1]

        # index into the word embedding matrix, shift it forward in time
        emb = tparams['Wemb'][x.flatten()].reshape(
                [n_timesteps, n_samples, options['dim_word']])
        emb_shifted = tensor.zeros_like(emb)
        emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
        emb = emb_shifted

        ctx_ = ctx
        counts = mask_ctx.sum(-1).dimshuffle(0,'x')
        ctx_mean = ctx_.sum(1)/counts

        # initial state/cell
        init_state = self.layers.get_layer('ff')[1](tparams, ctx_mean,
                                                    activ='tanh', prefix='ff_state')
        init_memory = self.layers.get_layer('ff')[1](tparams, ctx_mean,
                                                     activ='tanh', prefix='ff_memory')

        # decoder
        tu_lstm = self.layers.get_layer('lstm')[1](tparams, emb, mask=mask, prefix='tu_lstm')
        attend = self.layers.get_layer('attend')[1](tparams, tu_lstm[0], ctx_)
        mu_lstm = self.layers.get_layer('lstm_concat')[1](options, tparams, tu_lstm[0],
                                                          mask=mask, ctxs=attend[1],
                                                          one_step=False,
                                                          init_state=init_state,
                                                          init_memory=init_memory,
                                                          trng=trng,
                                                          use_noise=use_noise,
                                                          prefix='mu_lstm')

        proj_h = mu_lstm[0]
        betas = mu_lstm[2]
        ctxs = mu_lstm[3]
        alphas = attend[0]
        if options['use_dropout']:
            proj_h = self.layers.dropout_layer(proj_h, use_noise, trng)

        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](tparams, proj_h, activ='linear',
                                               prefix='ff_logit_lstm')
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            logit += self.layers.get_layer('ff')[1](tparams, ctxs, activ='linear',
                                                    prefix='ff_logit_ctx')
        logit = tanh(logit)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise, trng)

        # (t,m,n_words)
        logit = self.layers.get_layer('ff')[1](tparams, logit,
                                               activ='linear', prefix='ff_logit')
        logit_shp = logit.shape
        # (t*m, n_words)
        probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1],
                                                   logit_shp[2]]))

        # cost
        x_flat = x.flatten() # (t*m,)
        cost = -tensor.log(probs[tensor.arange(x_flat.shape[0]), x_flat] + 1e-8)
        cost = cost.reshape([x.shape[0], x.shape[1]])
        cost = (cost * mask).sum(0)

        extra = [probs, alphas, betas]
        test = [attend[1]]
        return trng, use_noise, x, mask, ctx, mask_ctx, alphas, cost, extra, test

    def pred_probs(self, whichset, f_log_probs, verbose=True):
        probs = []
        n_done = 0
        NLL = []
        L = []
        if whichset == 'train':
            tags = self.engine.train
            iterator = self.engine.kf_train
        elif whichset == 'valid':
            tags = self.engine.valid
            iterator = self.engine.kf_valid
        elif whichset == 'test':
            tags = self.engine.test
            iterator = self.engine.kf_test
        else:
            raise NotImplementedError()
        n_samples = np.sum([len(index) for index in iterator])
        for index in iterator:
            tag = [tags[i] for i in index]
            x, mask, ctx, ctx_mask,vid_names = data_engine.prepare_data(
                self.engine, tag)
            pred_probs = f_log_probs(x, mask, ctx, ctx_mask)
            L.append(mask.sum(0).tolist())
            NLL.append((-1 * pred_probs).tolist())
            probs.append(pred_probs.tolist())
            n_done += len(tag)
            if verbose:
                sys.stdout.write('\rComputing LL on %d/%d examples'%(
                             n_done, n_samples))
                sys.stdout.flush()
        print
        probs = flatten_list_of_list(probs)
        NLL = flatten_list_of_list(NLL)
        L = flatten_list_of_list(L)
        perp = 2**(np.sum(NLL) / np.sum(L) / np.log(2))
        return -1 * np.mean(probs), perp
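For reference, the perplexity computed above, 2 ** (np.sum(NLL) / np.sum(L) / np.log(2)), is algebraically identical to the exponentiated average negative log-likelihood per target word:

    # change of base: 2 ** (x / ln 2) == e ** x
    perp = np.exp(np.sum(NLL) / np.sum(L))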

    def train(self,
              random_seed=1234,
              reload_=False,
              verbose=True,
              debug=True,
              save_model_dir='',
              from_dir=None,
              # dataset
              dataset='youtube2text',
              video_feature='googlenet',
              K=10,
              OutOf=240,
              # network
              dim_word=256, # word vector dimensionality
              ctx_dim=-1, # context vector dimensionality, auto set
              tu_dim=512,
              mu_dim=1024,
              vu_dim=1024,
              n_layers_out=1,
              n_layers_init=1,
              prev2out=False,
              ctx2out=False,
              selector=False,
              n_words=100000,
              maxlen=100, # maximum length of the description
              use_dropout=False,
              isGlobal=False,
              # training
              patience=10,
              max_epochs=5000,
              decay_c=0.,
              alpha_c=0.,
              alpha_entropy_r=0.,
              lrate=0.01,
              optimizer='adadelta',
              clip_c=2.,
              # minibatch
              batch_size = 64,
              valid_batch_size = 64,
              dispFreq=100,
              validFreq=10,
              saveFreq=10, # save the parameters after every saveFreq updates
              sampleFreq=10, # generate some samples after every sampleFreq updates
              # metric
              metric='blue'
              ):

        self.rng_numpy, self.rng_theano = get_two_rngs()

        model_options = locals().copy()
        if 'self' in model_options:
            del model_options['self']
        model_options = validate_options(model_options)
        with open('%smodel_options.pkl'%save_model_dir, 'wb') as f:
            pkl.dump(model_options, f)

        print 'Loading data'
        self.engine = data_engine.Movie2Caption('attention', dataset,
                                                video_feature,
                                                batch_size, valid_batch_size,
                                                maxlen, n_words,
                                                K, OutOf)
        model_options['ctx_dim'] = self.engine.ctx_dim

        print 'init params'
        t0 = time.time()
        params = self.init_params(model_options)

        # reloading
        if reload_:
            model_saved = from_dir+'/model_best_so_far.npz'
            assert os.path.isfile(model_saved)
            print "Reloading model params..."
            params = load_params(model_saved, params)

        tparams = init_tparams(params)
        if verbose:
            print tparams.keys()

        trng, use_noise, x, mask, ctx, mask_ctx, alphas, cost, extra, test = \
            self.build_model(tparams, model_options)

        if debug:
            print 'building test'
            test_fun = theano.function([x, mask, ctx, mask_ctx],
                                       test,
                                       name='f_test',
                                       on_unused_input='ignore')

        print 'building sampler'
        f_init, f_next = self.predict.build_sampler(self.layers, tparams, model_options, use_noise, trng)

        # before any regularizer
        print 'building f_log_probs'
        f_log_probs = theano.function([x, mask, ctx, mask_ctx], -cost,
                                      profile=False, on_unused_input='ignore')

        cost = cost.mean()
        if decay_c > 0.:
            decay_c = theano.shared(np.float32(decay_c), name='decay_c')
            weight_decay = 0.
            for kk, vv in tparams.iteritems():
                weight_decay += (vv ** 2).sum()
            weight_decay *= decay_c
            cost += weight_decay

        if alpha_c > 0.:
            alpha_c = theano.shared(np.float32(alpha_c), name='alpha_c')
            alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
            cost += alpha_reg

        if alpha_entropy_r > 0:
            alpha_entropy_r = theano.shared(np.float32(alpha_entropy_r),
                                            name='alpha_entropy_r')
            alpha_reg_2 = alpha_entropy_r * (-tensor.sum(alphas *
                        tensor.log(alphas+1e-8),axis=-1)).sum(0).mean()
            cost += alpha_reg_2
        else:
            alpha_reg_2 = tensor.zeros_like(cost)
        print 'building f_alpha'
        f_alpha = theano.function([x, mask, ctx, mask_ctx],
                                  [alphas, alpha_reg_2],
                                  name='f_alpha',
                                  on_unused_input='ignore')

        print 'compute grad'
        grads = tensor.grad(cost, wrt=itemlist(tparams))
        if clip_c > 0.:
            g2 = 0.
            for g in grads:
                g2 += (g**2).sum()
            new_grads = []
            for g in grads:
                new_grads.append(tensor.switch(g2 > (clip_c**2),
                                               g / tensor.sqrt(g2) * clip_c,
                                               g))
            grads = new_grads

        lr = tensor.scalar(name='lr')
        print 'build train fns'
        f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads,
                                                  [x, mask, ctx, mask_ctx], cost,
                                                  extra + grads)

        print 'compilation took %.4f sec'%(time.time()-t0)
        print 'Optimization'

        history_errs = []
        # reload history
        if reload_:
            print 'loading history error...'
            history_errs = np.load(
                from_dir+'model_best_so_far.npz')['history_errs'].tolist()

        bad_counter = 0

        processes = None
        queue = None
        rqueue = None
        shared_params = None

        uidx = 0
        uidx_best_blue = 0
        uidx_best_valid_err = 0
        estop = False
        best_p = unzip(tparams)
        best_blue_valid = 0
        best_valid_err = 999
        alphas_ratio = []
        for eidx in xrange(max_epochs):
            n_samples = 0
            train_costs = []
            grads_record = []
            print 'Epoch ', eidx
            for idx in self.engine.kf_train:
                tags = [self.engine.train[index] for index in idx]
                n_samples += len(tags)
                uidx += 1
                use_noise.set_value(1.)

                pd_start = time.time()
                x, mask, ctx, ctx_mask,vid_names = data_engine.prepare_data(
                    self.engine, tags)

                if debug:
                    datas = test_fun(x, mask, ctx, ctx_mask)
                    for item in datas:
                        print item[0].shape

                pd_duration = time.time() - pd_start
                if x is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    continue

                ud_start = time.time()
                rvals = f_grad_shared(x, mask, ctx, ctx_mask)
                cost = rvals[0]
                probs = rvals[1]
                alphas = rvals[2]
                betas = rvals[3]
                grads = rvals[4:]
                grads, NaN_keys = grad_nan_report(grads, tparams)
                if len(grads_record) >= 5:
                    del grads_record[0]
                grads_record.append(grads)
                if NaN_keys != []:
                    print 'grads contain NaN'
                    import pdb; pdb.set_trace()
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected in cost'
                    import pdb; pdb.set_trace()
                # update params
                f_update(lrate)
                ud_duration = time.time() - ud_start

                if eidx == 0:
                    train_error = cost
                else:
                    train_error = train_error * 0.95 + cost * 0.05
                train_costs.append(cost)

                if np.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, ', Update ', uidx, \
                        ', Train cost mean so far', train_error, \
                        ', betas mean', np.round(betas.mean(), 3), \
                        ', fetching data time spent (sec)', np.round(pd_duration, 3), \
                        ', update time spent (sec)', np.round(ud_duration, 3)
                    alphas,reg = f_alpha(x,mask,ctx,ctx_mask)
                    print 'alpha ratio %.3f, reg %.3f' % (
                        alphas.min(-1).mean() / (alphas.max(-1)).mean(), reg)

                if np.mod(uidx, saveFreq) == 0:
                    pass

                if np.mod(uidx, sampleFreq) == 0:
                    use_noise.set_value(0.)
                    print '------------- sampling from train ----------'
                    self.predict.sample_execute(self.engine, model_options, tparams,
                                                f_init, f_next, x, ctx, ctx_mask, trng,vid_names)

                    print '------------- sampling from valid ----------'
                    idx = self.engine.kf_valid[np.random.randint(1, len(self.engine.kf_valid) - 1)]
                    tags = [self.engine.valid[index] for index in idx]
                    x_s, mask_s, ctx_s, mask_ctx_s,vid_names = data_engine.prepare_data(self.engine, tags)
                    self.predict.sample_execute(self.engine, model_options, tparams,
                                                f_init, f_next, x_s, ctx_s, mask_ctx_s, trng, vid_names)
                    # end of sample

                if validFreq != -1 and np.mod(uidx, validFreq) == 0:
                    t0_valid = time.time()
                    alphas,_ = f_alpha(x, mask, ctx, ctx_mask)
                    ratio = alphas.min(-1).mean()/(alphas.max(-1)).mean()
                    alphas_ratio.append(ratio)
                    np.savetxt(save_model_dir+'alpha_ratio.txt',alphas_ratio)

                    current_params = unzip(tparams)
                    np.savez(save_model_dir+'model_current.npz',
                             history_errs=history_errs, **current_params)

                    use_noise.set_value(0.)
                    train_err = -1
                    train_perp = -1
                    valid_err = -1
                    valid_perp = -1
                    test_err = -1
                    test_perp = -1

                    if not debug:
                        # first compute train cost
                        if 0:
                            print 'computing cost on trainset'
                            train_err, train_perp = self.pred_probs(
                                    'train', f_log_probs,
                                    verbose=model_options['verbose'])
                        else:
                            train_err = 0.
                            train_perp = 0.
                        if 1:
                            print 'validating...'
                            valid_err, valid_perp = self.pred_probs(
                                'valid', f_log_probs,
                                verbose=model_options['verbose'],
                                )
                        else:
                            valid_err = 0.
                            valid_perp = 0.
                        if 0:
                            print 'testing...'
                            test_err, test_perp = self.pred_probs(
                                'test', f_log_probs,
                                verbose=model_options['verbose']
                                )
                        else:
                            test_err = 0.
                            test_perp = 0.

                    mean_ranking = 0
                    blue_t0 = time.time()
                    scores, processes, queue, rqueue, shared_params = \
                        metrics.compute_score(model_type='attention',
                                              model_archive=current_params,
                                              options=model_options,
                                              engine=self.engine,
                                              save_dir=save_model_dir,
                                              beam=5, n_process=5,
                                              whichset='both',
                                              on_cpu=False,
                                              processes=processes, queue=queue, rqueue=rqueue,
                                              shared_params=shared_params, metric=metric,
                                              one_time=False,
                                              f_init=f_init, f_next=f_next, model=self.predict
                                              )

                    valid_B1 = scores['valid']['Bleu_1']
                    valid_B2 = scores['valid']['Bleu_2']
                    valid_B3 = scores['valid']['Bleu_3']
                    valid_B4 = scores['valid']['Bleu_4']
                    valid_Rouge = scores['valid']['ROUGE_L']
                    valid_Cider = scores['valid']['CIDEr']
                    valid_meteor = scores['valid']['METEOR']
                    test_B1 = scores['test']['Bleu_1']
                    test_B2 = scores['test']['Bleu_2']
                    test_B3 = scores['test']['Bleu_3']
                    test_B4 = scores['test']['Bleu_4']
                    test_Rouge = scores['test']['ROUGE_L']
                    test_Cider = scores['test']['CIDEr']
                    test_meteor = scores['test']['METEOR']
                    print 'computing meteor/blue score used %.4f sec, '\
                          'blue score: %.1f, meteor score: %.1f'%(
                    time.time()-blue_t0, valid_B4, valid_meteor)
                    history_errs.append([eidx, uidx, train_perp, train_err,
                                         valid_perp, valid_err,
                                         test_perp, test_err,
                                         valid_B1, valid_B2, valid_B3,
                                         valid_B4, valid_meteor, valid_Rouge, valid_Cider,
                                         test_B1, test_B2, test_B3,
                                         test_B4, test_meteor, test_Rouge, test_Cider])
                    np.savetxt(save_model_dir+'train_valid_test.txt',
                               history_errs, fmt='%.3f')
                    print 'save validation results to %s'%save_model_dir
                    # save best model according to the best blue or meteor
                    if len(history_errs) > 1 and valid_B4 > np.array(history_errs)[:-1, 11].max():
                        print 'Saving to %s...'%save_model_dir,
                        np.savez(
                            save_model_dir+'model_best_blue_or_meteor.npz',
                            history_errs=history_errs, **best_p)
                    if len(history_errs) > 1 and valid_err < np.array(history_errs)[:-1, 5].min():
                        best_p = unzip(tparams)
                        bad_counter = 0
                        best_valid_err = valid_err
                        uidx_best_valid_err = uidx

                        print 'Saving to %s...'%save_model_dir,
                        np.savez(
                            save_model_dir+'model_best_so_far.npz',
                            history_errs=history_errs, **best_p)
                        with open('%smodel_options.pkl'%save_model_dir, 'wb') as f:
                            pkl.dump(model_options, f)
                        print 'Done'
                    elif len(history_errs) > 1 and valid_err >= np.array(history_errs)[:-1, 5].min():
                        bad_counter += 1
                        print 'history best ', np.array(history_errs)[:,6].min()
                        print 'bad_counter ', bad_counter
                        print 'patience ', patience
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

                    if test_B4 > 0.48 and test_meteor > 0.32:
                        print 'Saving to %s...' % save_model_dir,
                        np.savez(
                            save_model_dir + 'model_' + str(uidx) + '.npz',
                            history_errs=history_errs, **current_params)

                    if self.channel:
                        self.channel.save()

                    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err, \
                          'best valid err so far',best_valid_err
                    print 'valid took %.2f sec'%(time.time() - t0_valid)
                    # end of validation
                if debug:
                    break
            if estop:
                break
            if debug:
                break

            # end for loop over minibatches
            print 'This epoch has seen %d samples, train cost %.2f'%(
                n_samples, np.mean(train_costs))
        # end for loop over epochs
        print 'Optimization ended.'
        if best_p is not None:
            zipp(best_p, tparams)

        print 'stopped at epoch %d, minibatch %d, '\
              'current Train %.2f, current Valid %.2f, current Test %.2f '%(
               eidx, uidx, np.mean(train_err), np.mean(valid_err), np.mean(test_err))
        params = copy.copy(best_p)
        np.savez(save_model_dir+'model_best.npz',
                 train_err=train_err,
                 valid_err=valid_err, test_err=test_err, history_errs=history_errs,
                 **params)

        if history_errs != []:
            history = np.asarray(history_errs)
            best_valid_idx = history[:,6].argmin()
            np.savetxt(save_model_dir+'train_valid_test.txt', history, fmt='%.4f')
            print 'final best exp ', history[best_valid_idx]

        return train_err, valid_err, test_err
Example #20
 def __init__(self,
              batch: int = None,
              hours: float = None,
              width: int = None,
              height: int = None,
              level: Levels = None,
              reset_chance: float = None,
              failed_actions_chance: float = None,
              **kwargs) -> None:
     super().__init__()
     self.batch: int = batch
     self.hours: float = hours
     self.level: Levels = level
     self.uses = {
         Levels.Causal1: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Keys, LayerType.Door
         },
         Levels.Causal2: {
             LayerType.Blocks, LayerType.Goal, LayerType.Diamond1,
             LayerType.Diamond2, LayerType.Diamond3, LayerType.Diamond4
         },
         Levels.Causal3: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Bluedoor, LayerType.Bluekeys, LayerType.Reddoor,
             LayerType.Redkeys
         },
         Levels.Causal4: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Bluedoor, LayerType.Bluekeys, LayerType.Reddoor,
             LayerType.Redkeys, LayerType.Rock, LayerType.Dirt
         },
         Levels.Rocks:
         {LayerType.Blocks, LayerType.Goal, LayerType.Rock, LayerType.Dirt},
         Levels.Maze: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Door, LayerType.Keys, LayerType.Holder,
             LayerType.Putter
         },
         Levels.Causal5: {
             LayerType.Blocks, LayerType.Goal, LayerType.Brown1,
             LayerType.Brown2, LayerType.Brown3, LayerType.Pink1,
             LayerType.Pink2, LayerType.Pink3
         },
         Levels.Coconuts: {
             LayerType.Blocks, LayerType.Goal, LayerType.Rock,
             LayerType.Dirt, LayerType.Gold, LayerType.Coconut
         },
         Levels.Causal6: {
             LayerType.Blocks, LayerType.Goal, LayerType.Greendown,
             LayerType.Greenup, LayerType.Greenstar, LayerType.Yellowstar,
             LayerType.Bluestar
         },
         Levels.SuperLevel: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Bluedoor, LayerType.Bluekeys, LayerType.Reddoor,
             LayerType.Redkeys, LayerType.Rock, LayerType.Dirt,
             LayerType.Coconut, LayerType.Door, LayerType.Keys
         },
         Levels.SuperLevel2: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Bluedoor, LayerType.Bluekeys, LayerType.Reddoor,
             LayerType.Redkeys, LayerType.Rock, LayerType.Dirt,
             LayerType.Coconut
         },
         Levels.MonsterLevel: {
             LayerType.Blocks, LayerType.Goal, LayerType.Gold,
             LayerType.Monster, LayerType.Rock
         },
         Levels.Causal7: {
             LayerType.Blocks, LayerType.Goal, LayerType.Greencross,
             LayerType.Bluecross, LayerType.Redcross, LayerType.Purplecross
         },
         Levels.CausalSuper: {
             LayerType.Blocks, LayerType.Goal, LayerType.Super1,
             LayerType.Super2, LayerType.Super3, LayerType.Super4,
             LayerType.Super5, LayerType.Super6, LayerType.Super7
         }
     }
     convert = {(use, [
         layer for layer in LayerType if layer.name == name.split('_')[1]
     ][0])
                for name, use in kwargs.items()
                if name.split('_')[0] == "layer"}
     self.layers: Layers = Layers(
         batch, width, height, level, reset_chance, failed_actions_chance,
         *[
             layer for use, layer in convert
             if use and (layer in self.uses[level])
         ])
     for i in range(width):
         for j in range(height):
             for k in range(batch):
                 self.layers.all_items[k][(i, j)] = 0
     self.layers.update(isFirstTime=True)
def train(
        random_seed=1234,
        dim_word=256,  # word vector dimensionality
        ctx_dim=-1,  # context vector dimensionality, auto set
        dim=1000,  # the number of LSTM units
        n_layers_out=1,
        n_layers_init=1,
        encoder='none',
        encoder_dim=100,
        prev2out=False,
        ctx2out=False,
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        alpha_entropy_r=0.,
        lrate=0.01,
        selector=False,
        n_words=100000,
        maxlen=100,  # maximum length of the description
        optimizer='adadelta',
        clip_c=2.,
        batch_size=64,
        valid_batch_size=64,
        save_model_dir='/data/lisatmp3/yaoli/exp/capgen_vid/attention/test/',
        validFreq=10,
        saveFreq=10,  # save the parameters after every saveFreq updates
        sampleFreq=10,  # generate some samples after every sampleFreq updates
        metric='blue',
        dataset='youtube2text',
        video_feature='googlenet',
        use_dropout=False,
        reload_=False,
        from_dir=None,
        K=10,
        OutOf=240,
        verbose=True,
        debug=True):
    rng_numpy, rng_theano = utils.get_two_rngs()

    model_options = locals().copy()
    if 'self' in model_options:
        del model_options['self']
    with open('%smodel_options.pkl' % save_model_dir, 'wb') as f:
        pkl.dump(model_options, f)

    # instance model
    layers = Layers()
    model = Model()

    print 'Loading data'
    engine = data_engine.Movie2Caption('attention', dataset, video_feature,
                                       batch_size, valid_batch_size, maxlen,
                                       n_words, K, OutOf)
    model_options['ctx_dim'] = engine.ctx_dim
    model_options['n_words'] = engine.n_words
    print 'n_words:', model_options['n_words']

    # set test values, for debugging
    idx = engine.kf_train[0]
    [x_tv, mask_tv, ctx_tv, ctx_mask_tv
     ] = data_engine.prepare_data(engine,
                                  [engine.train[index] for index in idx])

    print 'init params'
    t0 = time.time()
    params = model.init_params(model_options)

    # reloading
    if reload_:
        model_saved = from_dir + '/model_best_so_far.npz'
        assert os.path.isfile(model_saved)
        print "Reloading model params..."
        params = utils.load_params(model_saved, params)

    tparams = utils.init_tparams(params)

    trng, use_noise, \
          x, mask, ctx, mask_ctx, \
          cost, extra = \
          model.build_model(tparams, model_options)
    alphas = extra[1]
    betas = extra[2]
    print 'building sampler'
    f_init, f_next = model.build_sampler(tparams, model_options, use_noise,
                                         trng)
    # before any regularizer
    print 'building f_log_probs'
    f_log_probs = theano.function([x, mask, ctx, mask_ctx],
                                  -cost,
                                  profile=False,
                                  on_unused_input='ignore')

    cost = cost.mean()
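    # L2 weight decay: add decay_c * sum of squared parameters to the cost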
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

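    # doubly stochastic attention regularizer: encourage each context frame's
    # attention weights, summed over all timesteps, to stay close to 1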
    if alpha_c > 0.:
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * ((1. - alphas.sum(0))**2).sum(-1).mean()
        cost += alpha_reg

    if alpha_entropy_r > 0:
        alpha_entropy_r = theano.shared(numpy.float32(alpha_entropy_r),
                                        name='alpha_entropy_r')
        alpha_reg_2 = alpha_entropy_r * (-tensor.sum(
            alphas * tensor.log(alphas + 1e-8), axis=-1)).sum(-1).mean()
        cost += alpha_reg_2
    else:
        alpha_reg_2 = tensor.zeros_like(cost)
    print 'building f_alpha'
    f_alpha = theano.function([x, mask, ctx, mask_ctx], [alphas, betas],
                              name='f_alpha',
                              on_unused_input='ignore')

    print 'compute grad'
    grads = tensor.grad(cost, wrt=utils.itemlist(tparams))
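    # global-norm gradient clipping: if the total gradient norm exceeds clip_c,
    # rescale every gradient by clip_c / norm so the overall norm equals clip_c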
    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (clip_c**2), g / tensor.sqrt(g2) * clip_c,
                              g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'build train fns'
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads,
                                              [x, mask, ctx, mask_ctx], cost,
                                              extra + grads)

    print 'compilation took %.4f sec' % (time.time() - t0)
    print 'Optimization'

    history_errs = []
    # reload history
    if reload_:
        print 'loading history error...'
        history_errs = numpy.load(
            from_dir + 'model_best_so_far.npz')['history_errs'].tolist()

    bad_counter = 0

    processes = None
    queue = None
    rqueue = None
    shared_params = None

    uidx = 0
    uidx_best_blue = 0
    uidx_best_valid_err = 0
    estop = False
    best_p = utils.unzip(tparams)
    best_blue_valid = 0
    best_valid_err = 999
    alphas_ratio = []
    for eidx in xrange(max_epochs):
        n_samples = 0
        train_costs = []
        grads_record = []
        print 'Epoch ', eidx
        for idx in engine.kf_train:
            tags = [engine.train[index] for index in idx]
            n_samples += len(tags)
            uidx += 1
            use_noise.set_value(1.)

            pd_start = time.time()
            x, mask, ctx, ctx_mask = data_engine.prepare_data(engine, tags)
            pd_duration = time.time() - pd_start
            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                continue

            ud_start = time.time()
            rvals = f_grad_shared(x, mask, ctx, ctx_mask)
            cost = rvals[0]
            probs = rvals[1]
            alphas = rvals[2]
            betas = rvals[3]
            grads = rvals[4:]
            grads, NaN_keys = utils.grad_nan_report(grads, tparams)
            if len(grads_record) >= 5:
                del grads_record[0]
            grads_record.append(grads)
            if NaN_keys != []:
                print 'grads contain NaN'
                import pdb
                pdb.set_trace()
            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected in cost'
                import pdb
                pdb.set_trace()
            # update params
            f_update(lrate)
            ud_duration = time.time() - ud_start

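            # track an exponential moving average of the minibatch cost
            # (95% previous estimate, 5% current cost) for progress reports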
            if eidx == 0:
                train_error = cost
            else:
                train_error = train_error * 0.95 + cost * 0.05
            train_costs.append(cost)

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Train cost mean so far', \
                  train_error, 'fetching data time spent (sec)', pd_duration, \
                  'update time spent (sec)', ud_duration, 'save_dir', save_model_dir
                alphas, betas = f_alpha(x, mask, ctx, ctx_mask)
                counts = mask.sum(0)
                betas_mean = (betas * mask).sum(0) / counts
                betas_mean = betas_mean.mean()
                print 'alpha ratio %.3f, betas mean %.3f' % (
                    alphas.min(-1).mean() /
                    (alphas.max(-1)).mean(), betas_mean)
                l = 0
                for vv in x[:, 0]:
                    if vv == 0:
                        break
                    if vv in engine.word_idict:
                        print '(', numpy.round(betas[l, 0],
                                               3), ')', engine.word_idict[vv],
                    else:
                        print '(', numpy.round(betas[l, 0], 3), ')', 'UNK',
                    l += 1
                print '(', numpy.round(betas[l, 0], 3), ')'

            if numpy.mod(uidx, saveFreq) == 0:
                pass

            if numpy.mod(uidx, sampleFreq) == 0:
                use_noise.set_value(0.)
                print '------------- sampling from train ----------'
                x_s = x
                mask_s = mask
                ctx_s = ctx
                ctx_mask_s = ctx_mask
                model.sample_execute(engine, model_options, tparams, f_init,
                                     f_next, x_s, ctx_s, ctx_mask_s, trng)
                print '------------- sampling from valid ----------'
                idx = engine.kf_valid[numpy.random.randint(
                    1,
                    len(engine.kf_valid) - 1)]
                tags = [engine.valid[index] for index in idx]
                x_s, mask_s, ctx_s, mask_ctx_s = data_engine.prepare_data(
                    engine, tags)
                model.sample_execute(engine, model_options, tparams, f_init,
                                     f_next, x_s, ctx_s, mask_ctx_s, trng)

            if validFreq != -1 and numpy.mod(uidx, validFreq) == 0:
                t0_valid = time.time()
                alphas, _ = f_alpha(x, mask, ctx, ctx_mask)
                ratio = alphas.min(-1).mean() / (alphas.max(-1)).mean()
                alphas_ratio.append(ratio)
                numpy.savetxt(save_model_dir + 'alpha_ratio.txt', alphas_ratio)

                current_params = utils.unzip(tparams)
                numpy.savez(save_model_dir + 'model_current.npz',
                            history_errs=history_errs,
                            **current_params)

                use_noise.set_value(0.)
                train_err = -1
                train_perp = -1
                valid_err = -1
                valid_perp = -1
                test_err = -1
                test_perp = -1
                if not debug:
                    # first compute train cost
                    if 0:
                        print 'computing cost on trainset'
                        train_err, train_perp = model.pred_probs(
                            engine,
                            'train',
                            f_log_probs,
                            verbose=model_options['verbose'])
                    else:
                        train_err = 0.
                        train_perp = 0.
                    if 1:
                        print 'validating...'
                        valid_err, valid_perp = model.pred_probs(
                            engine,
                            'valid',
                            f_log_probs,
                            verbose=model_options['verbose'],
                        )
                    else:
                        valid_err = 0.
                        valid_perp = 0.
                    if 1:
                        print 'testing...'
                        test_err, test_perp = model.pred_probs(
                            engine,
                            'test',
                            f_log_probs,
                            verbose=model_options['verbose'])
                    else:
                        test_err = 0.
                        test_perp = 0.

                mean_ranking = 0
                blue_t0 = time.time()
                scores, processes, queue, rqueue, shared_params = \
                    metrics.compute_score(
                    model_type='attention',
                    model_archive=current_params,
                    options=model_options,
                    engine=engine,
                    save_dir=save_model_dir,
                    beam=5, n_process=5,
                    whichset='both',
                    on_cpu=False,
                    processes=processes, queue=queue, rqueue=rqueue,
                    shared_params=shared_params, metric=metric,
                    one_time=False,
                    f_init=f_init, f_next=f_next, model=model
                    )
                '''
                 {'blue': {'test': [-1], 'valid': [77.7, 60.5, 48.7, 38.5, 38.3]},
                 'alternative_valid': {'Bleu_3': 0.40702270203174923,
                 'Bleu_4': 0.29276570520368456,
                 'CIDEr': 0.25247168210607884,
                 'Bleu_2': 0.529069629270047,
                 'Bleu_1': 0.6804308797115253,
                 'ROUGE_L': 0.51083584331688392},
                 'meteor': {'test': [-1], 'valid': [0.282787550236724]}}
                '''

                valid_B1 = scores['valid']['Bleu_1']
                valid_B2 = scores['valid']['Bleu_2']
                valid_B3 = scores['valid']['Bleu_3']
                valid_B4 = scores['valid']['Bleu_4']
                valid_Rouge = scores['valid']['ROUGE_L']
                valid_Cider = scores['valid']['CIDEr']
                valid_meteor = scores['valid']['METEOR']
                test_B1 = scores['test']['Bleu_1']
                test_B2 = scores['test']['Bleu_2']
                test_B3 = scores['test']['Bleu_3']
                test_B4 = scores['test']['Bleu_4']
                test_Rouge = scores['test']['ROUGE_L']
                test_Cider = scores['test']['CIDEr']
                test_meteor = scores['test']['METEOR']
                print 'computing meteor/blue score used %.4f sec, '\
                  'blue score: %.1f, meteor score: %.1f'%(
                time.time()-blue_t0, valid_B4, valid_meteor)
                history_errs.append([
                    eidx, uidx, train_err, train_perp, valid_perp, test_perp,
                    valid_err, test_err, valid_B1, valid_B2, valid_B3,
                    valid_B4, valid_meteor, valid_Rouge, valid_Cider, test_B1,
                    test_B2, test_B3, test_B4, test_meteor, test_Rouge,
                    test_Cider
                ])
                numpy.savetxt(save_model_dir + 'train_valid_test.txt',
                              history_errs,
                              fmt='%.3f')
                print 'save validation results to %s' % save_model_dir
                # save best model according to the best blue or meteor
                if len(history_errs) > 1 and \
                  valid_B4 > numpy.array(history_errs)[:-1,11].max():
                    print 'Saving to %s...' % save_model_dir,
                    numpy.savez(save_model_dir +
                                'model_best_blue_or_meteor.npz',
                                history_errs=history_errs,
                                **best_p)
                if len(history_errs) > 1 and \
                  valid_err < numpy.array(history_errs)[:-1,6].min():
                    best_p = utils.unzip(tparams)
                    bad_counter = 0
                    best_valid_err = valid_err
                    uidx_best_valid_err = uidx

                    print 'Saving to %s...' % save_model_dir,
                    numpy.savez(save_model_dir + 'model_best_so_far.npz',
                                history_errs=history_errs,
                                **best_p)
                    with open('%smodel_options.pkl' % save_model_dir,
                              'wb') as f:
                        pkl.dump(model_options, f)
                    print 'Done'
                elif len(history_errs) > 1 and \
                    valid_err >= numpy.array(history_errs)[:-1,6].min():
                    bad_counter += 1
                    print 'history best ', numpy.array(history_errs)[:,
                                                                     6].min()
                    print 'bad_counter ', bad_counter
                    print 'patience ', patience
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if test_B4 > 0.52 and test_meteor > 0.32:
                    print 'Saving to %s...' % save_model_dir,
                    numpy.savez(save_model_dir + 'model_' + str(uidx) + '.npz',
                                history_errs=history_errs,
                                **current_params)

                print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err, \
                  'best valid err so far',best_valid_err
                print 'valid took %.2f sec' % (time.time() - t0_valid)
                # end of validation
            if debug:
                break
        if estop:
            break
        if debug:
            break

        # end for loop over minibatches
        print 'This epoch has seen %d samples, train cost %.2f' % (
            n_samples, numpy.mean(train_costs))
    # end for loop over epochs
    print 'Optimization ended.'
    if best_p is not None:
        utils.zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = 0
    test_err = 0
    if not debug:
        #if valid:
        valid_err, valid_perp = model.pred_probs(
            engine, 'valid', f_log_probs, verbose=model_options['verbose'])
        #if test:
        #test_err, test_perp = self.pred_probs(
        #    'test', f_log_probs,
        #    verbose=model_options['verbose'])


    print 'stopped at epoch %d, minibatch %d, '\
      'current Train %.2f, current Valid %.2f, current Test %.2f '%(
          eidx,uidx,numpy.mean(train_err),numpy.mean(valid_err),numpy.mean(test_err))
    params = copy.copy(best_p)
    numpy.savez(save_model_dir + 'model_best.npz',
                train_err=train_err,
                valid_err=valid_err,
                test_err=test_err,
                history_errs=history_errs,
                **params)

    if history_errs != []:
        history = numpy.asarray(history_errs)
        best_valid_idx = history[:, 6].argmin()
        numpy.savetxt(save_model_dir + 'train_valid_test.txt',
                      history,
                      fmt='%.4f')
        print 'final best exp ', history[best_valid_idx]

    return train_err, valid_err, test_err
Example #22
0
def train(random_seed=1234,
          dim_word=256, # word vector dimensionality
          ctx_dim=-1, # context vector dimensionality, auto set
          dim=1000, # the number of LSTM units
          n_layers_out=1,
          n_layers_init=1,
          encoder='none',
          encoder_dim=100,
          prev2out=False,
          ctx2out=False,
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          alpha_entropy_r=0.,
          lrate=0.01,
          selector=False,
          n_words=100000,
          maxlen=100, # maximum length of the description
          optimizer='adadelta',
          clip_c=2.,
          batch_size=64,
          valid_batch_size=64,
          save_model_dir='/data/lisatmp3/yaoli/exp/capgen_vid/attention/test/',
          validFreq=10,
          saveFreq=10, # save the parameters after every saveFreq updates
          sampleFreq=10, # generate some samples after every sampleFreq updates
          metric='blue',
          dataset='youtube2text',
          video_feature='googlenet',
          use_dropout=False,
          reload_=False,
          from_dir=None,
          K1=10,
          K2=10,
          OutOf=240,
          verbose=True,
          debug=True
          ):
    rng_numpy, rng_theano = utils.get_two_rngs()

    model_options = locals().copy()
    model_options_c = locals().copy()
    if 'self' in model_options:
        del model_options['self']
    with open('model_files/model_options.pkl', 'wb') as f:
        pkl.dump(model_options, f)
    with open('model_files/model_options_c3d.pkl', 'wb') as f:
        pkl.dump(model_options_c, f)

    # instance model
    layers = Layers()
    model = Model()
    model_c = Model()
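    # two decoders are built for ensembling: judging by the checkpoint names
    # loaded below, `model` consumes the appearance (resnet) features and
    # `model_c` the motion (c3d) features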

    print 'Loading data'
    engine = data_engine.Movie2Caption('attention', dataset,
                                       video_feature,
                                       batch_size, valid_batch_size,
                                       maxlen, n_words,
                                       K1, K2, OutOf)
    model_options['ctx_dim'] = engine.ctx_dim
    model_options_c['ctx_dim'] = engine.ctx_dim_c
    model_options['n_words'] = engine.n_words
    model_options_c['n_words'] = engine.n_words
    print 'n_words:', model_options['n_words']
    print model_options_c['dim'],model_options_c['ctx_dim']

    # set test values, for debugging
    idx = engine.kf_train[0]
    [x_tv, mask_tv,
     ctx_tv, ctx_mask_tv,
     ctx_tv_c, ctx_mask_tv_c] = data_engine.prepare_data(
        engine, [engine.train[index] for index in idx])

    print 'init params'
    t0 = time.time()
    params = model.init_params(model_options)
    params_c = model_c.init_params(model_options_c)
    # reloading
    model_saved = 'model_files/model_resnet.npz'
    model_saved_c = 'model_files/model_c3d.npz'
    assert os.path.isfile(model_saved)
    print "Reloading model params..."
    params = utils.load_params(model_saved, params)
    params_c = utils.load_params(model_saved_c, params_c)

    tparams = utils.init_tparams(params)
    tparams_c = utils.init_tparams(params_c)

    trng, use_noise, \
          x, mask, ctx, mask_ctx, \
          cost, extra = \
          model.build_model(tparams, model_options)
    alphas = extra[1]
    betas = extra[2]

    trng_c, use_noise_c, \
    x_c, mask_c, ctx_c, mask_ctx_c, \
    cost_c, extra_c = \
        model_c.build_model(tparams_c, model_options_c)


    alphas_c = extra_c[1]
    betas_c = extra_c[2]

    print 'building sampler'
    f_init, f_next = model.build_sampler(tparams, model_options, use_noise, trng)
    f_init_c, f_next_c = model_c.build_sampler(tparams_c, model_options_c, use_noise_c, trng_c)
    # before any regularizer
    print 'building f_log_probs'
    f_log_probs = theano.function([x, mask, ctx, mask_ctx], -cost,
                                  profile=False, on_unused_input='ignore')
    f_log_probs_c = theano.function([x_c, mask_c, ctx_c, mask_ctx_c], -cost_c,
                                  profile=False, on_unused_input='ignore')

    bad_counter = 0
    history_errs = []

    processes = None
    queue = None
    rqueue = None
    shared_params = None

    uidx = 0
    uidx_best_blue = 0
    uidx_best_valid_err = 0
    estop = False
    best_p = utils.unzip(tparams)
    best_blue_valid = 0
    best_valid_err = 999
    alphas_ratio = []
    for eidx in xrange(max_epochs):
        n_samples = 0
        train_costs = []
        grads_record = []
        print 'Epoch ', eidx
        for idx in engine.kf_train:
            tags = [engine.train[index] for index in idx]
            n_samples += len(tags)
            use_noise.set_value(1.)

            pd_start = time.time()
            x, mask, ctx, ctx_mask, ctx_c, ctx_mask_c = data_engine.prepare_data(
                engine, tags)
            #print 'x:',x.shape,'ctx:',ctx.shape,'ctx_c:',ctx_c.shape
            pd_duration = time.time() - pd_start
            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                continue

            if numpy.mod(uidx, saveFreq) == 0:
                pass

            if numpy.mod(uidx, sampleFreq) == 0:
                use_noise.set_value(0.)
                print '------------- sampling from train ----------'
                x_s = x
                mask_s = mask
                ctx_s = ctx
                ctx_s_c = ctx_c
                ctx_mask_s = ctx_mask
                ctx_mask_s_c = ctx_mask_c
                model.sample_execute_ensemble(engine, model_options,model_options_c, tparams,tparams_c,
                                          f_init,f_init_c, f_next,f_next_c, x_s, ctx_s,
                                          ctx_mask_s, ctx_s_c, ctx_mask_s_c, trng)
                print '------------- sampling from valid ----------'
                idx = engine.kf_valid[numpy.random.randint(1, len(engine.kf_valid) - 1)]
                tags = [engine.valid[index] for index in idx]
                x_s, mask_s, ctx_s, mask_ctx_s, ctx_s_c,mask_ctx_s_c = data_engine.prepare_data(engine, tags)
                model.sample_execute_ensemble(engine, model_options,model_options_c, tparams,tparams_c,
                                          f_init, f_init_c, f_next, f_next_c, x_s, ctx_s,
                                     mask_ctx_s, ctx_s_c, mask_ctx_s_c, trng)

            if validFreq != -1 and numpy.mod(uidx, validFreq) == 0:
                t0_valid = time.time()
                current_params = utils.unzip(tparams)

                use_noise.set_value(0.)
                train_err = -1
                train_perp = -1
                valid_err = -1
                valid_perp = -1
                test_err = -1
                test_perp = -1

                mean_ranking = 0
                blue_t0 = time.time()
                scores, processes, queue, rqueue, shared_params = \
                    metrics.compute_score_ensemble(
                    model_type='attention',
                    model_archive=current_params,
                    options=model_options,
                    options_c=model_options_c,
                    engine=engine,
                    save_dir=save_model_dir,
                    beam=5, n_process=5,
                    whichset='both',
                    on_cpu=False,
                    processes=processes, queue=queue, rqueue=rqueue,
                    shared_params=shared_params, metric=metric,
                    one_time=False,
                    f_init=f_init, f_init_c=f_init_c, f_next=f_next, f_next_c= f_next_c, model=model
                    )
                '''
                 {'blue': {'test': [-1], 'valid': [77.7, 60.5, 48.7, 38.5, 38.3]},
                 'alternative_valid': {'Bleu_3': 0.40702270203174923,
                 'Bleu_4': 0.29276570520368456,
                 'CIDEr': 0.25247168210607884,
                 'Bleu_2': 0.529069629270047,
                 'Bleu_1': 0.6804308797115253,
                 'ROUGE_L': 0.51083584331688392},
                 'meteor': {'test': [-1], 'valid': [0.282787550236724]}}
                '''

                valid_B1 = scores['valid']['Bleu_1']
                valid_B2 = scores['valid']['Bleu_2']
                valid_B3 = scores['valid']['Bleu_3']
                valid_B4 = scores['valid']['Bleu_4']
                valid_Rouge = scores['valid']['ROUGE_L']
                valid_Cider = scores['valid']['CIDEr']
                valid_meteor = scores['valid']['METEOR']
                test_B1 = scores['test']['Bleu_1']
                test_B2 = scores['test']['Bleu_2']
                test_B3 = scores['test']['Bleu_3']
                test_B4 = scores['test']['Bleu_4']
                test_Rouge = scores['test']['ROUGE_L']
                test_Cider = scores['test']['CIDEr']
                test_meteor = scores['test']['METEOR']
                print 'computing meteor/blue score used %.4f sec, '\
                  'blue score: %.1f, meteor score: %.1f'%(
                time.time()-blue_t0, valid_B4, valid_meteor)


                if test_B4>0.52 and test_meteor>0.32:
                    print 'Saving to %s...'%save_model_dir,
                    numpy.savez(
                        save_model_dir+'model_'+str(uidx)+'.npz',
                         **current_params)

                print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err, \
                  'best valid err so far',best_valid_err
                print 'valid took %.2f sec'%(time.time() - t0_valid)
                # end of validation
                sys.exit()
            if debug:
                break
        if estop:
            break
        if debug:
            break

        # end for loop over minibatches
        print 'This epoch has seen %d samples, train cost %.2f'%(
            n_samples, numpy.mean(train_costs))
    # end for loop over epochs
    print 'Optimization ended.'
    if best_p is not None:
        utils.zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = 0
    test_err = 0
    if not debug:
        #if valid:
        valid_err, valid_perp = model.pred_probs(
            engine, 'valid', f_log_probs,
            verbose=model_options['verbose'])
        #if test:
        #test_err, test_perp = self.pred_probs(
        #    'test', f_log_probs,
        #    verbose=model_options['verbose'])


    print 'stopped at epoch %d, minibatch %d, '\
      'current Train %.2f, current Valid %.2f, current Test %.2f '%(
          eidx,uidx,numpy.mean(train_err),numpy.mean(valid_err),numpy.mean(test_err))
    params = copy.copy(best_p)
    numpy.savez(save_model_dir+'model_best.npz',
                train_err=train_err,
                valid_err=valid_err, test_err=test_err, history_errs=history_errs,
                **params)

    if history_errs != []:
        history = numpy.asarray(history_errs)
        best_valid_idx = history[:,6].argmin()
        numpy.savetxt(save_model_dir+'train_valid_test.txt', history, fmt='%.4f')
        print 'final best exp ', history[best_valid_idx]

    return train_err, valid_err, test_err
Example #23
0
class Model(object):
    def __init__(self):
        self.layers = Layers()

    def init_params(self, options):
        # all parameters
        params = OrderedDict()
        # embedding
        params['Wemb'] = utils.norm_weight(options['vocab_size'],
                                           options['word_dim'])
        # LSTM initial states
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_state',
                                                nin=options['ctx_dim'],
                                                nout=options['lstm_dim'])
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_memory',
                                                nin=options['ctx_dim'],
                                                nout=options['lstm_dim'])
        # decoder: LSTM
        params = self.layers.get_layer('lstm_cond')[0](
            options,
            params,
            prefix='bo_lstm',
            nin=options['word_dim'],
            dim=options['lstm_dim'],
            dimctx=options['ctx_dim'])
        params = self.layers.get_layer('lstm')[0](params,
                                                  nin=options['lstm_dim'],
                                                  dim=options['lstm_dim'],
                                                  prefix='to_lstm')
        # readout
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_logit_bo',
                                                nin=options['lstm_dim'],
                                                nout=options['word_dim'])
        if options['ctx2out']:
            params = self.layers.get_layer('ff')[0](options,
                                                    params,
                                                    prefix='ff_logit_ctx',
                                                    nin=options['ctx_dim'],
                                                    nout=options['word_dim'])
            params = self.layers.get_layer('ff')[0](options,
                                                    params,
                                                    prefix='ff_logit_to',
                                                    nin=options['lstm_dim'],
                                                    nout=options['word_dim'])
        # MLP
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_logit',
                                                nin=options['word_dim'],
                                                nout=options['vocab_size'])
        return params

    def build_model(self, tfparams, options, x, mask, ctx, ctx_mask):
        use_noise = tf.Variable(False,
                                dtype=tf.bool,
                                trainable=False,
                                name="use_noise")
        x_shape = tf.shape(x)
        n_timesteps = x_shape[0]
        n_samples = x_shape[1]
        # get word embeddings
        emb = tf.nn.embedding_lookup(
            tfparams['Wemb'], x,
            name="inputs_emb_lookup")  # (num_steps,64,512)
        emb_shape = tf.shape(emb)
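        # shift the embeddings one step forward in time (teacher forcing):
        # step t conditions on word t-1 and the first step sees all zeros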
        indices = tf.expand_dims(tf.range(1, emb_shape[0]), axis=1)
        emb_shifted = tf.scatter_nd(indices, emb[:-1], emb_shape)
        emb = emb_shifted

        # count num_frames==28
        with tf.name_scope("ctx_mean"):
            with tf.name_scope("counts"):
                counts = tf.expand_dims(
                    tf.reduce_sum(ctx_mask,
                                  axis=-1,
                                  name="reduce_sum_ctx_mask"), 1)  # (64,1)
            ctx_ = ctx
            ctx0 = ctx_  # (64,28,2048)
            ctx_mean = tf.reduce_sum(
                ctx0, axis=1, name="reduce_sum_ctx"
            ) / counts  #mean pooling of {vi}   # (64,2048)

        # initial state/cell
        with tf.name_scope("init_state"):
            init_state = self.layers.get_layer('ff')[1](
                tfparams, ctx_mean, options, prefix='ff_state',
                activ='tanh')  # (64,512)

        with tf.name_scope("init_memory"):
            init_memory = self.layers.get_layer('ff')[1](
                tfparams, ctx_mean, options, prefix='ff_memory',
                activ='tanh')  # (64,512)

        # hstltm = self.layers.build_hlstm(['bo_lstm','to_lstm'], inputs, n_timesteps, init_state, init_memory)
        with tf.name_scope("bo_lstm"):
            bo_lstm = self.layers.get_layer('lstm_cond')[1](
                tfparams,
                emb,
                options,
                prefix='bo_lstm',
                mask=mask,
                context=ctx0,
                one_step=False,
                init_state=init_state,
                init_memory=init_memory,
                use_noise=use_noise)
        with tf.name_scope("to_lstm"):
            to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                                       bo_lstm[0],
                                                       mask=mask,
                                                       one_step=False,
                                                       prefix='to_lstm')
        bo_lstm_h = bo_lstm[0]  # (t,64,512)
        to_lstm_h = to_lstm[0]  # (t,64,512)
        alphas = bo_lstm[2]  # (t,64,28)
        ctxs = bo_lstm[3]  # (t,64,2048)
        betas = bo_lstm[4]  # (t,64,)
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)
        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](
            tfparams, bo_lstm_h, options, prefix='ff_logit_bo',
            activ='linear')  # (t,64,512)*(512,512) = (t,64,512)
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            to_lstm_h *= (1 - betas[:, :, None])  # (t,64,512)*(t,64,1)
            ctxs_beta = self.layers.get_layer('ff')[1](
                tfparams, ctxs, options, prefix='ff_logit_ctx',
                activ='linear')  # (t,64,2048)*(2048,512) = (t,64,512)
            ctxs_beta += self.layers.get_layer('ff')[1](
                tfparams,
                to_lstm_h,
                options,
                prefix='ff_logit_to',
                activ='linear'
            )  # (t,64,512)+((t,64,512)*(512,512)) = (t,64,512)
            logit += ctxs_beta
        logit = utils.tanh(logit)  # (t,64,512)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise)
        # (t,m,n_words)
        logit = self.layers.get_layer('ff')[1](
            tfparams, logit, options, prefix='ff_logit',
            activ='linear')  # (t,64,512)*(512,vocab_size) = (t,64,vocab_size)
        logit_shape = tf.shape(logit)
        # (t*m, n_words)
        probs = tf.nn.softmax(
            tf.reshape(logit,
                       [logit_shape[0] * logit_shape[1], logit_shape[2]
                        ]))  # (t*64, vocab_size)
        # cost
        x_flat = tf.reshape(x, [x_shape[0] * x_shape[1]])  # (t*m,)
        x_flat_shape = tf.shape(x_flat)
        gather_indices = tf.stack([tf.range(x_flat_shape[0]), x_flat],
                                  axis=1)  # (t*m,2)
        cost = -tf.log(
            tf.gather_nd(probs, gather_indices) +
            1e-8)  # (t*m,) : pick probs of each word in each timestep
        cost = tf.reshape(cost, [x_shape[0], x_shape[1]])  # (t,m)
        cost = tf.reduce_sum(
            (cost * mask), axis=0
        )  # (m,) : sum across all timesteps for each element in batch
        extra = [probs, alphas, betas]
        return use_noise, cost, extra

    def build_sampler(self,
                      tfparams,
                      options,
                      use_noise,
                      ctx0,
                      ctx_mask,
                      x,
                      bo_init_state_sampler,
                      to_init_state_sampler,
                      bo_init_memory_sampler,
                      to_init_memory_sampler,
                      mode=None):
        # ctx: # frames x ctx_dim
        ctx_ = ctx0
        counts = tf.reduce_sum(ctx_mask, axis=-1)  # scalar

        ctx = ctx_
        ctx_mean = tf.reduce_sum(ctx, axis=0) / counts  # (2048,)
        ctx = tf.expand_dims(ctx, 0)  # (1,28,2048)

        # initial state/cell
        bo_init_state = self.layers.get_layer('ff')[1](tfparams,
                                                       ctx_mean,
                                                       options,
                                                       prefix='ff_state',
                                                       activ='tanh')  # (512,)
        bo_init_memory = self.layers.get_layer('ff')[1](tfparams,
                                                        ctx_mean,
                                                        options,
                                                        prefix='ff_memory',
                                                        activ='tanh')  # (512,)
        to_init_state = tf.zeros(
            shape=(options['lstm_dim'], ),
            dtype=tf.float32)  # DOUBT : constant or not? # (512,)
        to_init_memory = tf.zeros(shape=(options['lstm_dim'], ),
                                  dtype=tf.float32)  # (512,)
        init_state = [bo_init_state, to_init_state]
        init_memory = [bo_init_memory, to_init_memory]

        print 'building f_init...',
        f_init = [ctx0] + init_state + init_memory
        print 'done'

        init_state = [bo_init_state_sampler, to_init_state_sampler]
        init_memory = [bo_init_memory_sampler, to_init_memory_sampler]

        # # if it's the first word, embedding should be all zero
        emb = tf.cond(
            tf.reduce_any(x[:, None] < 0), lambda: tf.zeros(
                shape=(1, tfparams['Wemb'].shape[1]), dtype=tf.float32),
            lambda: tf.nn.embedding_lookup(tfparams['Wemb'], x))  # (m,512)

        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tfparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=None,
            context=ctx,
            one_step=True,
            init_state=init_state[0],
            init_memory=init_memory[0],
            use_noise=use_noise,
            mode=mode)
        to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                                   bo_lstm[0],
                                                   mask=None,
                                                   one_step=True,
                                                   init_state=init_state[1],
                                                   init_memory=init_memory[1],
                                                   prefix='to_lstm')
        next_state = [bo_lstm[0], to_lstm[0]]
        next_memory = [bo_lstm[1], to_lstm[0]]

        bo_lstm_h = bo_lstm[0]  # (1,512)
        to_lstm_h = to_lstm[0]  # (1,512)
        alphas = bo_lstm[2]  # (1,28)
        ctxs = bo_lstm[3]  # (1,2048)
        betas = bo_lstm[4]  # (1,)
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)
        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](
            tfparams, bo_lstm_h, options, prefix='ff_logit_bo',
            activ='linear')  # (1,512)*(512,512) = (1,512)
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            to_lstm_h *= (1 - betas[:, None])  # (1,512)*(1,1) = (1,512)
            ctxs_beta = self.layers.get_layer('ff')[1](
                tfparams, ctxs, options, prefix='ff_logit_ctx',
                activ='linear')  # (1,2048)*(2048,512) = (1,512)
            ctxs_beta += self.layers.get_layer('ff')[1](
                tfparams,
                to_lstm_h,
                options,
                prefix='ff_logit_to',
                activ='linear')  # (1,512)+((1,512)*(512,512)) = (1,512)
            logit += ctxs_beta
        logit = utils.tanh(logit)  # (1,512)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise)
        # (1,n_words)
        logit = self.layers.get_layer('ff')[1](
            tfparams, logit, options, prefix='ff_logit',
            activ='linear')  # (1,512)*(512,vocab_size) = (1,vocab_size)
        next_probs = tf.nn.softmax(logit)
        # next_sample = trng.multinomial(pvals=next_probs).argmax(1)    # INCOMPLETE , DOUBT : why is multinomial needed?
        next_sample = tf.multinomial(
            next_probs, 1)  # draw samples with given probabilities (1,1)
        next_sample_shape = tf.shape(next_sample)
        next_sample = tf.reshape(next_sample, [next_sample_shape[0]])
        # next word probability
        print 'building f_next...',
        f_next = [next_probs, next_sample] + next_state + next_memory
        print 'done'
        return f_init, f_next

    def gen_sample(self,
                   sess,
                   tfparams,
                   f_init,
                   f_next,
                   ctx0,
                   ctx_mask,
                   options,
                   k=1,
                   maxlen=30,
                   stochastic=False,
                   restrict_voc=False):
        '''
        ctx0: (28,2048) (f, dim_ctx)
        ctx_mask: (28,) (f, )

        restrict_voc: set the probability of out-of-vocabulary words to 0, then renormalize
        '''
        if k > 1:
            assert not stochastic, 'Beam search does not support stochastic sampling'

        sample = []
        sample_score = []
        if stochastic:
            sample_score = 0

        live_k = 1
        dead_k = 0

        hyp_samples = [[]] * live_k
        hyp_scores = np.zeros(live_k).astype('float32')
        hyp_states = []
        hyp_memories = []

        # [(28,2048),(512,),(512,),(512,),(512,)]
        rval = sess.run(f_init,
                        feed_dict={
                            "ctx_sampler:0": ctx0,
                            "ctx_mask_sampler:0": ctx_mask
                        })
        ctx0 = rval[0]

        next_state = []
        next_memory = []
        n_layers_lstm = 2

        for lidx in xrange(n_layers_lstm):
            next_state.append(rval[1 + lidx])
            next_state[-1] = next_state[-1].reshape(
                [live_k, next_state[-1].shape[0]])
        for lidx in xrange(n_layers_lstm):
            next_memory.append(rval[1 + n_layers_lstm + lidx])
            next_memory[-1] = next_memory[-1].reshape(
                [live_k, next_memory[-1].shape[0]])
        next_w = -1 * np.ones((1, )).astype('int32')
        for ii in xrange(maxlen):
            # return [(1, vocab_size), (1,), (1, 512), (1, 512), (1, 512), (1, 512)]
            # next_w: vector (1,)
            # ctx: matrix   (28, 2048)
            # ctx_mask: vector  (28,)
            # next_state: [matrix] [(1, 512), (1, 512)]
            # next_memory: [matrix] [(1, 512), (1, 512)]
            rval = sess.run(f_next,
                            feed_dict={
                                "x_sampler:0": next_w,
                                "ctx_sampler:0": ctx0,
                                "ctx_mask_sampler:0": ctx_mask,
                                'bo_init_state_sampler:0': next_state[0],
                                'to_init_state_sampler:0': next_state[1],
                                'bo_init_memory_sampler:0': next_memory[0],
                                'to_init_memory_sampler:0': next_memory[1]
                            })
            next_p = rval[0]
            if restrict_voc:
                raise NotImplementedError()
            next_w = rval[1]  # already argmax sorted
            next_state = []
            for lidx in xrange(n_layers_lstm):
                next_state.append(rval[2 + lidx])
            next_memory = []
            for lidx in xrange(n_layers_lstm):
                next_memory.append(rval[2 + n_layers_lstm + lidx])
            if stochastic:
                sample.append(next_w[0])  # take the most likely one
                sample_score += next_p[0, next_w[0]]
                if next_w[0] == 0:
                    break
            else:
                # the first run is (1,vocab_size)
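                # beam search expansion: add each word's negative log-prob to
                # every live hypothesis score and keep the k - dead_k cheapest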
                cand_scores = hyp_scores[:, None] - np.log(next_p)
                cand_flat = cand_scores.flatten()
                ranks_flat = cand_flat.argsort()[:(k - dead_k)]

                voc_size = next_p.shape[1]
                trans_indices = ranks_flat / voc_size  # index of row
                word_indices = ranks_flat % voc_size  # index of col
                costs = cand_flat[ranks_flat]

                new_hyp_samples = []
                new_hyp_scores = np.zeros(k - dead_k).astype('float32')
                new_hyp_states = []
                for lidx in xrange(n_layers_lstm):
                    new_hyp_states.append([])
                new_hyp_memories = []
                for lidx in xrange(n_layers_lstm):
                    new_hyp_memories.append([])

                for idx, [ti, wi] in enumerate(zip(trans_indices,
                                                   word_indices)):
                    new_hyp_samples.append(hyp_samples[ti] + [wi])
                    new_hyp_scores[idx] = copy.copy(costs[idx])
                    for lidx in xrange(n_layers_lstm):
                        new_hyp_states[lidx].append(
                            copy.copy(next_state[lidx][ti]))
                    for lidx in xrange(n_layers_lstm):
                        new_hyp_memories[lidx].append(
                            copy.copy(next_memory[lidx][ti]))

                # check the finished samples
                new_live_k = 0
                hyp_samples = []
                hyp_scores = []
                hyp_states = []
                for lidx in xrange(n_layers_lstm):
                    hyp_states.append([])
                hyp_memories = []
                for lidx in xrange(n_layers_lstm):
                    hyp_memories.append([])

                for idx in xrange(len(new_hyp_samples)):
                    if new_hyp_samples[idx][-1] == 0:
                        sample.append(new_hyp_samples[idx])
                        sample_score.append(new_hyp_scores[idx])
                        dead_k += 1
                    else:
                        new_live_k += 1
                        hyp_samples.append(new_hyp_samples[idx])
                        hyp_scores.append(new_hyp_scores[idx])
                        for lidx in xrange(n_layers_lstm):
                            hyp_states[lidx].append(new_hyp_states[lidx][idx])
                        for lidx in xrange(n_layers_lstm):
                            hyp_memories[lidx].append(
                                new_hyp_memories[lidx][idx])
                hyp_scores = np.array(hyp_scores)
                live_k = new_live_k

                if new_live_k < 1:
                    break
                if dead_k >= k:
                    break

                next_w = np.array([w[-1] for w in hyp_samples])
                next_state = []
                for lidx in xrange(n_layers_lstm):
                    next_state.append(np.array(hyp_states[lidx]))
                next_memory = []
                for lidx in xrange(n_layers_lstm):
                    next_memory.append(np.array(hyp_memories[lidx]))

        if not stochastic:
            # dump every remaining one
            if live_k > 0:
                for idx in xrange(live_k):
                    sample.append(hyp_samples[idx])
                    sample_score.append(hyp_scores[idx])

        return sample, sample_score, next_state, next_memory

    def pred_probs(self, sess, engine, whichset, f_log_probs, verbose=True):
        probs = []
        n_done = 0
        NLL = []
        L = []
        if whichset == 'train':
            tags = engine.train_data_ids
            iterator = engine.kf_train
        elif whichset == 'val':
            tags = engine.val_data_ids
            iterator = engine.kf_val
        elif whichset == 'test':
            tags = engine.test_data_ids
            iterator = engine.kf_test
        else:
            raise NotImplementedError()
        n_samples = np.sum([len(index) for index in iterator])
        for index in iterator:
            tag = [tags[i] for i in index]
            x, mask, ctx, ctx_mask = prepare_data(engine, tag, mode=whichset)

            pred_probs = sess.run(f_log_probs,
                                  feed_dict={
                                      "word_seq_x:0": x,
                                      "word_seq_mask:0": mask,
                                      "ctx:0": ctx,
                                      "ctx_mask:0": ctx_mask
                                  })

            L.append(mask.sum(0).tolist())
            NLL.append((-1 * pred_probs).tolist())
            probs.append(pred_probs.tolist())
            n_done += len(tag)
            if verbose:
                sys.stdout.write('\rComputing LL on %d/%d examples' %
                                 (n_done, n_samples))
                sys.stdout.flush()
        print ""
        probs = utils.flatten_list_of_list(probs)
        NLL = utils.flatten_list_of_list(NLL)
        L = utils.flatten_list_of_list(L)
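        # corpus perplexity: average negative log-likelihood per word (nats),
        # converted to bits, then exponentiated with base 2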
        perp = 2**(np.sum(NLL) / np.sum(L) / np.log(2))
        return -1 * np.mean(probs), perp

    def sample_execute(self, sess, engine, options, tfparams, f_init, f_next,
                       x, ctx, ctx_mask):
        stochastic = not options['beam_search']
        if stochastic:
            beam = 1
        else:
            beam = 5
        # x = (t,64)
        # ctx = (64,28,2048)
        # ctx_mask = (64,28)
        for jj in xrange(np.minimum(10, x.shape[1])):
            sample, score, _, _ = self.gen_sample(sess,
                                                  tfparams,
                                                  f_init,
                                                  f_next,
                                                  ctx[jj],
                                                  ctx_mask[jj],
                                                  options,
                                                  k=beam,
                                                  maxlen=30,
                                                  stochastic=stochastic)
            if not stochastic:
                best_one = np.argmin(score)
                sample = sample[best_one]
            else:
                sample = sample
            print 'Truth ', jj, ': ',
            for vv in x[:, jj]:
                if vv == 0:
                    break
                if vv in engine.reverse_vocab:
                    print engine.reverse_vocab[vv],
                else:
                    print 'UNK',
            print ""
            for kk, ss in enumerate([sample]):
                print 'Sample (', jj, ') ', ': ',
                for vv in ss:
                    if vv == 0:
                        break
                    if vv in engine.reverse_vocab:
                        print engine.reverse_vocab[vv],
                    else:
                        print 'UNK',
            print ""
Example #24
0
 def __init__(self, config):
     self.config = config
     self.layers = Layers(config)
Example #25
0
 def load_svg(self, filename):
     paths, attributes, svg_attributes = svg2paths2(filename)
     self.layers = Layers()
     self.layers.load_layers(paths, attributes)
Example #26
0
class Model(object):
    def __init__(self):
        self.layers = Layers()

    def init_params(self, options):
        # all parameters
        params = OrderedDict()
        # embedding
        ctx_dim_c = 4096
        params['Wemb'] = utils.norm_weight(options['n_words'],
                                           options['dim_word'])

        ctx_dim = options['ctx_dim']

        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_state',
                                                nin=ctx_dim,
                                                nout=options['dim'])
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_memory',
                                                nin=ctx_dim,
                                                nout=options['dim'])
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_state_c',
                                                nin=ctx_dim_c,
                                                nout=options['dim'])
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_memory_c',
                                                nin=ctx_dim_c,
                                                nout=options['dim'])

        # decoder: LSTM
        params = self.layers.get_layer('lstm_cond')[0](options,
                                                       params,
                                                       prefix='bo_lstm',
                                                       nin=options['dim_word'],
                                                       dim=options['dim'],
                                                       dimctx=ctx_dim)
        params = self.layers.get_layer('lstm')[0](params,
                                                  nin=options['dim'],
                                                  dim=options['dim'],
                                                  prefix='to_lstm')

        # readout
        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_logit_bo',
                                                nin=options['dim'],
                                                nout=options['dim_word'])
        if options['ctx2out']:
            params = self.layers.get_layer('ff')[0](options,
                                                    params,
                                                    prefix='ff_logit_ctx',
                                                    nin=ctx_dim,
                                                    nout=options['dim_word'])
            params = self.layers.get_layer('ff')[0](options,
                                                    params,
                                                    prefix='ff_logit_ctx_c',
                                                    nin=ctx_dim_c,
                                                    nout=options['dim_word'])
            params = self.layers.get_layer('ff')[0](options,
                                                    params,
                                                    prefix='ff_logit_to',
                                                    nin=options['dim'],
                                                    nout=options['dim_word'])

        params = self.layers.get_layer('ff')[0](options,
                                                params,
                                                prefix='ff_logit',
                                                nin=options['dim_word'],
                                                nout=options['n_words'])
        return params

    def build_model(self, tparams, options):
        trng = RandomStreams(1234)
        use_noise = theano.shared(numpy.float32(0.))
        # description string: #words x #samples
        x = tensor.matrix('x', dtype='int64')
        mask = tensor.matrix('mask', dtype='float32')
        # context: #samples x #annotations x dim
        ctx = tensor.tensor3('ctx', dtype='float32')
        mask_ctx = tensor.matrix('mask_ctx', dtype='float32')
        ctx_c = tensor.tensor3('ctx_c', dtype='float32')
        mask_ctx_c = tensor.matrix('mask_ctx_c', dtype='float32')
        n_timesteps = x.shape[0]
        n_samples = x.shape[1]

        # index into the word embedding matrix, shift it forward in time
        emb = tparams['Wemb'][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])
        emb_shifted = tensor.zeros_like(emb)
        emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
        emb = emb_shifted
        counts = mask_ctx.sum(-1).dimshuffle(0, 'x')

        ctx_ = ctx
        ctx_c_ = ctx_c

        ctx0 = ctx_
        ctx_mean = ctx0.sum(1) / counts

        ctx0_c = ctx_c_
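        # average the auxiliary stream; note it reuses the counts computed from mask_ctx above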
        ctx_mean_c = ctx0_c.sum(1) / counts

        # initial state/cell
        init_state = self.layers.get_layer('ff')[1](tparams,
                                                    ctx_mean,
                                                    options,
                                                    prefix='ff_state',
                                                    activ='tanh')
        init_memory = self.layers.get_layer('ff')[1](tparams,
                                                     ctx_mean,
                                                     options,
                                                     prefix='ff_memory',
                                                     activ='tanh')
        init_state_c = self.layers.get_layer('ff')[1](tparams,
                                                      ctx_mean_c,
                                                      options,
                                                      prefix='ff_state_c',
                                                      activ='tanh')
        init_memory_c = self.layers.get_layer('ff')[1](tparams,
                                                       ctx_mean_c,
                                                       options,
                                                       prefix='ff_memory_c',
                                                       activ='tanh')

        # combine the two context streams: add the ctx_c-conditioned state/cell to the main ones
        init_state += init_state_c
        init_memory += init_memory_c

        # decoder
        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=mask,
            context=ctx0,
            context_c=ctx0_c,
            one_step=False,
            init_state=init_state,
            init_memory=init_memory,
            trng=trng,
            use_noise=use_noise)
        to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                                   bo_lstm[0],
                                                   mask=mask,
                                                   one_step=False,
                                                   prefix='to_lstm')

        bo_lstm_h = bo_lstm[0]
        to_lstm_h = to_lstm[0]
        alphas = bo_lstm[2]
        alphas_c = bo_lstm[3]
        ctxs = bo_lstm[4]
        ctxs_c = bo_lstm[5]
        weight = bo_lstm[6]
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](tparams,
                                               bo_lstm_h,
                                               options,
                                               prefix='ff_logit_bo',
                                               activ='linear')
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
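            # weight[:, :, 2] (beta) gates how strongly the to_lstm hidden state feeds the readout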
            betas = weight[:, :, 2]
            #betas = betas.reshape([betas.shape[1],betas.shape[2]])
            to_lstm_h *= betas[:, :, None]
            ctxs_beta = self.layers.get_layer('ff')[1](tparams,
                                                       ctxs,
                                                       options,
                                                       prefix='ff_logit_ctx',
                                                       activ='linear')
            ctxs_beta_c = self.layers.get_layer('ff')[1](
                tparams,
                ctxs_c,
                options,
                prefix='ff_logit_ctx_c',
                activ='linear')
            to_lstm_h = self.layers.get_layer('ff')[1](tparams,
                                                       to_lstm_h,
                                                       options,
                                                       prefix='ff_logit_to',
                                                       activ='linear')
            logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
        logit = utils.tanh(logit)

        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise, trng)

        # (t,m,n_words)
        logit = self.layers.get_layer('ff')[1](tparams,
                                               logit,
                                               options,
                                               prefix='ff_logit',
                                               activ='linear')
        logit_shp = logit.shape
        # (t*m, n_words)
        probs = tensor.nnet.softmax(
            logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))
        # cost
        x_flat = x.flatten()  # (t*m,)
        cost = -tensor.log(probs[tensor.arange(x_flat.shape[0]), x_flat] +
                           1e-8)

        cost = cost.reshape([x.shape[0], x.shape[1]])
        cost = (cost * mask).sum(0)
        extra = [probs, alphas, alphas_c, weight[:, :, 0], weight[:, :, 1]]

        return trng, use_noise, x, mask, ctx, mask_ctx, ctx_c, mask_ctx_c, cost, extra

    def build_sampler(self, tparams, options, use_noise, trng, mode=None):
        # context: #annotations x dim
        ctx0 = tensor.matrix('ctx_sampler', dtype='float32')
        # ctx0.tag.test_value = numpy.random.uniform(size=(50,1024)).astype('float32')
        ctx_mask = tensor.vector('ctx_mask', dtype='float32')
        # ctx_mask.tag.test_value = numpy.random.binomial(n=1,p=0.5,size=(50,)).astype('float32')
        ctx0_c = tensor.matrix('ctx_sampler_c', dtype='float32')
        # ctx0.tag.test_value = numpy.random.uniform(size=(50,1024)).astype('float32')
        ctx_mask_c = tensor.vector('ctx_mask_c', dtype='float32')

        ctx_ = ctx0
        counts = ctx_mask.sum(-1)

        ctx = ctx_
        ctx_mean = ctx.sum(0) / counts

        ctx_c_ = ctx0_c
        counts_c = ctx_mask_c.sum(-1)

        ctx_c = ctx_c_
        ctx_mean_c = ctx_c.sum(0) / counts_c

        # ctx_mean = ctx.mean(0)
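        # add a leading axis so the conditional LSTM sees a (1, n_annotations, dim) context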
        ctx = ctx.dimshuffle('x', 0, 1)
        # initial state/cell
        bo_init_state = self.layers.get_layer('ff')[1](tparams,
                                                       ctx_mean,
                                                       options,
                                                       prefix='ff_state',
                                                       activ='tanh')
        bo_init_memory = self.layers.get_layer('ff')[1](tparams,
                                                        ctx_mean,
                                                        options,
                                                        prefix='ff_memory',
                                                        activ='tanh')

        bo_init_state_c = self.layers.get_layer('ff')[1](tparams,
                                                         ctx_mean_c,
                                                         options,
                                                         prefix='ff_state_c',
                                                         activ='tanh')
        bo_init_memory_c = self.layers.get_layer('ff')[1](tparams,
                                                          ctx_mean_c,
                                                          options,
                                                          prefix='ff_memory_c',
                                                          activ='tanh')

        bo_init_state += bo_init_state_c
        bo_init_memory += bo_init_memory_c

        # the top (to_lstm) state and cell start from zeros; only bo_lstm is context-initialized
        to_init_state = tensor.alloc(0., options['dim'])
        to_init_memory = tensor.alloc(0., options['dim'])
        init_state = [bo_init_state, to_init_state]
        init_memory = [bo_init_memory, to_init_memory]

        print 'Building f_init...',
        f_init = theano.function([ctx0, ctx_mask, ctx0_c, ctx_mask_c],
                                 [ctx0] + init_state + init_memory,
                                 name='f_init',
                                 on_unused_input='ignore',
                                 profile=False,
                                 mode=mode)
        print 'Done'

        x = tensor.vector('x_sampler', dtype='int64')
        init_state = [
            tensor.matrix('bo_init_state', dtype='float32'),
            tensor.matrix('to_init_state', dtype='float32')
        ]
        init_memory = [
            tensor.matrix('bo_init_memory', dtype='float32'),
            tensor.matrix('to_init_memory', dtype='float32')
        ]

        # if it's the first word, emb should be all zero
        emb = tensor.switch(x[:, None] < 0,
                            tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                            tparams['Wemb'][x])

        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=None,
            context=ctx,
            context_c=ctx_c,
            one_step=True,
            init_state=init_state[0],
            init_memory=init_memory[0],
            trng=trng,
            use_noise=use_noise,
            mode=mode)
        to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                                   bo_lstm[0],
                                                   mask=None,
                                                   one_step=True,
                                                   init_state=init_state[1],
                                                   init_memory=init_memory[1],
                                                   prefix='to_lstm')
        next_state = [bo_lstm[0], to_lstm[0]]
        next_memory = [bo_lstm[1], to_lstm[1]]  # cell states; the original excerpt passed to_lstm[0] (the hidden state), which looks like a typo

        bo_lstm_h = bo_lstm[0]
        to_lstm_h = to_lstm[0]
        alphas = bo_lstm[2]
        alphas_c = bo_lstm[3]
        ctxs = bo_lstm[4]
        ctxs_c = bo_lstm[5]
        weight = bo_lstm[6]
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

        logit = self.layers.get_layer('ff')[1](tparams,
                                               bo_lstm_h,
                                               options,
                                               prefix='ff_logit_bo',
                                               activ='linear')
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            betas = weight[:, 2]
            # betas = betas.reshape([betas.shape[1],betas.shape[2]])
            to_lstm_h *= betas[:, None]
            ctxs_beta = self.layers.get_layer('ff')[1](tparams,
                                                       ctxs,
                                                       options,
                                                       prefix='ff_logit_ctx',
                                                       activ='linear')
            ctxs_beta_c = self.layers.get_layer('ff')[1](
                tparams,
                ctxs_c,
                options,
                prefix='ff_logit_ctx_c',
                activ='linear')
            to_lstm_h = self.layers.get_layer('ff')[1](tparams,
                                                       to_lstm_h,
                                                       options,
                                                       prefix='ff_logit_to',
                                                       activ='linear')
            logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
        logit = utils.tanh(logit)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise, trng)

        logit = self.layers.get_layer('ff')[1](tparams,
                                               logit,
                                               options,
                                               prefix='ff_logit',
                                               activ='linear')
        logit_shp = logit.shape
        next_probs = tensor.nnet.softmax(logit)
        next_sample = trng.multinomial(pvals=next_probs).argmax(1)

        # next word probability
        print 'building f_next...'
        f_next = theano.function(
            [x, ctx0, ctx_mask, ctx0_c, ctx_mask_c] + init_state + init_memory,
            [next_probs, next_sample] + next_state + next_memory,
            name='f_next',
            profile=False,
            mode=mode,
            on_unused_input='ignore')
        print 'Done'
        return f_init, f_next

    def gen_sample(self,
                   tparams,
                   f_init,
                   f_next,
                   ctx0,
                   ctx0_c,
                   ctx_mask,
                   ctx_mask_c,
                   options,
                   trng=None,
                   k=1,
                   maxlen=30,
                   stochastic=False,
                   restrict_voc=False):
        '''
        ctx0: (26,1024)
        ctx_mask: (26,)

        restrict_voc: if True, zero out the probability of out-of-vocabulary words and renormalize (not implemented in this excerpt)
        '''

        if k > 1:
            assert not stochastic, 'Beam search does not support stochastic sampling'

        sample = []
        sample_score = []
        if stochastic:
            sample_score = 0

        live_k = 1
        dead_k = 0

        hyp_samples = [[]] * live_k
        hyp_scores = numpy.zeros(live_k).astype('float32')
        hyp_states = []
        hyp_memories = []

        # [(26,1024),(512,),(512,)]
        rval = f_init(ctx0, ctx_mask, ctx0_c, ctx_mask_c)
        ctx0 = rval[0]

        next_state = []
        next_memory = []
        n_layers_lstm = 2

        for lidx in xrange(n_layers_lstm):
            next_state.append(rval[1 + lidx])
            next_state[-1] = next_state[-1].reshape(
                [live_k, next_state[-1].shape[0]])
        for lidx in xrange(n_layers_lstm):
            next_memory.append(rval[1 + n_layers_lstm + lidx])
            next_memory[-1] = next_memory[-1].reshape(
                [live_k, next_memory[-1].shape[0]])
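        # -1 is the start-of-sequence sentinel; the sampler maps negative ids to a zero embedding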
        next_w = -1 * numpy.ones((1, )).astype('int64')
        # next_state: [(1,512)]
        # next_memory: [(1,512)]
        for ii in xrange(maxlen):
            # return [(1, 50000), (1,), (1, 512), (1, 512)]
            # next_w: vector
            # ctx: matrix
            # ctx_mask: vector
            # next_state: [matrix]
            # next_memory: [matrix]
            rval = f_next(*([next_w, ctx0, ctx_mask, ctx0_c, ctx_mask_c] +
                            next_state + next_memory))
            next_p = rval[0]
            if restrict_voc:
                raise NotImplementedError()
            next_w = rval[1]  # already argmax sorted
            next_state = []
            for lidx in xrange(n_layers_lstm):
                next_state.append(rval[2 + lidx])
            next_memory = []
            for lidx in xrange(n_layers_lstm):
                next_memory.append(rval[2 + n_layers_lstm + lidx])
            if stochastic:
                sample.append(next_w[0])  # take the most likely one
                sample_score += next_p[0, next_w[0]]
                if next_w[0] == 0:
                    break
            else:
                # the first run is (1,50000)
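                # beam search step: score every (hypothesis, next word) pair and keep the k - dead_k best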
                cand_scores = hyp_scores[:, None] - numpy.log(next_p)
                cand_flat = cand_scores.flatten()
                ranks_flat = cand_flat.argsort()[:(k - dead_k)]

                voc_size = next_p.shape[1]
                trans_indices = ranks_flat // voc_size  # index of row (integer division)
                word_indices = ranks_flat % voc_size  # index of col
                costs = cand_flat[ranks_flat]

                new_hyp_samples = []
                new_hyp_scores = numpy.zeros(k - dead_k).astype('float32')
                new_hyp_states = []
                for lidx in xrange(n_layers_lstm):
                    new_hyp_states.append([])
                new_hyp_memories = []
                for lidx in xrange(n_layers_lstm):
                    new_hyp_memories.append([])

                for idx, [ti, wi] in enumerate(zip(trans_indices,
                                                   word_indices)):
                    new_hyp_samples.append(hyp_samples[ti] + [wi])
                    new_hyp_scores[idx] = copy.copy(costs[idx])
                    for lidx in xrange(n_layers_lstm):
                        new_hyp_states[lidx].append(
                            copy.copy(next_state[lidx][ti]))
                    for lidx in xrange(n_layers_lstm):
                        new_hyp_memories[lidx].append(
                            copy.copy(next_memory[lidx][ti]))

                # check the finished samples
                new_live_k = 0
                hyp_samples = []
                hyp_scores = []
                hyp_states = []
                for lidx in xrange(n_layers_lstm):
                    hyp_states.append([])
                hyp_memories = []
                for lidx in xrange(n_layers_lstm):
                    hyp_memories.append([])

                for idx in xrange(len(new_hyp_samples)):
                    if new_hyp_samples[idx][-1] == 0:
                        sample.append(new_hyp_samples[idx])
                        sample_score.append(new_hyp_scores[idx])
                        dead_k += 1
                    else:
                        new_live_k += 1
                        hyp_samples.append(new_hyp_samples[idx])
                        hyp_scores.append(new_hyp_scores[idx])
                        for lidx in xrange(n_layers_lstm):
                            hyp_states[lidx].append(new_hyp_states[lidx][idx])
                        for lidx in xrange(n_layers_lstm):
                            hyp_memories[lidx].append(
                                new_hyp_memories[lidx][idx])
                hyp_scores = numpy.array(hyp_scores)
                live_k = new_live_k

                if new_live_k < 1:
                    break
                if dead_k >= k:
                    break

                next_w = numpy.array([w[-1] for w in hyp_samples])
                next_state = []
                for lidx in xrange(n_layers_lstm):
                    next_state.append(numpy.array(hyp_states[lidx]))
                next_memory = []
                for lidx in xrange(n_layers_lstm):
                    next_memory.append(numpy.array(hyp_memories[lidx]))

        if not stochastic:
            # dump every remaining one
            if live_k > 0:
                for idx in xrange(live_k):
                    sample.append(hyp_samples[idx])
                    sample_score.append(hyp_scores[idx])

        return sample, sample_score, next_state, next_memory

    def pred_probs(self, engine, whichset, f_log_probs, verbose=True):

        probs = []
        n_done = 0
        NLL = []
        L = []
        if whichset == 'train':
            tags = engine.train
            iterator = engine.kf_train
        elif whichset == 'valid':
            tags = engine.valid
            iterator = engine.kf_valid
        elif whichset == 'test':
            tags = engine.test
            iterator = engine.kf_test
        else:
            raise NotImplementedError()
        n_samples = numpy.sum([len(index) for index in iterator])
        for index in iterator:
            tag = [tags[i] for i in index]
            x, mask, ctx, ctx_mask, ctx_c, ctx_mask_c = prepare_data(
                engine, tag)
            pred_probs = f_log_probs(x, mask, ctx, ctx_mask, ctx_c, ctx_mask_c)
            L.append(mask.sum(0).tolist())
            NLL.append((-1 * pred_probs).tolist())
            probs.append(pred_probs.tolist())
            n_done += len(tag)
            if verbose:
                sys.stdout.write('\rComputing LL on %d/%d examples' %
                                 (n_done, n_samples))
                sys.stdout.flush()
        print
        probs = utils.flatten_list_of_list(probs)
        NLL = utils.flatten_list_of_list(NLL)
        L = utils.flatten_list_of_list(L)
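        # perplexity from the average per-word NLL (nats converted to bits, then 2 ** bits)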
        perp = 2**(numpy.sum(NLL) / numpy.sum(L) / numpy.log(2))
        return -1 * numpy.mean(probs), perp

    def sample_execute(self, engine, options, tparams, f_init, f_next, x, ctx,
                       ctx_c, ctx_mask, ctx_mask_c, trng):
        stochastic = False
        for jj in xrange(numpy.minimum(10, x.shape[1])):
            sample, score, _, _ = self.gen_sample(tparams,
                                                  f_init,
                                                  f_next,
                                                  ctx[jj],
                                                  ctx_c[jj],
                                                  ctx_mask[jj],
                                                  ctx_mask_c[jj],
                                                  options,
                                                  trng=trng,
                                                  k=5,
                                                  maxlen=30,
                                                  stochastic=stochastic)
            if not stochastic:
                best_one = numpy.argmin(score)
                sample = sample[best_one]
            else:
                # stochastic sampling already returns a single sequence
                pass
            print 'Truth ', jj, ': ',
            for vv in x[:, jj]:
                if vv == 0:
                    break
                if vv in engine.word_idict:
                    print engine.word_idict[vv],
                else:
                    print 'UNK',
            print
            for kk, ss in enumerate([sample]):
                print 'Sample (', jj, ') ', ': ',
                for vv in ss:
                    if vv == 0:
                        break
                    if vv in engine.word_idict:
                        print engine.word_idict[vv],
                    else:
                        print 'UNK',
            print
Example #27
0
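# Assumed imports for this VPython snippet (not part of the original excerpt);
# `Database`, `Layers`, and `Layer` are project-local modules.
from svgpathtools import svg2paths2
from vpython import arrow, canvas, color, extrusion, rate, sphere, vec
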
class Scene(object):
    def __init__(self):
        self.db = Database('flatstack.db')
        self.db.create_default_tables()
        self.scene = canvas()
        self.scene.background = color.gray(0.8)
        self.scene.forward = vec(0,-0.2,-1)
        self.scene.fov = 0.2
        self.set_range()
        self.scene.caption = """Right button drag or Ctrl-drag to rotate "camera" to view scene.
        To zoom, drag with middle button or Alt/Option depressed, or use scroll wheel.
            On a two-button mouse, middle is left + right.
        Touch screen: pinch/extend to zoom, swipe or two-finger rotate.\n"""
        self.clear()

    def clear(self):
        self.objects = []
        for a in self.scene.objects:
            a.visible = False
            del a
        self.layers = None

    def set_range(self):
        # TODO: set the range to enclose the bounding box of all points
        self.scene.range = 100

    def rate(self, sceneRate):
        rate(sceneRate)

    def apply(self):
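        # extrude each layer's 2D path along z by its depth, place and orient it
        # from the layer's metadata, and optionally draw its axis and sample points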
        for l in self.layers.layers:
            extr = extrusion(path=[vec(0,0,l.depth), vec(0,0,0)],
                color=color.cyan,
                shape=[ l.path ],
                pos=(self.vector_to_vec(l.position) + vec(0,0,l.depth / 2)),
                angle=l.angle,
                axis=self.vector_to_vec(l.axis))
            if l.showAxis:
                self.draw_axis(l)
            if l.showPoints:
                self.draw_points(l)

    def draw_axis(self, layer):
        position = self.vector_to_vec(layer.position)
        mArrow = arrow(angle=layer.angle,
                axis=self.vector_to_vec(layer.axis),
                color=color.orange,
                length=40,
                pos=position)
        rArrow = arrow(axis=vec(1,0,0), color=color.red, length=50, pos=position, shaftwidth=1)
        gArrow = arrow(axis=vec(0,1,0), color=color.green, length=50, pos=position, shaftwidth=1)
        bArrow = arrow(axis=vec(0,0,1), color=color.blue, length=50, pos=position, shaftwidth=1)

    def draw_points(self, layer):
        position = self.vector_to_vec(layer.position)
        # print(layer.path)
        # for p in layer.path:
            # pBall = sphere(pos=(vec(p[0],p[1], 0) + position), radius=5)
        for v in layer.volume:
            p = self.vector_to_vec(v)
            pBall = sphere(pos=p, radius=5)

    def load_svg(self, filename):
        paths, attributes, svg_attributes = svg2paths2(filename)
        self.layers = Layers()
        self.layers.load_layers(paths, attributes)

    # def populate_db(self, filename):
    #     paths, attributes, svg_attributes = svg2paths2(filename)
    #     layers = Layers()
    #     #layers.load_layers(paths, attributes)
    #     #for l in layers.layers:
    #     #    print(l)
    #     joints = {}
    #     layerlist = []
    #     for p, a in zip(paths, attributes):
    #         if 'fsjoint' in a:
    #             if a['fsjoint'] in joints:
    #                 joints[a['fsjoint']].append(p)
    #             else:
    #                 joints[a['fsjoint']] = [p]
    #         else:
    #             #hacky. gross.
    #             el, ea = Layer(p,a).explode()
    #             self.db.insert_layer(ea)

    #     #self.layers = self.translate_joints(joints, layers)

    def vector_to_vec(self, inVec):
        return vec(inVec.to_points()[0], inVec.to_points()[1], inVec.to_points()[2])
Example #28
0
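# Assumed import (not part of the original excerpt); this snippet targets TensorFlow 1.x
# (tf.contrib, tf.get_default_graph), and `Layers` is a project-local module.
import tensorflow as tf
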
class HAN:
    def __init__(self, config):
        self.config = config
        self.layers = Layers(config)

    def build_HAN_net(self):
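        # flow: embed word ids, run stacked biLSTM encoders (L2 penalties on the
        # LSTM kernels are collected into the 'reg' collection), attend over each
        # sentence, concatenate the sentence representations, and score sentiment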
        X_id = self.layers.X_input()
        senti_Y = self.layers.senti_Y_input()
        table = self.layers.word_embedding_table()
        mask = self.layers.padded_word_mask(X_id)
        X = self.layers.lookup(X_id, table, mask)
        seq_len = self.layers.sequence_length(X_id)
        sent_repr_ls = []
        for i in range(self.config['model']['sentAtt_num']):
            name = '_layer%d' % i
            X = self.layers.biLSTM(X, seq_len, name)
            graph = tf.get_default_graph()
            tf.add_to_collection(
                'reg',
                tf.contrib.layers.l2_regularizer(
                    self.config['model']['reg_rate'])(graph.get_tensor_by_name(
                        'biLSTM%s/bidirectional_rnn/fw/lstm_cell/kernel:0' %
                        name)))
            tf.add_to_collection(
                'reg',
                tf.contrib.layers.l2_regularizer(
                    self.config['model']['reg_rate'])(graph.get_tensor_by_name(
                        'biLSTM%s/bidirectional_rnn/bw/lstm_cell/kernel:0' %
                        name)))
            sent_att = self.layers.sent_attention(X, X_id)
            # (batch size, max sent len)
            sent_repr = self.layers.sent_repr(sent_att, X)
            sent_repr_ls.append(sent_repr)
        # (batch size, sentAtt_num * max sent len)
        sent_repr = tf.concat(sent_repr_ls, axis=1)
        senti_score = self.layers.score(sent_repr)
        pred = self.layers.senti_prediction(senti_score)
        loss = self.layers.senti_loss(senti_score, senti_Y)
        train_step = tf.train.AdamOptimizer(
            self.config['model']['lr']).minimize(loss)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        return {
            'loss': loss,
            'pred': pred,
            'graph': tf.get_default_graph(),
            'train_step': train_step,
            'saver': saver
        }
Example #29
0
    def __init__(self):
        self.layers = Layers()
Example #30
0
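# Assumed import (not part of the original excerpt); this snippet targets TensorFlow 1.x,
# and `Layers` is a project-local module.
import tensorflow as tf
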
class Model:
    def __init__(self, config):
        self.config = config
        self.layers = Layers(config)

    def build_senti_net(self):
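        # flow: embed word ids, pass them through shared biSRU layers, then through
        # separated biSRU + attribute-attention layers whose (batch, attr num, rnn dim)
        # outputs are concatenated along the feature axis and scored per attribute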
        X_id = self.layers.X_input()
        senti_Y = self.layers.senti_Y_input()
        table = self.layers.word_embedding_table()
        mask = self.layers.padded_word_mask(X_id)
        X = self.layers.lookup(X_id, table, mask)
        seq_len = self.layers.sequence_length(X_id)
        bisru_name = 'share'
        for i in range(self.config['model']['biSRU']['shared_layers_num']):
            # (batch size, max sent len, rnn_dim)
            X = self.layers.biSRU(X, seq_len, name=bisru_name)
        graph = tf.get_default_graph()
        tf.add_to_collection(
            'reg',
            tf.contrib.layers.l2_regularizer(self.config['model']['reg_rate'])(
                graph.get_tensor_by_name(
                    'biSRU_%s/bidirectional_rnn/fw/sru_cell/kernel:0' %
                    bisru_name)))
        tf.add_to_collection(
            'reg',
            tf.contrib.layers.l2_regularizer(self.config['model']['reg_rate'])(
                graph.get_tensor_by_name(
                    'biSRU_%s/bidirectional_rnn/bw/sru_cell/kernel:0' %
                    bisru_name)))
        # (batch size, rnn dim)
        sent_repr_ls = []
        for i in range(self.config['model']['biSRU']['separated_layers_num']):
            bisru_name = 'sep_layer' + str(i)
            X = self.layers.biSRU(X, seq_len, name=bisru_name)
            tf.add_to_collection(
                'reg',
                tf.contrib.layers.l2_regularizer(
                    self.config['model']['reg_rate'])(graph.get_tensor_by_name(
                        'biSRU_%s/bidirectional_rnn/fw/sru_cell/kernel:0' %
                        bisru_name)))
            tf.add_to_collection(
                'reg',
                tf.contrib.layers.l2_regularizer(
                    self.config['model']['reg_rate'])(graph.get_tensor_by_name(
                        'biSRU_%s/bidirectional_rnn/bw/sru_cell/kernel:0' %
                        bisru_name)))
            # (attr num, rnn dim)
            A = self.layers.attr_matrix(name=bisru_name)
            # (batch size, attr num, max sent len)
            att = self.layers.attention(A, X, X_id)
            # (batch size, attr num, rnn dim)
            sent_repr = self.layers.sent_repr(att, X)
            sent_repr_ls.append(sent_repr)
        # (batch size, attr num, sep bisru layers num * rnn dim)
        sent_repr = tf.concat(sent_repr_ls, axis=2)
        senti_score = self.layers.senti_score(sent_repr)
        pred = self.layers.senti_prediction(senti_score)
        loss = self.layers.senti_loss(senti_score, senti_Y)
        train_step = tf.train.AdamOptimizer(
            self.config['model']['lr']).minimize(loss)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        return {
            'loss': loss,
            'pred': pred,
            'graph': tf.get_default_graph(),
            'train_step': train_step,
            'saver': saver
        }