Exemplo n.º 1
0
    def __init__(self,
                 img_size,
                 glimpse_size,
                 n_what,
                 glimpse_encoder,
                 scale_offset=0.,
                 masked_glimpse=False,
                 debug=False):
        """Initialises the encoder.

        :param img_size: Forwarded to the parent constructor.
        :param glimpse_size: Forwarded to the parent constructor; its product also sets the
            number of outputs of the optional mask MLP.
        :param n_what: Stored as `self.n_what` and passed to `GaussianFromParamVec`.
        :param glimpse_encoder: Stored as `self._glimpse_encoder`.
        :param scale_offset: Forwarded to `GaussianFromParamVec`.
        :param masked_glimpse: Boolean; if True, an additional mask MLP is created.
        :param debug: Boolean; sets `validate_args=debug` and `allow_nan_stats=not debug`
            on the `what` distribution.
        """

        # The parent is always configured with `inverse=False`.
        super(AIREncoder, self).__init__(img_size, glimpse_size, inverse=False)
        self.n_what = n_what
        self._masked_glimpse = masked_glimpse

        # Submodules are created inside this module's variable scope.
        with self._enter_variable_scope():
            self._glimpse_encoder = glimpse_encoder
            self._what_distrib = GaussianFromParamVec(
                n_what,
                scale_offset=scale_offset,
                validate_args=debug,
                allow_nan_stats=not debug)

            if self._masked_glimpse:
                # One sigmoid output per glimpse element; the output bias starts at 1.
                # NOTE(review): 128 is presumably the hidden-layer size of the MLP -- confirm.
                self._mask_mlp = MLP(
                    128,
                    n_out=np.prod(glimpse_size),
                    transfer=tf.nn.sigmoid,
                    output_initializers={'b': tf.constant_initializer(1.)})
Exemplo n.º 2
0
    def _build(self, inpt):
        """Maps `inpt` through an MLP, reshapes the result and rescales it.

        The MLP emits `np.prod(self._output_size)` values, which `snt.BatchReshape`
        rearranges to `self._output_size`. The reshaped tensor is multiplied by the
        `output_scale` variable, which is initialised from `self._output_scale`.

        :param inpt: Tensor fed to the MLP.
        :return: Reshaped MLP output multiplied by `output_scale`.
        """
        n_flat_outputs = np.prod(self._output_size)

        pipeline = snt.Sequential([
            MLP(self._n_hidden, n_out=n_flat_outputs),
            snt.BatchReshape(self._output_size),
        ])
        reshaped = pipeline(inpt)

        output_scale = tf.get_variable('output_scale', initializer=self._output_scale)
        return reshaped * output_scale
Exemplo n.º 3
0
    def _make_priors(self, time_step, prior_conditioning):
        """Instantiates prior distributions for discovery.

        :param time_step: Compared against 0 to detect the first time-step.
        :param prior_conditioning: Optional input; when not None (and the prior type is 'cat'),
            it is passed through an MLP whose output is added to the step logits.
        :return: Tuple `(self._what_prior, self._where_prior, num_steps_prior)`.
        :raises ValueError: If `self._disc_prior_type` is neither 'geom' nor 'cat'.
        """

        # 1. at the first time-step, 0. otherwise.
        is_first_timestep = tf.to_float(tf.equal(time_step, 0))

        if self._disc_prior_type == 'geom':
            num_steps_prior = tfd.Geometric(probs=1. - self._init_disc_step_success_prob)

        elif self._disc_prior_type == 'cat':
            # Trainable logits over 0..n_steps steps, initialised to zero.
            init = [0.] * (self._n_steps + 1)
            step_logits = tf.Variable(init, trainable=True, dtype=tf.float32, name='step_prior_bias')

            # increase probability of zero steps when t>0
            init = [10.] + [0] * self._n_steps
            timstep_bias = tf.Variable(init, trainable=True, dtype=tf.float32, name='step_prior_timestep_bias')
            # The bias is switched off (multiplied by zero) at the first time-step.
            step_logits += (1. - is_first_timestep) * timstep_bias

            if prior_conditioning is not None:
                # A leading axis is added to the logits before the MLP output is added.
                step_logits = tf.expand_dims(step_logits, 0) + MLP(10, n_out=self._n_steps + 1)(prior_conditioning)

            # NOTE(review): ELU bounds the logits from below at -1 -- confirm this is intended.
            step_logits = tf.nn.elu(step_logits)
            num_steps_prior = tfd.Categorical(logits=step_logits)

        else:
            raise ValueError('Invalid prior type: {}'.format(self._disc_prior_type))

        return self._what_prior, self._where_prior, num_steps_prior
Exemplo n.º 4
0
    def _build(self, inpt):
        """Flattens `inpt`, maps it to 8 values and splits them into two groups of four.

        :param inpt: Tensor that is batch-flattened and fed to the MLP.
        :return: Tuple of the first four outputs and the last four outputs shifted by
            `self._scale_bias`.
        """
        encoder = snt.Sequential([
            snt.BatchFlatten(),
            MLP(self._n_hidden, n_out=8),
        ])
        params = encoder(inpt)

        first_half = params[..., :4]
        shifted_second_half = params[..., 4:] + self._scale_bias
        return first_half, shifted_second_half
Exemplo n.º 5
0
    def _build(self, inpt):
        """Flattens `inpt`, maps it to 8 values and splits them into two groups of four.

        The second group is shifted by the `scale_offset` variable, which is initialised
        from `self._scale_offset`.

        :param inpt: Tensor that is batch-flattened and fed to the MLP.
        :return: Tuple of the first four outputs and the offset last four outputs.
        """
        encoder = snt.Sequential([
            snt.BatchFlatten(),
            MLP(self._n_hidden, n_out=8),
        ])
        params = encoder(inpt)

        offset = tf.get_variable('scale_offset', initializer=self._scale_offset)
        first_half = params[..., :4]
        shifted_second_half = params[..., 4:] + offset
        return first_half, shifted_second_half
Exemplo n.º 6
0
    def _build(self, img, what, where, presence_prob):
        """Computes a scalar baseline from the image and the latent variables.

        Each of `what`, `where` and `presence_prob` has its first two axes swapped and is
        flattened to `(batch_size, -1)`. The flattened image and these tensors are
        concatenated along the last axis and passed through an MLP with one output.

        :param img: Image tensor; its static first dimension is used as the batch size.
        :param what: Rank-3 tensor (it is transposed with permutation `(1, 0, 2)`).
        :param where: Rank-3 tensor, treated like `what`.
        :param presence_prob: Rank-3 tensor, treated like `what`.
        :return: Output of the MLP.
        """
        batch_size = int(img.get_shape()[0])
        flat_shape = (batch_size, -1)

        flat_latents = []
        for latent in (what, where, presence_prob):
            swapped = tf.transpose(latent, (1, 0, 2))
            flat_latents.append(tf.reshape(swapped, flat_shape))

        flat_img = tf.reshape(img, flat_shape)
        baseline_inpts = tf.concat([flat_img] + flat_latents, -1)

        return MLP(self._n_hidden, n_out=1)(baseline_inpts)
Exemplo n.º 7
0
    def __init__(self, n_steps, n_latent_code=0, relation_embedding=False):
        """Initialises the module.

        :param n_steps: Integer, number of inference steps to perform at this time-step.
        :param n_latent_code:  Integer, dimensionality of summary of latent variables.
        :param relation_embedding: Boolean; computes DeepSet-like embedding of latent variables if True.
        """
        super(AbstractTimstepModule, self).__init__()
        self._n_steps = n_steps
        self._n_latent_code = n_latent_code
        # Only stored here; it is not otherwise used in this constructor.
        self._relation_embedding = relation_embedding

        with self._enter_variable_scope():
            # The latent encoder is only created when a latent code is requested,
            # so `self._latent_encoder` does not exist when `n_latent_code` is 0.
            if n_latent_code > 0:
                # NOTE(review): presumably two hidden layers of `n_latent_code` units each -- confirm.
                self._latent_encoder = MLP([n_latent_code] * 2)
Exemplo n.º 8
0
    def _build(self, previous_presence, previois_logit, *features):
        """Computes the presence distribution from the given features.

        :param previous_presence: Tensor that gates the logit: where it equals 1 the MLP logit
            is kept, where it equals 0 the logit is forced to -88.
        :param previois_logit: Previous logit used to limit the change of the new logit, or
            None to disable limiting. (The misspelled name is kept for compatibility.)
        :param features: Any number of inputs; they are merged by `ops.maybe_concat` and fed
            to the MLP.
        :return: Result of `self._bernoulli` applied to the final logit.
        """

        init = {'b': tf.constant_initializer(self._steps_bias)}
        mlp = MLP(self._n_hidden, n_out=1, output_initializers=init)

        features = ops.maybe_concat(features)
        logit = mlp(features)
        # previous_presence == 1 -> logit is unchanged; previous_presence == 0 -> logit == -88.
        logit = previous_presence * logit + (previous_presence - 1.) * 88.

        if previois_logit is not None:
            if self._max_rel_logit_change != np.inf:
                bound_a = (1. - self._max_rel_logit_change) * previois_logit
                bound_b = (1. + self._max_rel_logit_change) * previois_logit
                # For a negative previous logit the two bounds swap order. Passing
                # min > max to `clip_by_value` would pin the logit to a single bound,
                # so the bounds are ordered explicitly.
                min_logit = tf.minimum(bound_a, bound_b)
                max_logit = tf.maximum(bound_a, bound_b)
                logit = tf.clip_by_value(logit, min_logit, max_logit)

            elif self._max_logit_change != np.inf:
                # The new logit moves at most `_max_logit_change` away from the previous one.
                logit = previois_logit + self._max_logit_change * tf.nn.tanh(logit)

        return self._bernoulli(logit)
Exemplo n.º 9
0
 def _build(self, inpt):
     """Maps `inpt` to a single value in (0, 1).

     An MLP with one output produces a logit, `self._steps_bias` is added to it and
     the sum is squashed by a sigmoid.

     :param inpt: Tensor fed to the MLP.
     :return: Sigmoid of the biased logit.
     """
     raw_logit = MLP(self._n_hidden, n_out=1)(inpt)
     return tf.nn.sigmoid(raw_logit + self._steps_bias)
Exemplo n.º 10
0
 def _build(self, inpt):
     """Maps `inpt` through an MLP and reshapes the result to `self._output_size`.

     :param inpt: Tensor fed to the MLP.
     :return: MLP output rearranged by `snt.BatchReshape(self._output_size)`.
     """
     n_flat_outputs = np.prod(self._output_size)
     pipeline = snt.Sequential([
         MLP(self._n_hidden, n_out=n_flat_outputs),
         snt.BatchReshape(self._output_size),
     ])
     return pipeline(inpt)
Exemplo n.º 11
0
 def _build(self, inpt):
     """Batch-flattens `inpt` and passes it through an MLP.

     :param inpt: Tensor to encode.
     :return: Output of the MLP built from `self._n_hidden`.
     """
     pipeline = snt.Sequential([
         snt.BatchFlatten(),
         MLP(self._n_hidden),
     ])
     return pipeline(inpt)
Exemplo n.º 12
0
 def _embed(self, inpt):
     """Batch-flattens `inpt` and maps it to `self._n_param` outputs with an MLP.

     :param inpt: Tensor to embed.
     :return: Output of the MLP.
     """
     pipeline = snt.Sequential([
         snt.BatchFlatten(),
         MLP(self._n_hidden, n_out=self._n_param),
     ])
     return pipeline(inpt)
Exemplo n.º 13
0
    def build(self):
        """Builds the model graph and stores the output scores in `self.final_output`.

        The method creates all variables, encodes tokens into sentence vectors and
        sentences into a document vector (each stage: bidirectional RNN, structure
        scores from `get_structure`, combination with the soft-parent representation,
        pooling), and finally applies an MLP and a linear output layer.

        Inputs are read from `self.t_variables`; hyper-parameters from `self.config`.
        Besides `self.final_output`, `self.embeddings` is also set.
        """
        # ---- Variables -------------------------------------------------------
        with tf.variable_scope("Embeddings"):
            self.embeddings = tf.get_variable(
                "emb", [self.config.n_embed, self.config.d_embed],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            # Root vectors that are prepended to the sentence-level (`emb_root`)
            # and token-level (`emb_root_s`) semantic sequences further below.
            embeddings_root = tf.get_variable(
                "emb_root", [1, 1, 2 * self.config.dim_sem],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            embeddings_root_s = tf.get_variable(
                "emb_root_s", [1, 1, 2 * self.config.dim_sem],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
        with tf.variable_scope("Model"):
            # Combination layer at the document level.
            w_comb = tf.get_variable(
                "w_comb", [4 * self.config.dim_sem, 2 * self.config.dim_sem],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            b_comb = tf.get_variable("bias_comb", [2 * self.config.dim_sem],
                                     dtype=tf.float32,
                                     initializer=tf.constant_initializer())

            # Combination layer at the sentence level.
            w_comb_s = tf.get_variable(
                "w_comb_s", [4 * self.config.dim_sem, 2 * self.config.dim_sem],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            b_comb_s = tf.get_variable("bias_comb_s",
                                       [2 * self.config.dim_sem],
                                       dtype=tf.float32,
                                       initializer=tf.constant_initializer())

            # Final linear output layer.
            w_softmax = tf.get_variable(
                "w_softmax", [2 * self.config.dim_sem, self.config.dim_output],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            b_softmax = tf.get_variable(
                "bias_softmax", [self.config.dim_output],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())

        # The variables in the two "Structure/*" scopes are created here but not bound
        # to local names.
        # NOTE(review): presumably `get_structure` retrieves them by name -- confirm.
        with tf.variable_scope("Structure/doc"):
            tf.get_variable("w_parser_p",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_c",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_s",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
        with tf.variable_scope("Structure/sent"):
            tf.get_variable("w_parser_p",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_c",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

            tf.get_variable("w_parser_s",
                            [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        # ---- Inputs ----------------------------------------------------------
        sent_l = self.t_variables['sent_l']
        doc_l = self.t_variables['doc_l']
        max_sent_l = self.t_variables['max_sent_l']
        max_doc_l = self.t_variables['max_doc_l']
        batch_l = self.t_variables['batch_l']

        # Token indices are cropped to the batch maxima before the embedding lookup.
        tokens_input = tf.nn.embedding_lookup(
            self.embeddings,
            self.t_variables['token_idxs'][:, :max_doc_l, :max_sent_l])
        tokens_input = tf.nn.dropout(tokens_input,
                                     self.t_variables['keep_prob'])

        mask_tokens = self.t_variables[
            'mask_tokens'][:, :max_doc_l, :max_sent_l]
        mask_sents = self.t_variables['mask_sents'][:, :max_doc_l]
        # Only the static last dimension of the embedded tokens is needed.
        [_, _, _, rnn_size] = tokens_input.get_shape().as_list()
        # Merge the batch and document axes so that every sentence is one sequence.
        tokens_input_do = tf.reshape(
            tokens_input, [batch_l * max_doc_l, max_sent_l, rnn_size])

        sent_l = tf.reshape(sent_l, [batch_l * max_doc_l])
        mask_tokens = tf.reshape(mask_tokens, [batch_l * max_doc_l, -1])

        # ---- Sentence level --------------------------------------------------
        # NOTE(review): `tokens_output[0]` / `[1]` are presumably the forward and
        # backward RNN outputs -- confirm against `dynamicBiRNN`.
        tokens_output, _ = dynamicBiRNN(tokens_input_do,
                                        sent_l,
                                        n_hidden=self.config.dim_hidden,
                                        cell_type=self.config.rnn_cell,
                                        cell_name='Model/sent')
        # The first `dim_sem` channels of each output form the semantic part,
        # the remaining channels the structural part.
        tokens_sem = tf.concat([
            tokens_output[0][:, :, :self.config.dim_sem],
            tokens_output[1][:, :, :self.config.dim_sem]
        ], 2)
        tokens_str = tf.concat([
            tokens_output[0][:, :, self.config.dim_sem:],
            tokens_output[1][:, :, self.config.dim_sem:]
        ], 2)
        # mask1 is all ones except for a zero first column,
        # mask2 is all ones except for a zero first row.
        temp1 = tf.zeros([batch_l * max_doc_l, max_sent_l, 1], tf.float32)
        temp2 = tf.zeros([batch_l * max_doc_l, 1, max_sent_l], tf.float32)

        mask1 = tf.ones([batch_l * max_doc_l, max_sent_l, max_sent_l - 1],
                        tf.float32)
        mask2 = tf.ones([batch_l * max_doc_l, max_sent_l - 1, max_sent_l],
                        tf.float32)
        mask1 = tf.concat([temp1, mask1], 2)
        mask2 = tf.concat([temp2, mask2], 1)

        str_scores_s_ = get_structure('sent', tokens_str, max_sent_l, mask1,
                                      mask2)  # batch_l,  sent_l+1, sent_l
        str_scores_s = tf.matrix_transpose(str_scores_s_)  # soft parent
        # Prepend the root vector so that the sequence has sent_l + 1 entries.
        tokens_sem_root = tf.concat([
            tf.tile(embeddings_root_s, [batch_l * max_doc_l, 1, 1]), tokens_sem
        ], 1)
        # Score-weighted sum over root + tokens for every token.
        tokens_output_ = tf.matmul(str_scores_s, tokens_sem_root)
        # Combine every token with its soft-parent representation.
        tokens_output = LReLu(
            tf.tensordot(tf.concat([tokens_sem, tokens_output_], 2), w_comb_s,
                         [[2], [0]]) + b_comb_s)

        # Pool the tokens of each sentence into one vector.
        # NOTE(review): there is no else branch -- any other `sent_attention` value
        # leaves `tokens_output` unpooled.
        if (self.config.sent_attention == 'sum'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
            tokens_output = tf.reduce_sum(tokens_output, 1)
        elif (self.config.sent_attention == 'mean'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
            # NOTE(review): divides by `sent_l`; if it can contain zeros (e.g. for
            # padding sentences) this yields NaN/inf -- confirm.
            tokens_output = tf.reduce_sum(tokens_output, 1) / tf.expand_dims(
                tf.cast(sent_l, tf.float32), 1)
        elif (self.config.sent_attention == 'max'):
            # Masked positions are shifted down by 999 so that they do not win the max.
            tokens_output = tokens_output + tf.expand_dims(
                (mask_tokens - 1) * 999, 2)
            tokens_output = tf.reduce_max(tokens_output, 1)

        # ---- Document level --------------------------------------------------
        # Restore the separate batch and document axes.
        sents_input = tf.reshape(tokens_output,
                                 [batch_l, max_doc_l, 2 * self.config.dim_sem])
        sents_output, _ = dynamicBiRNN(sents_input,
                                       doc_l,
                                       n_hidden=self.config.dim_hidden,
                                       cell_type=self.config.rnn_cell,
                                       cell_name='Model/doc')

        # Same semantic / structural split as at the sentence level.
        sents_sem = tf.concat([
            sents_output[0][:, :, :self.config.dim_sem],
            sents_output[1][:, :, :self.config.dim_sem]
        ], 2)
        sents_str = tf.concat([
            sents_output[0][:, :, self.config.dim_sem:],
            sents_output[1][:, :, self.config.dim_sem:]
        ], 2)

        # At this level the parser masks are supplied through `self.t_variables`.
        str_scores_ = get_structure(
            'doc', sents_str, max_doc_l, self.t_variables['mask_parser_1'],
            self.t_variables['mask_parser_2'])  #batch_l,  sent_l+1, sent_l
        str_scores = tf.matrix_transpose(str_scores_)  # soft parent
        sents_sem_root = tf.concat(
            [tf.tile(embeddings_root, [batch_l, 1, 1]), sents_sem], 1)
        sents_output_ = tf.matmul(str_scores, sents_sem_root)
        sents_output = LReLu(
            tf.tensordot(tf.concat([sents_sem, sents_output_], 2), w_comb,
                         [[2], [0]]) + b_comb)

        # Pool the sentences of each document into one vector (same options as above).
        if (self.config.doc_attention == 'sum'):
            sents_output = sents_output * tf.expand_dims(mask_sents, 2)
            sents_output = tf.reduce_sum(sents_output, 1)
        elif (self.config.doc_attention == 'mean'):
            sents_output = sents_output * tf.expand_dims(mask_sents, 2)
            sents_output = tf.reduce_sum(sents_output, 1) / tf.expand_dims(
                tf.cast(doc_l, tf.float32), 1)
        elif (self.config.doc_attention == 'max'):
            sents_output = sents_output + tf.expand_dims(
                (mask_sents - 1) * 999, 2)
            sents_output = tf.reduce_max(sents_output, 1)

        # ---- Output ----------------------------------------------------------
        final_output = MLP(sents_output, 'output',
                           self.t_variables['keep_prob'])
        self.final_output = tf.matmul(final_output, w_softmax) + b_softmax
Exemplo n.º 14
0
from neural import MLP

# Load the data set; columns 0 and 2 are used as features, column 4 holds the label.
df = pd.read_csv('data.csv')

# Shuffle the rows and use the first 100 of them as the training set.
df = df.iloc[np.random.permutation(len(df))]
y = df.iloc[0:100, 4].values
# Binary target as a column vector: 1 for "Iris-setosa", 0 for everything else.
y = np.where(y == "Iris-setosa", 1, 0).reshape(-1, 1)
X = df.iloc[0:100, [0, 2]].values

# The number of input signals equals the number of features.
inputSize = X.shape[1]
# Number of neurons in the hidden (A) layer.
hiddenSizes = 10
# The number of output signals equals the number of classes: one for a 1-D target,
# otherwise the number of target columns. (A bare `len(y.shape)` is always truthy,
# so the rank has to be compared against 1 explicitly.)
outputSize = 1 if len(y.shape) == 1 else y.shape[1]

iterations = 50
learning_rate = 0.1

net = MLP(inputSize, outputSize, learning_rate, hiddenSizes)

# Train the network (in effect, the network is its vector of weights).
for i in range(iterations):
    net.train(X, y)

    # Report the mean squared error on the training set every 10 iterations.
    if i % 10 == 0:
        print("На итерации: " + str(i) + ' || ' + "Средняя ошибка: " +
              str(np.mean(np.square(y - net.predict(X)))))

# Count the misclassified training samples (predictions thresholded at 0.5).
pr = net.predict(X)
print(sum(abs(y - (pr > 0.5))))
Exemplo n.º 15
0
class PropagationCore(BaseSQAIRCore):
    """Recurrent propagation core.

    It is run iteratively to propagate several objects.
    """
    # Names of the entries of the output list returned by `_build`, in order.
    _output_names = 'what what_sample what_loc what_scale where where_sample where_loc where_scale presence_prob' \
                    ' presence presence_logit temporal_state'.split()

    _init_presence_value = 0.  # at the beginning we assume no objects
    _what_scale_bias = -3.

    def __init__(self,
                 img_size,
                 crop_size,
                 n_what,
                 transition,
                 input_encoder,
                 glimpse_encoder,
                 transform_estimator,
                 steps_predictor,
                 temporal_cell,
                 where_update_scale=1.0,
                 debug=False):
        """Initialises the model.

        If argument is not covered here, see BaseSQAIRCore for documentation.

        :param temporal_cell: RNNCore for the temporal rnn.
        :param where_update_scale: Float, rescales the update of the `where` variables.
        """

        super(PropagationCore, self).__init__(img_size,
                                              crop_size,
                                              n_what,
                                              transition,
                                              input_encoder,
                                              glimpse_encoder,
                                              transform_estimator,
                                              steps_predictor,
                                              debug=debug)

        self._temporal_cell = temporal_cell
        with self._enter_variable_scope():
            # Non-trainable scalar variable initialised to `where_update_scale`.
            self._where_update_scale = tf.get_variable(
                'where_update_scale',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(where_update_scale),
                trainable=False)
            self._where_distrib = AffineDiagNormal(
                validate_args=self._debug, allow_nan_stats=not self._debug)

    @property
    def output_size(self):
        """Sizes of the entries of the output list, in the order of `_output_names`."""
        return [
            self._n_what,  # what code
            self._n_what,  # what sample
            self._n_what,  # what loc
            self._n_what,  # what scale
            self._n_transform_param,  # where code
            self._n_transform_param,  # where sample
            self._n_transform_param,  # where loc
            self._n_transform_param,  # where scale
            1,  # presence prob
            1,  # presence
            1,  # presence_logit,
            self._temporal_cell.state_size,
        ]

    def _build(self, inpt, state):
        """Propagates one object from the previous to the current time-step.

        :param inpt: Pair `(z_tm1, temporal_hidden_state)`. `z_tm1` unpacks into
            `(what, where, presence, presence_logit)` of the same object at the previous
            time-step; `temporal_hidden_state` is a (possibly nested) state whose last
            flattened element is used as `temporal_state`.
        :param state: Sequence `(img_flat, what_km1, where_km1, presence_km1, hidden_state)`,
            where the `*_km1` entries describe a different object at the current time-step.
        :return: Tuple `(output, new_state)`; `output` is ordered as `_output_names` and
            `new_state` has the same layout as `state`.
        """
        # Unpacked in the body rather than in the signature: tuple parameters were
        # removed in Python 3 (PEP 3113). Positional callers are unaffected.
        z_tm1, temporal_hidden_state = inpt

        # same object, previous timestep
        what_tm1, where_tm1, presence_tm1, presence_logit_tm1 = z_tm1
        temporal_state = nest.flatten(temporal_hidden_state)[-1]

        # different object, current timestep
        img_flat, what_km1, where_km1, presence_km1, hidden_state = state

        img = tf.reshape(img_flat, (-1, ) + tuple(self._img_size))

        with tf.variable_scope('rnn_inpt'):

            # Small offset (MLP output scaled by 0.1) that is added to the previous
            # `where` before the glimpse is encoded.
            where_bias = MLP(128, n_out=4)(temporal_state) * .1
            what_distrib = self._glimpse_encoder(img,
                                                 where_tm1 + where_bias,
                                                 mask_inpt=temporal_state)[0]
            rnn_inpt = what_distrib.loc

            rnn_inpt = [
                rnn_inpt,  # img
                what_km1,
                where_km1,
                presence_km1,  # explaining away
                what_tm1,
                where_tm1,
                presence_tm1,
                temporal_state  # previous state
            ]

            rnn_inpt = tf.concat(rnn_inpt, -1)
            hidden_output, hidden_state = self._cell(rnn_inpt, hidden_state)

        with tf.variable_scope('where'):
            where, where_sample, where_loc, where_scale = self._compute_where(
                where_tm1, hidden_output, temporal_state)

        with tf.variable_scope('what'):
            what, what_sample, what_loc, what_scale, temporal_hidden_state\
                = self._compute_what(img, what_tm1, where, hidden_output, temporal_hidden_state, temporal_state)

        with tf.variable_scope('presence'):
            presence, presence_prob, presence_logit \
                = self._compute_presence(presence_tm1, presence_logit_tm1, hidden_output, temporal_state, what)

        output = [
            what, what_sample, what_loc, what_scale, where, where_sample,
            where_loc, where_scale, presence_prob, presence, presence_logit,
            temporal_hidden_state
        ]
        new_state = [img_flat, what, where, presence, hidden_state]

        return output, new_state
Exemplo n.º 16
0
class PropagationCore(BaseSQAIRCore):
    """Recurrent propagation core.

    It is run iteratively to propagate several objects.
    """

    # Names of the entries of the output list returned by `_build`, in order.
    _output_names = 'what what_sample what_loc what_scale where where_sample where_loc where_scale presence_prob' \
                    ' presence presence_logit temporal_state'.split()

    _init_presence_value = 0.  # at the beginning we assume no objects
    _what_scale_bias = -3.

    def __init__(self, img_size, crop_size, n_what,
                 transition, input_encoder, glimpse_encoder, transform_estimator, steps_predictor, temporal_cell,
                 where_update_scale=1.0, debug=False):
        """Initialises the model.

        If argument is not covered here, see BaseSQAIRCore for documentation.

        :param temporal_cell: RNNCore for the temporal rnn.
        :param where_update_scale: Float, rescales the update of the `where` variables.
        """
        # Let BaseSQAIRCore handle the arguments shared by all cores.
        super(PropagationCore, self).__init__(img_size, crop_size, n_what,
                                              transition, input_encoder, glimpse_encoder, transform_estimator,
                                              steps_predictor, debug=debug)

        # Temporal RNN, supplied by the caller.
        self._temporal_cell = temporal_cell

        # The variable scope belongs to this module, not to the `tf` package.
        with self._enter_variable_scope():
            # Non-trainable scalar variable initialised to `where_update_scale`.
            self._where_update_scale = tf.get_variable('where_update_scale', shape=[], dtype=tf.float32,
                                                       initializer=tf.constant_initializer(where_update_scale),
                                                       trainable=False)

            # Distribution object for the `where` latent variables.
            self._where_distrib = AffineDiagNormal()

    @property
    def output_size(self):
        """Sizes of the entries of the output list, in the order of `_output_names`."""
        return [
            self._n_what,  # what code
            self._n_what,  # what sample
            self._n_what,  # what loc
            self._n_what,  # what scale
            self._n_transform_param,  # where code
            self._n_transform_param,  # where sample
            self._n_transform_param,  # where loc
            self._n_transform_param,  # where scale
            1,  # presence prob
            1,  # presence
            1,  # presence_logit,
            self._temporal_cell.state_size,
        ]

    def _build(self, inpt, state):
        """Propagates one object from the previous to the current time-step.

        :param inpt: Pair `(z_tm1, temporal_hidden_state)`. `z_tm1` unpacks into
            `(what, where, presence, presence_logit)` of the same object at the previous
            time-step; `temporal_hidden_state` is a (possibly nested) state whose last
            flattened element is used as `temporal_state`.
        :param state: Sequence `(img_flat, what_km1, where_km1, presence_km1, hidden_state)`,
            where the `*_km1` entries describe a different object at the current time-step.
        :return: Tuple `(output, new_state)`; `output` is ordered as `_output_names` and
            `new_state` has the same layout as `state`.
        """
        # Unpacked in the body rather than in the signature: tuple parameters were
        # removed in Python 3 (PEP 3113). Positional callers are unaffected.
        z_tm1, temporal_hidden_state = inpt

        # Same object, previous time-step: split into what, where, presence and presence logit.
        what_tm1, where_tm1, presence_tm1, presence_logit_tm1 = z_tm1
        # Last element of the flattened temporal hidden state.
        temporal_state = nest.flatten(temporal_hidden_state)[-1]

        # Different object, current time-step, plus the image and the RNN hidden state.
        img_flat, what_km1, where_km1, presence_km1, hidden_state = state

        # Restore the image shape from the flat pixel vector.
        img = tf.reshape(img_flat, (-1,) + tuple(self._img_size))

        with tf.variable_scope('rnn_inpt'):
            # Small offset (MLP output scaled by 0.1) that is added to the previous
            # `where` to obtain the proposal location for the glimpse.
            where_bias = MLP(128, n_out=4)(temporal_state) * .1
            # Encode the glimpse at the proposal location; only the first returned item is used.
            what_distrib = self._glimpse_encoder(img, where_tm1 + where_bias, mask_inpt=temporal_state)[0]
            # The `loc` of the resulting distribution is what enters the RNN.
            rnn_inpt = what_distrib.loc
            # Collect all inputs of the RNN.
            rnn_inpt = [
                rnn_inpt,                                             # img
                what_km1, where_km1, presence_km1,                    # explaining away
                what_tm1, where_tm1, presence_tm1, temporal_state     # previous state
            ]
            # Join them along the last axis into one tensor.
            rnn_inpt = tf.concat(rnn_inpt, -1)
            hidden_output, hidden_state = self._cell(rnn_inpt, hidden_state)

        # `where` for the current time-step, from the previous `where`, the RNN output
        # and the temporal state.
        with tf.variable_scope('where'):
            where, where_sample, where_loc, where_scale = self._compute_where(where_tm1, hidden_output, temporal_state)

        # `what` for the current time-step; this also updates the temporal hidden state.
        with tf.variable_scope('what'):
            what, what_sample, what_loc, what_scale, temporal_hidden_state\
                = self._compute_what(img, what_tm1, where, hidden_output, temporal_hidden_state, temporal_state)

        # Presence for the current time-step, from the previous presence and its logit,
        # the RNN output, the temporal state and the new `what`.
        with tf.variable_scope('presence'):
            presence, presence_prob, presence_logit \
                = self._compute_presence(presence_tm1, presence_logit_tm1, hidden_output, temporal_state, what)

        output = [what, what_sample, what_loc, what_scale, where, where_sample, where_loc, where_scale,
                  presence_prob, presence, presence_logit, temporal_hidden_state]

        new_state = [img_flat, what, where, presence, hidden_state]

        return output, new_state
Exemplo n.º 17
0
    def build(self):
        with tf.variable_scope("Embeddings"):
            self.embeddings = tf.get_variable("emb", [self.config.n_embed, self.config.d_embed], dtype=tf.float64,
                                         initializer=self.xavier_init)
            embeddings_root = tf.get_variable("emb_root", [1, 1, 2 * self.config.dim_sem], dtype=tf.float64,
                                                  initializer=self.xavier_init)
            embeddings_root_s = tf.get_variable("emb_root_s", [1, 1,2* self.config.dim_sem], dtype=tf.float64,
                                                    initializer=self.xavier_init)
        with tf.variable_scope("Model"):
            w_comb = tf.get_variable("w_comb", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float64,
                            initializer=self.xavier_init)
            w_comb_both = tf.get_variable("w_comb_both", [6 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float64,
                                     initializer=self.xavier_init)
            b_comb = tf.get_variable("bias_comb", [2 * self.config.dim_sem], dtype=tf.float64, initializer=tf.constant_initializer())

            w_comb_s = tf.get_variable("w_comb_s", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float64,
                            initializer=self.xavier_init)
            b_comb_s = tf.get_variable("bias_comb_s", [2 * self.config.dim_sem], dtype=tf.float64, initializer=tf.constant_initializer())

            w_softmax = tf.get_variable("w_softmax", [2 * self.config.dim_sem, self.config.dim_output], dtype=tf.float64,
                            initializer=self.xavier_init)
            b_softmax = tf.get_variable("bias_softmax", [self.config.dim_output], dtype=tf.float64,
                            initializer=self.xavier_init)

            w_sem_doc = tf.get_variable("w_sem_doc", [2 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float64,
                                        initializer=self.xavier_init)

            w_str_doc = tf.get_variable("w_str_doc", [2 * self.config.dim_sem, 2 * self.config.dim_str], dtype=tf.float64,
                                        initializer=self.xavier_init)

        with tf.variable_scope("Structure/doc"):
            tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float64,
                            initializer=self.xavier_init)
        with tf.variable_scope("Structure/sent"):
            tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)

            tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                            initializer=self.xavier_init)
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float64,
                            initializer=self.xavier_init)

        sent_l = self.t_variables['sent_l']
        doc_l = self.t_variables['doc_l']
        max_sent_l = self.t_variables['max_sent_l']
        max_doc_l = self.t_variables['max_doc_l']
        batch_l = self.t_variables['batch_l']

        tokens_input = tf.nn.embedding_lookup(self.embeddings, self.t_variables['token_idxs'][:, :max_doc_l, :max_sent_l])
        tokens_input = tf.nn.dropout(tokens_input, self.t_variables['keep_prob'])  # [batch_size, doc_l, sent_l, d_embed]

        mask_tokens = self.t_variables['mask_tokens'][:, :max_doc_l, :max_sent_l]
        mask_sents = self.t_variables['mask_sents'][:, :max_doc_l]  # [batch_size, doc_l]

        tokens_input_do = tf.reshape(tokens_input, [batch_l * max_doc_l, max_sent_l, self.config.d_embed])
        sent_l = tf.reshape(sent_l, [batch_l * max_doc_l])
        mask_tokens = tf.reshape(mask_tokens, [batch_l * max_doc_l, -1])
        tokens_output, _ = dynamicBiRNN(tokens_input_do, sent_l, n_hidden=self.config.dim_hidden, xavier_init=self.xavier_init,
                                        cell_type=self.config.rnn_cell, cell_name='Model/sent')
        tokens_sem = tf.concat([tokens_output[0][:,:,:self.config.dim_sem], tokens_output[1][:,:,:self.config.dim_sem]], 2)
        tokens_str = tf.concat([tokens_output[0][:,:,self.config.dim_sem:], tokens_output[1][:,:,self.config.dim_sem:]], 2)

        if self.config.skip_sent_attention:
            tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_input_do], 2), w_comb_s, [[2], [0]]) + b_comb_s)
        else:
            temp1 = tf.zeros([batch_l * max_doc_l, max_sent_l,1], tf.float64)
            temp2 = tf.zeros([batch_l * max_doc_l,1,max_sent_l], tf.float64)

            mask1 = tf.ones([batch_l * max_doc_l, max_sent_l, max_sent_l-1], tf.float64)
            mask2 = tf.ones([batch_l * max_doc_l, max_sent_l-1, max_sent_l], tf.float64)
            mask1 = tf.concat([temp1,mask1],2)
            mask2 = tf.concat([temp2,mask2],1)

            if self.config.skip_mask_bug_fix:
                str_scores_s_, _, LL_tokens = get_structure('sent', tokens_str, mask1, mask2, None, None, None)  # batch_l,  sent_l+1, sent_l
            else:
                # create mask for setting all padded cells to 0
                mask_ll_tokens = tf.expand_dims(mask_tokens, 2)
                mask_ll_tokens_trans = tf.transpose(mask_ll_tokens, perm=[0, 2, 1])
                mask_ll_tokens = mask_ll_tokens
                mask_tokens_mult = mask_ll_tokens * mask_ll_tokens_trans

                # create mask for setting the padded diagonals to 1
                mask_diags = tf.matrix_diag_part(mask_tokens_mult)
                mask_diags_invert = tf.cast(tf.logical_not(tf.cast(mask_diags, tf.bool)), tf.float64)
                zero_matrix = tf.zeros([batch_l * max_doc_l, max_sent_l, max_sent_l], tf.float64)
                mask_tokens_add = tf.matrix_set_diag(zero_matrix, mask_diags_invert)

                str_scores_s_, _, LL_tokens = get_structure('sent', tokens_str, mask1, mask2, mask_tokens_mult,
                                                            mask_tokens_add, tf.expand_dims(mask_tokens,
                                                                                            2))  # batch_l,  sent_l+1, sent_l

            str_scores_s = tf.matrix_transpose(str_scores_s_)  # soft parent
            tokens_sem_root = tf.concat([tf.tile(embeddings_root_s, [batch_l * max_doc_l, 1, 1]), tokens_sem], 1)
            tokens_output_ = tf.matmul(str_scores_s, tokens_sem_root)
            tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_output_], 2), w_comb_s, [[2], [0]]) + b_comb_s)

        if (self.config.sent_attention == 'sum'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens,2)
            tokens_output = tf.reduce_sum(tokens_output, 1)
        elif (self.config.sent_attention == 'mean'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens,2)
            tokens_output = tf.reduce_sum(tokens_output, 1)/tf.expand_dims(tf.cast(sent_l,tf.float64),1)
        elif (self.config.sent_attention == 'max'):
            tokens_output = tokens_output + tf.expand_dims((mask_tokens-1)*999,2)
            tokens_output = tf.reduce_max(tokens_output, 1)

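        # after pooling over tokens, tokens_output is expected to be
        # [batch_l * max_doc_l, 2 * dim_sem] (it is reshaped with that size below)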
        if self.config.skip_doc_bilstm:
            if self.config.use_positional_encoding:
                tokens_output = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
                tokens_output = self.add_timing_signal(tokens_output, max_doc_l, num_timescales=self.config.dim_sem)
                tokens_output = tf.reshape(tokens_output, [batch_l * max_doc_l, 2 * self.config.dim_sem])

            sents_sem = tf.matmul(tokens_output, w_sem_doc)
            sents_sem = tf.reshape(sents_sem, [batch_l, max_doc_l, 2 * self.config.dim_sem])
            sents_str = tf.matmul(tokens_output, w_str_doc)
            sents_str = tf.reshape(sents_str, [batch_l, max_doc_l, 2 * self.config.dim_str])
        else:
            sents_input = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
            sents_output, _ = dynamicBiRNN(sents_input, doc_l, n_hidden=self.config.dim_hidden, xavier_init=self.xavier_init, 
                                           cell_type=self.config.rnn_cell, cell_name='Model/doc')
            sents_sem = tf.concat([sents_output[0][:,:,:self.config.dim_sem], sents_output[1][:,:,:self.config.dim_sem]], 2)  # [batch_l, doc+l, dim_sem*2]
            sents_str = tf.concat([sents_output[0][:,:,self.config.dim_sem:], sents_output[1][:,:,self.config.dim_sem:]], 2)  # [batch_l, doc+l, dim_str*2]

        if self.config.skip_doc_attention:
            if self.config.skip_doc_bilstm:
                sents_input = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
                sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_input], 2), w_comb, [[2], [0]]) + b_comb)
            else:
                sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_input], 2), w_comb, [[2], [0]]) + b_comb)
        else:
            if self.config.skip_mask_bug_fix:
                str_scores_, str_scores_no_root, LL_sents = get_structure('doc', sents_str, self.t_variables['mask_parser_1'],
                                                                  self.t_variables['mask_parser_2'], None, None, None)  # [batch_size, doc_l+1, doc_l]
            else:
                # create mask for setting all padded cells to 0
                mask_ll_sents = tf.expand_dims(mask_sents, 2)
                mask_ll_sents_trans = tf.transpose(mask_ll_sents, perm=[0, 2, 1])
                mask_ll_sents = mask_ll_sents
                mask_sents_mult = mask_ll_sents * mask_ll_sents_trans

                # create mask for setting the padded diagonals to 1
                mask_sents_diags = tf.matrix_diag_part(mask_sents_mult)
                mask_sents_diags_invert = tf.cast(tf.logical_not(tf.cast(mask_sents_diags, tf.bool)), tf.float64)
                zero_matrix_sents = tf.zeros([batch_l, max_doc_l, max_doc_l], tf.float64)
                mask_sents_add = tf.matrix_set_diag(zero_matrix_sents, mask_sents_diags_invert)

                str_scores_, str_scores_no_root, LL_sents = get_structure('doc', sents_str, self.t_variables['mask_parser_1'],
                                                                  self.t_variables['mask_parser_2'], mask_sents_mult,
                                                                  mask_sents_add, tf.expand_dims(mask_sents,
                                                                                                 2))  # [batch_size, doc_l+1, doc_l]

            str_scores = tf.matrix_transpose(str_scores_)
            self.str_scores = str_scores  # shape is [batch_size, doc_l, doc_l+1]

            sents_children = tf.matmul(str_scores_no_root, sents_sem)
            if self.config.tree_percolation == "child":
                sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_children], 2), w_comb, [[2], [0]]) + b_comb)
            else:
                sents_sem_root = tf.concat([tf.tile(embeddings_root, [batch_l, 1, 1]), sents_sem], 1)
                sents_parents = tf.matmul(str_scores, sents_sem_root)
                if self.config.tree_percolation == "parent":
                    sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_parents], 2), w_comb, [[2], [0]]) + b_comb)
                elif self.config.tree_percolation == "both":
                    sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_parents, sents_children], 2), w_comb_both, [[2], [0]]) + b_comb)

            # additional percolation levels are only supported for the "child" option:
            # each level re-aggregates children via str_scores_no_root and w_comb
            if self.config.tree_percolation_levels > 0:
                count = 0
                while count < self.config.tree_percolation_levels:
                    sents_children_2 = tf.matmul(str_scores_no_root, sents_output)
                    sents_output = LReLu(tf.tensordot(tf.concat([sents_output, sents_children_2], 2), w_comb, [[2], [0]]) + b_comb)
                    count += 1

        if (self.config.doc_attention == 'sum'):
            sents_output = sents_output * tf.expand_dims(mask_sents, 2)  # mask is [batch_size, doc_l, 1]
            sents_output = tf.reduce_sum(sents_output, 1)  # [batch_size, dim_sem*2]
        elif (self.config.doc_attention == 'mean'):
            sents_output = sents_output * tf.expand_dims(mask_sents, 2)
            sents_output = tf.reduce_sum(sents_output, 1)/tf.expand_dims(tf.cast(doc_l,tf.float64),1)
        elif (self.config.doc_attention == 'max'):
            sents_output = sents_output + tf.expand_dims((mask_sents-1)*999,2)
            sents_output = tf.reduce_max(sents_output, 1)
        elif (self.config.doc_attention == 'weighted_sum'):
            sents_weighted = sents_output * tf.expand_dims(str_scores[:,:,0], 2)
            sents_output = sents_weighted * tf.expand_dims(mask_sents, 2)  # apply mask
            sents_output = tf.reduce_sum(sents_output, 1)

        final_output = MLP(sents_output, 'output', self.t_variables['keep_prob'], self.config.seed, self.xavier_init)
        self.final_output = tf.matmul(final_output, w_softmax) + b_softmax