示例#1
0
    def add_prediction_op(self):
        """Build the prediction graph: two 1-D conv layers and two dense layers.

        Reads the input from ``self.x`` and the dropout keep probability from
        ``self.keep_prob``; the flattened convolution output is sized from
        ``self.config.strlen``.

        Returns:
            The sigmoid of the final dense layer, multiplied by a per-example
            mask that is 0 for every example whose input sums to zero, so
            such examples always predict 0.
        """
        fs = [5, 5]  # filter sizes
        cs = [4, 40, 80]  # cs[i] is the number of output channels of layer i (layer 0 is the input)

        # First conv layer
        W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
        b_conv1 = utils.bias_variable([cs[1]])

        h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

        # Second conv layer
        W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
        b_conv2 = utils.bias_variable([cs[2]])

        h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

        # First fully connected layer. Reshape the convolution output to 1D vector
        W_fc1 = utils.weight_variable([self.config.strlen * cs[2], 1024])
        b_fc1 = utils.bias_variable([1024])

        h_conv2_flat = tf.reshape(h_conv2, [-1, self.config.strlen * cs[2]])
        h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

        # Dropout (should be added to earlier layers too...)
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Final fully-connected layer
        W_fc2 = utils.weight_variable([1024, 1])
        b_fc2 = utils.bias_variable([1])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        y_out = tf.sigmoid(y_conv)

        # Per-example mask: 1 iff at least one entry of that example is nonzero.
        # The sum is taken separately for every example (all non-batch
        # dimensions flattened) so that one nonzero example in the batch does
        # not switch the mask on for an all-zero example next to it.
        x_flat = tf.reshape(self.x, [tf.shape(self.x)[0], -1])
        per_example_sum = tf.reduce_sum(x_flat, axis=1)
        is_nonzero = tf.reshape(tf.clip_by_value(per_example_sum, 0, 1), [-1, 1])
        y_out = tf.multiply(y_out, is_nonzero)
        return y_out
示例#2
0
    def add_prediction_op(self):
        """Build the prediction graph: two 1-D conv layers and two dense layers.

        Reads the input from ``self.x`` and the dropout keep probability from
        ``self.keep_prob``; the flattened convolution output is sized from
        ``self.config.strlen``.

        Returns:
            The raw output of the final dense layer (no activation applied),
            with ``num_classes`` values per example.
        """
        fs = [5, 5]  # filter sizes
        cs = [
            4, 40, 80
        ]  # cs[i] is output number of channels from layer i [where layer 0 is input layer]
        num_classes = 3  # width of the final layer

        # First conv layer
        W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
        b_conv1 = utils.bias_variable([cs[1]])

        h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

        # Second conv layer
        W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
        b_conv2 = utils.bias_variable([cs[2]])

        h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

        # First fully connected layer. Reshape the convolution output to 1D vector
        W_fc1 = utils.weight_variable([self.config.strlen * cs[2], 1024])
        b_fc1 = utils.bias_variable([1024])

        h_conv2_flat = tf.reshape(h_conv2, [-1, self.config.strlen * cs[2]])
        h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

        # Dropout (should be added to earlier layers too...)
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Final fully-connected layer. The bias has one entry per output unit;
        # a bias of shape [1] would be broadcast, forcing all outputs to share
        # a single offset.
        W_fc2 = utils.weight_variable([1024, num_classes])
        b_fc2 = utils.bias_variable([num_classes])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        return y_conv
示例#3
0
 def res_block(self, name, inputs):
     """Apply a residual block made of two ReLU -> conv1d stages.

     Args:
         name: Prefix for the two convolutions, which are named
             ``name + '.1'`` and ``name + '.2'``.
         inputs: Tensor fed to the block and added back to its output.

     Returns:
         ``inputs`` plus the convolution branch scaled by ``self.res_rate``.
     """
     dim = self.filter_output_dim
     branch = inputs
     for suffix in ('.1', '.2'):
         # Each stage is an activation followed by a width-5 convolution
         # that keeps the channel count at `dim`.
         branch = tf.nn.relu(branch)
         branch = utils.conv1d(name + suffix, dim, dim, 5, branch)
     return inputs + (self.res_rate * branch)
    def add_prediction_op(self):
        """Build the prediction graph with a sequence branch and a coverage branch.

        ``self.x`` goes through two 1-D convolutions; ``self.e`` gets a
        trailing channel axis and goes through one 1-D convolution. Both
        results are concatenated on the last axis, flattened, passed through
        dropout, a 1024-unit dense layer, dropout again and a single-unit
        dense layer.

        Returns:
            The sigmoid of the final dense layer.
        """
        filter_sizes = [5, 5]
        # channels[i] is the number of output channels of layer i (layer 0 is the input).
        channels = [4, 40, 80]

        # Sequence branch, first convolution.
        W_conv1 = utils.weight_variable(
            [filter_sizes[0], channels[0], channels[1]])
        b_conv1 = utils.bias_variable([channels[1]])
        h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

        # Sequence branch, second convolution.
        W_conv2 = utils.weight_variable(
            [filter_sizes[1], channels[1], channels[2]])
        b_conv2 = utils.bias_variable([channels[2]])
        h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

        # Coverage branch: a single convolution over self.e, which first gets
        # a trailing axis of size 1 to act as its only input channel.
        W_cov = utils.weight_variable([filter_sizes[0], 1, channels[2]])
        b_cov = utils.bias_variable([channels[2]])
        coverage = tf.expand_dims(self.e, -1)
        h_cov = utils.lrelu(utils.conv1d(coverage, W_cov) + b_cov)

        # Join both branches along the channel axis and flatten everything
        # except the leading (batch) dimension.
        merged = tf.concat([h_conv2, h_cov], axis=-1)
        static_shape = merged.get_shape().as_list()
        flat_dim = np.prod(static_shape[1:])
        merged_flat = tf.reshape(merged, [-1, flat_dim])
        merged_drop = tf.nn.dropout(merged_flat, self.keep_prob)

        # First fully connected layer, sized from the flattened static shape.
        fc1_inputs = merged_flat.get_shape().as_list()[-1]
        W_fc1 = utils.weight_variable([fc1_inputs, 1024])
        b_fc1 = utils.bias_variable([1024])
        h_fc1 = utils.lrelu(tf.matmul(merged_drop, W_fc1) + b_fc1)

        # Second dropout, applied to the dense activations.
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Final fully connected layer with a single output unit.
        W_fc2 = utils.weight_variable([1024, 1])
        b_fc2 = utils.bias_variable([1])
        logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        return tf.sigmoid(logits)
示例#5
0
    def add_prediction_op(self):
        """Build the prediction graph with separate left/right conv towers.

        ``self.x`` is split along axis 1 into a part of size
        ``self.config.window`` and a part of size ``self.config.window + 1``.
        Each part goes through its own pair of 1-D convolutions (weights are
        not shared), the results are concatenated along axis 1, flattened and
        fed to two dense layers with dropout in between.

        Returns:
            The sigmoid of the final dense layer.
        """
        window = self.config.window
        left_in, right_in = tf.split(self.x, [window, window + 1], axis=1)

        # First conv layer: variables for both towers are created first ...
        W_left1 = utils.weight_variable([5, 4, 40])
        b_left1 = utils.bias_variable([40])

        W_right1 = utils.weight_variable([5, 4, 40])
        b_right1 = utils.bias_variable([40])

        # ... then applied to their respective halves.
        left_conv1 = utils.conv1d(left_in, W_left1) + b_left1
        h_left1 = utils.lrelu(left_conv1)
        right_conv1 = utils.conv1d(right_in, W_right1) + b_right1
        h_right1 = utils.lrelu(right_conv1)

        # Second conv layer, same pattern.
        W_left2 = utils.weight_variable([5, 40, 80])
        b_left2 = utils.bias_variable([80])

        W_right2 = utils.weight_variable([5, 40, 80])
        b_right2 = utils.bias_variable([80])

        left_conv2 = utils.conv1d(h_left1, W_left2) + b_left2
        h_left2 = utils.lrelu(left_conv2)
        right_conv2 = utils.conv1d(h_right1, W_right2) + b_right2
        h_right2 = utils.lrelu(right_conv2)

        # Stitch the two towers back together along the split axis.
        towers = tf.concat([h_left2, h_right2], 1)

        # First fully connected layer on the flattened convolution output.
        flat_dim = self.config.strlen * 80
        hidden_dim = int(self.config.strlen * 80 / 7.89)
        W_fc1 = utils.weight_variable([self.config.strlen * 80, hidden_dim])
        b_fc1 = utils.bias_variable([hidden_dim])

        towers_flat = tf.reshape(towers, [-1, flat_dim])
        h_fc1 = utils.lrelu(tf.matmul(towers_flat, W_fc1) + b_fc1)
        h_fc1 = tf.nn.dropout(h_fc1, self.keep_prob)

        # Final fully connected layer with a single output unit.
        W_fc2 = utils.weight_variable([hidden_dim, 1])
        b_fc2 = utils.bias_variable([1])

        logits = tf.matmul(h_fc1, W_fc2) + b_fc2
        # TODO: Add separate filter with unshared weights that looks at center?

        return tf.sigmoid(logits)
示例#6
0
    def discriminator(self,
                      inputs_logits,
                      num_blocks=3,
                      use_bias=False,
                      num_classes=1):
        """Score a time/event sequence with a convolutional discriminator.

        The intent is to give a high score to times that are consistent with
        the history and a low score otherwise.

        Implementation: the input is transposed with permutation [0, 2, 1],
        passed through an input convolution and five residual blocks, then
        flattened and mapped to a single value by a linear layer.

        Args:
            inputs_logits: Tensor to score.
            num_blocks: Accepted but not used by this method.
            use_bias: Accepted but not used by this method.
            num_classes: Accepted but not used by this method.

        Returns:
            The output of the final linear layer (no activation applied).
        """
        with tf.variable_scope('Discriminator'):
            transposed = tf.transpose(inputs_logits, [0, 2, 1])
            features = utils.conv1d('D.Input', 1, self.filter_output_dim,
                                    self.filter_size, transposed)

            # Five residual blocks named 'D.1' ... 'D.5'.
            for block_id in range(1, 6):
                features = self.res_block('D.%d' % block_id, features)

            flat_dim = self.num_steps * self.filter_output_dim
            flattened = tf.reshape(features, [-1, flat_dim])
            return utils.linear('D.Output', flat_dim, 1, flattened)
 def discriminator(self,
                   inputs_logits,
                   num_blocks=3,
                   use_bias=False,
                   num_classes=1):
     """Score a time/event sequence with a convolutional discriminator.

     The intent is to give a high score to times that are consistent with
     the history and a low score otherwise.

     Implementation: the input is used as given (no transposition), passed
     through an input convolution and five residual blocks, flattened, mapped
     to a single value by a linear layer and squashed with a sigmoid.

     Args:
         inputs_logits: Tensor to score.
         num_blocks: Accepted but not used by this method.
         use_bias: Accepted but not used by this method.
         num_classes: Accepted but not used by this method.

     Returns:
         The sigmoid of the final linear layer.
     """
     with tf.variable_scope('Discriminator'):
         features = utils.conv1d('D.Input', 1, self.filter_output_dim,
                                 self.filter_size, inputs_logits)

         # Five residual blocks named 'D.1' ... 'D.5'.
         for block_id in range(1, 6):
             features = self.res_block('D.%d' % block_id, features)

         flat_dim = (self.length + self.num_steps) * self.filter_output_dim
         flattened = tf.reshape(features, [-1, flat_dim])

         # The linear layer has a single output unit, i.e. one discriminator
         # score per example.
         score = utils.linear('D.Output', flat_dim, 1, flattened)
         output = tf.nn.sigmoid(score)
         logging.info('The shape of output from D {}'.format(
             output.get_shape()))
         return output
    def encoder_RecConv(self, cell_type, inputs, t):
        """Encode events with a recurrent encoder and times with a conv stack.

        Args:
            cell_type: String appended to "Encoder_e" to name the recurrent
                encoder.
            inputs: Event input handed to ``utils.build_encoder_graph_gru``.
            t: Time input; a trailing axis is added before the convolutions.

        Returns:
            Tuple ``(hidden_re, hidden_rt)``: the event and time
            representations, each after its own ``self.encoder_attention``
            layer.
        """
        with tf.variable_scope('Generator/Event-Time'):
            # Event branch: recurrent encoder whose per-step outputs are
            # joined along a new axis 1.
            step_outputs = utils.build_encoder_graph_gru(
                inputs, self.hidden_size, self.num_layers, self.batch_size,
                self.num_steps, self.keep_prob, self.is_training,
                "Encoder_e" + cell_type)
            expanded_steps = [
                tf.expand_dims(step_output, 1) for step_output in step_outputs
            ]
            event_hidden = tf.concat(expanded_steps, 1)

            # Time branch: input convolution followed by five residual
            # blocks named 'G.T.1' ... 'G.T.5'.
            time_features = utils.conv1d('G.T.Input', 1,
                                         self.filter_output_dim,
                                         self.filter_size,
                                         tf.expand_dims(t, 2))
            for block_id in range(1, 6):
                time_features = self.res_block('G.T.%d' % block_id,
                                               time_features)

            time_hidden = tf.reshape(
                time_features, [-1, self.num_steps, self.filter_output_dim])

            # Self-attention layer on top of each branch.
            event_hidden = self.encoder_attention(event_hidden, 'SA4E')
            time_hidden = self.encoder_attention(time_hidden, 'SA4T')
            return event_hidden, time_hidden
示例#9
0
    def forward(self, x, n_state, past):
        """Run multi-head self-attention over `x`, optionally reusing a cache.

        Args:
            x: Rank-3 input, expected as [batch, sequence, features].
            n_state: Width of the attention state; must be divisible by
                ``self.n_head``.
            past: ``None``, or a rank-5 cache expected as
                [batch, 2, heads, sequence, features] where the axis of
                size 2 holds [k, v].

        Returns:
            Tuple ``(a, present)``: the projected attention output and the
            keys/values of this call stacked along dim 1 (without ``past``).
        """
        assert len(x.shape) == 3  # Should be [batch, sequence, features]
        assert n_state % self.n_head == 0
        if past is not None:
            # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]
            assert len(past.shape) == 5

        # One projection yields queries, keys and values side by side.
        qkv = utils.conv1d(x, nf=n_state * 3)
        third = qkv.shape[-1] // 3
        q, k, v = (self.split_heads(part) for part in qkv.split(third, dim=2))

        # The cache entry is built before the past is prepended, so it only
        # contains the keys/values computed in this call.
        present = torch.stack([k, v], dim=1)

        if past is not None:
            past_k, past_v = torch.unbind(past, dim=1)
            k = torch.cat([past_k, k], dim=-2)
            v = torch.cat([past_v, v], dim=-2)

        attended = self.merge_heads(self.multihead_attn(q, k, v))
        projected = utils.conv1d(attended, nf=n_state)

        return projected, present
示例#10
0
    def add_prediction_op(self):
        """Build the prediction graph: two 1-D conv layers and two dense layers.

        Reads the input from ``self.x`` and the dropout keep probability from
        ``self.keep_prob``; the flattened convolution output is sized from
        ``self.config.strlen``.

        Returns:
            The sigmoid of the final single-unit dense layer.
        """
        filter_sizes = [5, 5]
        # channels[i] is the number of output channels of layer i, where
        # layer 0 is the input layer.
        channels = [4, 40, 80]
        hidden_units = 1024

        # First conv layer
        W_conv1 = utils.weight_variable(
            [filter_sizes[0], channels[0], channels[1]])
        b_conv1 = utils.bias_variable([channels[1]])
        conv1 = utils.conv1d(self.x, W_conv1) + b_conv1
        h_conv1 = utils.lrelu(conv1)

        # Second conv layer
        W_conv2 = utils.weight_variable(
            [filter_sizes[1], channels[1], channels[2]])
        b_conv2 = utils.bias_variable([channels[2]])
        conv2 = utils.conv1d(h_conv1, W_conv2) + b_conv2
        h_conv2 = utils.lrelu(conv2)

        # First fully connected layer, fed with the convolution output
        # reshaped to one flat vector per example.
        flat_dim = self.config.strlen * channels[2]
        W_fc1 = utils.weight_variable([flat_dim, hidden_units])
        b_fc1 = utils.bias_variable([hidden_units])

        h_conv2_flat = tf.reshape(h_conv2, [-1, flat_dim])
        h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

        # Dropout (should be added to earlier layers too; TODO: further
        # investigate performance with dropout at various points).
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Final fully-connected layer
        W_fc2 = utils.weight_variable([hidden_units, 1])
        b_fc2 = utils.bias_variable([1])
        logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        # Sigmoid turns the single output into a probability.
        return tf.sigmoid(logits)
示例#11
0
    def encoder_t(self, t):
        """Encode the time input with a convolutional residual stack.

        Args:
            t: Time input; a trailing axis is added before the convolutions.

        Returns:
            The stack output reshaped to
            ``[-1, self.num_steps, self.filter_output_dim]``.
        """
        with tf.variable_scope('Generator'):
            features = utils.conv1d('G.T.Input', 1, self.filter_output_dim,
                                    self.filter_size, tf.expand_dims(t, 2))

            # Five residual blocks named 'G.T.1' ... 'G.T.5'.
            for block_id in range(1, 6):
                features = self.res_block('G.T.%d' % block_id, features)

            return tf.reshape(
                features, [-1, self.num_steps, self.filter_output_dim])
示例#12
0
 def encoder(self, inputs, scope=None):
     """Encode a sentence batch and return the latent mean and log-variance.

     A summary vector ``z`` is produced either by a stack of 1-D
     convolutions (when ``cfg.convolutional`` is set) or by a uni- or
     bidirectional RNN whose result is summarised according to
     ``cfg.encoder_summary`` ('laststate', 'attention' or 'mean'). Two
     linear heads then map ``z`` to the returned pair.

     Args:
         inputs: Input tensor for the encoder.
         scope: Optional variable scope name; defaults to "Encoder".

     Returns:
         Tuple ``(z_mean, z_logvar)``.

     Raises:
         ValueError: On the RNN path, if ``cfg.encoder_summary`` is none of
             the supported values.
     """
     with tf.variable_scope(scope or "Encoder"):
         if cfg.convolutional:
             conv_out = inputs
             widths = list(map(int, cfg.conv_width.split(',')))
             last_layer = len(widths) - 1
             for layer, width in enumerate(widths):
                 conv_out = utils.conv1d(conv_out,
                                         cfg.hidden_size,
                                         width,
                                         1,
                                         'VALID',
                                         scope='conv%d' % layer)
                 conv_out = tf.contrib.layers.batch_norm(
                     inputs=conv_out,
                     is_training=self.training,
                     scope='bn%d' % layer)
                 # Every layer except the last one is followed by an ELU.
                 if layer != last_layer:
                     conv_out = tf.nn.elu(conv_out)
             # Max over axis 1 collapses the convolution output to one vector.
             z = tf.reduce_max(conv_out, 1)
         else:
             if cfg.encoder_birnn:
                 # Each direction gets half of the hidden size.
                 outputs, final_state = tf.nn.bidirectional_dynamic_rnn(
                     self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2),
                     self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2),
                     inputs,
                     sequence_length=self.lengths,
                     swap_memory=True,
                     dtype=tf.float32)
                 outputs = tf.concat(2, outputs)
                 # last states of fwd and bkwd
                 final_state = tf.concat(1, final_state[0] + final_state[1])
             else:
                 if cfg.encoder_summary == 'laststate':
                     inputs = tf.reverse_sequence(inputs, self.lengths, 1)
                 outputs, final_state = tf.nn.dynamic_rnn(
                     self.rnn_cell(cfg.num_layers),
                     inputs,
                     sequence_length=self.lengths,
                     swap_memory=True,
                     dtype=tf.float32)
                 final_state = tf.concat(1, final_state)

             if cfg.encoder_summary == 'laststate':
                 # Summarise with the final RNN state only.
                 final_state = utils.highway(final_state,
                                             scope='encoder_output_highway')
                 projected_state = utils.linear(final_state,
                                                cfg.latent_size,
                                                True,
                                                scope='outputs_transform')
                 z = tf.nn.tanh(projected_state)
             else:
                 # Both remaining summaries work on the per-step outputs,
                 # flattened to rows of width hidden_size for the highway.
                 outputs = tf.reshape(outputs, [-1, cfg.hidden_size])
                 outputs = utils.highway(outputs,
                                         scope='encoder_output_highway')
                 if cfg.encoder_summary == 'attention':
                     input_width = inputs.get_shape()[2].value
                     flat_input = tf.reshape(inputs, [-1, input_width])
                     attention_in = tf.concat(1, [flat_input, outputs])
                     weights = utils.linear(attention_in,
                                            cfg.hidden_size,
                                            True,
                                            scope='outputs_attention')
                     batched_shape = [cfg.batch_size, -1, cfg.hidden_size]
                     outputs = tf.reshape(outputs, batched_shape)
                     weights = tf.reshape(weights, batched_shape)
                     weights = tf.nn.softmax(weights, 1)
                     # Weighted sum of the outputs over axis 1.
                     z = tf.reduce_sum(outputs * weights, [1])
                     z = tf.nn.tanh(
                         utils.linear(z,
                                      cfg.latent_size,
                                      True,
                                      scope='outputs_transform'))
                 elif cfg.encoder_summary == 'mean':
                     outputs = utils.linear(outputs,
                                            cfg.latent_size,
                                            True,
                                            scope='outputs_transform')
                     outputs = tf.reshape(
                         outputs, [cfg.batch_size, -1, cfg.latent_size])
                     # Mean of the projected outputs over axis 1.
                     z = tf.nn.tanh(tf.reduce_mean(outputs, [1]))
                 else:
                     raise ValueError(
                         'Invalid encoder_summary configuration.')

         # Two separate linear heads on the shared summary vector.
         z_mean = utils.linear(z,
                               cfg.latent_size,
                               True,
                               scope='encoder_z_mean')
         z_logvar = utils.linear(z,
                                 cfg.latent_size,
                                 True,
                                 scope='encoder_z_logvar')
     return z_mean, z_logvar
示例#13
0
 def mlp(self, x, n_state):
     """Apply a two-layer feed-forward block to `x`.

     The first projection maps to ``n_state`` features and is followed by
     ``self.gelu``; the second maps back to the size of the last dimension
     of ``x``.

     Args:
         x: Input tensor.
         n_state: Width of the hidden projection.

     Returns:
         The output of the second projection.
     """
     out_features = x.shape[-1]
     hidden = utils.conv1d(x, nf=n_state)
     activated = self.gelu(hidden)
     return utils.conv1d(activated, nf=out_features)