Example #1
    def __init__(self, pc, vocab, layers, state_dim, final_hidden_dim, tied,
                 residual):
        self.vocab = vocab
        self.layers = layers
        self.state_dim = state_dim
        self.tied = tied
        self.residual = residual
        self.done_with_left = vocab.convert('</LEFT>')
        self.done_with_right = vocab.convert('</RIGHT>')
        vocab_size = len(self.vocab)

        self.pc = pc.add_subcollection()
        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (vocab_size, state_dim))

        self.top_lstm = dy.LSTMBuilder(layers, state_dim, state_dim, self.pc)
        self.vertical_lstm = dy.LSTMBuilder(layers, state_dim, state_dim,
                                            self.pc)
        self.gate_mlp = MLP(self.pc, [2 * state_dim, state_dim, state_dim])
        self.open_constit_lstms = []
        self.debug_stack = []
        self.spine = []
        self.final_mlp = MLP(self.pc,
                             [state_dim, final_hidden_dim, vocab_size])

        self.top_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]
        self.open_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]
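
Each of these snippets calls an MLP helper that the excerpts never define. For the DyNet-based examples (this one and Examples #12, #19, #20, #21) the call pattern is MLP(pc, [d_in, d_hidden, d_out]) plus new_graph() and set_dropout(), and the tied-embedding code reads layers[-1].w. The following is only a minimal sketch under those assumptions (the Layer class and attribute names are hypothetical), not the projects' actual implementation:

import dynet as dy

class Layer:
    """One affine layer; w and b are DyNet Parameters (hypothetical names)."""
    def __init__(self, pc, d_in, d_out):
        self.w = pc.add_parameters((d_out, d_in))
        self.b = pc.add_parameters((d_out,))

class MLP:
    """Feed-forward stack over a list of layer sizes, with tanh between layers."""
    def __init__(self, pc, dims):
        self.pc = pc.add_subcollection()
        self.layers = [Layer(self.pc, d_in, d_out)
                       for d_in, d_out in zip(dims, dims[1:])]
        self.dropout_rate = 0.0

    def new_graph(self):
        # Re-create parameter Expressions for the current computation graph.
        self.exprs = [(dy.parameter(l.w), dy.parameter(l.b)) for l in self.layers]

    def set_dropout(self, r):
        self.dropout_rate = r

    def __call__(self, x):
        h = x
        for i, (w, b) in enumerate(self.exprs):
            h = dy.affine_transform([b, w, h])
            if i < len(self.exprs) - 1:
                h = dy.tanh(h)
                if self.dropout_rate > 0.0:
                    h = dy.dropout(h, self.dropout_rate)
        return h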
Example #2
 def computeLoss2(self, task_id, internal_id):
     with tf.variable_scope("regression_gate"):
         self.weights['regression_gate_task%i' % task_id] = MLP(
             2 * self.params['hidden_size'], 1, [],
             self.placeholders['out_layer_dropout_keep_prob'])
     with tf.variable_scope("regression"):
         self.weights['regression_transform_task%i' % task_id] = MLP(
             self.params['hidden_size'], 1, [],
             self.placeholders['out_layer_dropout_keep_prob'])
     computed_values = self.gated_regression(
         self.ops['final_node_representations'],
         self.weights['regression_gate_task%i' % task_id],
         self.weights['regression_transform_task%i' % task_id])
     diff = computed_values - self.placeholders['target_values'][
         internal_id, :]
     task_target_mask = self.placeholders['target_mask'][internal_id, :]
     task_target_num = tf.reduce_sum(task_target_mask) + SMALL_NUMBER
     diff = diff * task_target_mask  # Mask out unused values
     self.ops['accuracy_task%i' %
              task_id] = tf.reduce_sum(tf.abs(diff)) / task_target_num
     task_loss = tf.reduce_sum(0.5 * tf.square(diff)) / task_target_num
     # Normalise loss to account for fewer task-specific examples in batch:
     task_loss = task_loss * (
         1.0 / (self.params['task_sample_ratios'].get(task_id) or 1.0))
     self.ops['losses'].append(task_loss)
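
The TensorFlow examples build the (unshown) helper as MLP(in_size, out_size, hid_sizes, dropout_keep_prob) inside a variable scope and then call it on a tensor. A minimal TF1-style sketch consistent with that call signature, offered only as an assumption about the missing class, is:

import tensorflow as tf

class MLP(object):
    """Dense layers over [in_size] + hid_sizes + [out_size]; ReLU and dropout on hidden layers."""
    def __init__(self, in_size, out_size, hid_sizes, dropout_keep_prob):
        self.dropout_keep_prob = dropout_keep_prob
        sizes = [in_size] + list(hid_sizes) + [out_size]
        self.weights, self.biases = [], []
        for i, (d_in, d_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            self.weights.append(tf.get_variable('W_%i' % i, [d_in, d_out],
                                                initializer=tf.glorot_uniform_initializer()))
            self.biases.append(tf.get_variable('b_%i' % i, [d_out],
                                               initializer=tf.zeros_initializer()))

    def __call__(self, x):
        h = x
        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
            h = tf.matmul(h, W) + b
            if i < len(self.weights) - 1:
                h = tf.nn.relu(h)
                h = tf.nn.dropout(h, keep_prob=self.dropout_keep_prob)
        return h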
Example #3
File: model.py  Project: EnesBae/MXMNet
    def __init__(self, config: Config, num_spherical=7, num_radial=6, envelope_exponent=5):
        super(MXMNet, self).__init__()

        self.dim = config.dim           # 128
        self.n_layer = config.n_layer   # 6
        self.cutoff = config.cutoff     # 5

        self.embeddings = nn.Parameter(torch.ones((5, self.dim)))   # (5, 128): (atom index, atom features)

        self.rbf_l = BesselBasisLayer(16, 5, envelope_exponent)
        self.rbf_g = BesselBasisLayer(16, self.cutoff, envelope_exponent)
        self.sbf = SphericalBasisLayer(num_spherical, num_radial, 5, envelope_exponent)

        self.rbf_g_mlp = MLP([16, self.dim])
        self.rbf_l_mlp = MLP([16, self.dim])

        self.sbf_1_mlp = MLP([num_spherical * num_radial, self.dim])
        self.sbf_2_mlp = MLP([num_spherical * num_radial, self.dim])

        self.global_layers = torch.nn.ModuleList()
        for layer in range(config.n_layer):
            self.global_layers.append(Global_MP(config))

        self.local_layers = torch.nn.ModuleList()
        for layer in range(config.n_layer):
            self.local_layers.append(Local_MP(config))
        
        self.init()
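
The PyTorch examples (this one, plus Examples #7, #11, and #18) build the helper from a plain list of layer widths, e.g. MLP([16, self.dim]); Example #9 and Example #18 clearly use richer variants with extra keyword arguments. A minimal sketch covering only the list-of-sizes pattern, with the activation choice being an assumption, is:

import torch.nn as nn

def MLP(dims, last_act=nn.SiLU):
    """Linear layers over consecutive sizes in dims; last_act=None drops the
    activation after the final layer (as called in Example #18)."""
    pairs = list(zip(dims[:-1], dims[1:]))
    layers = []
    for i, (d_in, d_out) in enumerate(pairs):
        layers.append(nn.Linear(d_in, d_out))
        act = last_act if i == len(pairs) - 1 else nn.SiLU
        if act is not None:
            layers.append(act())
    return nn.Sequential(*layers)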
Example #4
    def __init__(self, word_embedding_dim, hidden_dim, vocab_size,
                 tag_vocab_size, tag_embedding_dim, label_amount, dropout,
                 char_emb_dim, char_vocab_size):
        super(DependencyParser, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, word_embedding_dim)
        self.tag_embeddings = nn.Embedding(tag_vocab_size, tag_embedding_dim)

        self.char_embeddings = nn.Embedding(char_vocab_size, char_emb_dim)
        self.char_attention = nn.Linear(char_emb_dim, 1)
        self.lstm = nn.LSTM(word_embedding_dim + tag_embedding_dim,
                            hidden_dim,
                            num_layers=1,
                            bidirectional=True,
                            dropout=dropout)

        self.char_lstm = nn.LSTM(char_emb_dim,
                                 20,
                                 num_layers=1,
                                 dropout=dropout)
        self.hidden_to_relu_dep = nn.Linear(hidden_dim, hidden_dim)
        self.hidden_to_relu_head = nn.Linear(hidden_dim, hidden_dim)

        self.arc_dep = MLP(hidden_dim * 2, hidden_dim, 1, dropout)
        self.arc_head = MLP(hidden_dim * 2, hidden_dim, 1, dropout)

        self.label_dep = MLP(hidden_dim * 2, hidden_dim, 1, dropout)
        self.label_head = MLP(hidden_dim * 2, hidden_dim, 1, dropout)

        # add 1 for bias
        self.bi_affine_arcs = nn.Linear(hidden_dim + 1, hidden_dim, bias=False)
        self.bi_affine_labels_weights = nn.Parameter(
            torch.Tensor(label_amount, hidden_dim + 1, hidden_dim + 1))
        self.bi_affine_labels_weights.data.normal_(0, 1)
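
The "add 1 for bias" comment refers to the standard biaffine trick: appending a constant 1 to each dependent representation lets the bilinear weight matrix also learn per-head bias terms. A hedged sketch of how bi_affine_arcs would then be applied in the (unshown) forward pass, with h_dep and h_head as illustrative names for the arc_dep/arc_head outputs:

import torch

def biaffine_arc_scores(bi_affine_arcs, h_dep, h_head):
    # h_dep, h_head: [seq_len, hidden_dim]
    ones = torch.ones(h_dep.size(0), 1, device=h_dep.device)
    h_dep_1 = torch.cat([h_dep, ones], dim=-1)    # [seq_len, hidden_dim + 1]
    # score[i, j] = (h_dep_1[i] W) . h_head[j]; the extra input column of W
    # contributes the bias term for each candidate head.
    return bi_affine_arcs(h_dep_1) @ h_head.t()   # [seq_len, seq_len]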
Example #5
    def make_task_output_model(
        self,
        placeholders: Dict[str, tf.Tensor],
        model_ops: Dict[str, tf.Tensor],
    ) -> None:
        placeholders['graph_nodes_list'] = \
            tf.placeholder(dtype=tf.int32, shape=[None], name='graph_nodes_list')
        placeholders['target_values'] = \
            tf.placeholder(dtype=tf.float32, shape=[len(self.params['task_ids']), None], name='target_values')
        placeholders['out_layer_dropout_keep_prob'] = \
            tf.placeholder(dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob')

        task_metrics = {}
        losses = []
        final_node_feature_size = model_ops[
            'final_node_representations'].shape.as_list()[-1]
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    regression_gate = \
                        MLP(self.initial_node_feature_size + final_node_feature_size, 1, [],
                            placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    regression_transform = \
                        MLP(final_node_feature_size, 1, [],
                            placeholders['out_layer_dropout_keep_prob'])

                per_node_outputs = regression_transform(
                    model_ops['final_node_representations'])
                gate_input = tf.concat([
                    model_ops['final_node_representations'],
                    model_ops['initial_node_features']
                ],
                                       axis=-1)
                per_node_gated_outputs = tf.nn.sigmoid(
                    regression_gate(gate_input)) * per_node_outputs

                # Sum up all nodes per-graph
                per_graph_outputs = tf.unsorted_segment_sum(
                    data=per_node_gated_outputs,
                    segment_ids=placeholders['graph_nodes_list'],
                    num_segments=placeholders['num_graphs'])
                per_graph_outputs = tf.squeeze(per_graph_outputs)  # [g]

                per_graph_errors = per_graph_outputs - placeholders[
                    'target_values'][internal_id, :]
                task_metrics['abs_err_task%i' % task_id] = tf.reduce_sum(
                    tf.abs(per_graph_errors))
                tf.summary.scalar(
                    'mae_task%i' % task_id,
                    task_metrics['abs_err_task%i' % task_id] /
                    tf.cast(placeholders['num_graphs'], tf.float32))
                losses.append(tf.reduce_mean(0.5 *
                                             tf.square(per_graph_errors)))
        model_ops['task_metrics'] = task_metrics
        model_ops['task_metrics']['loss'] = tf.reduce_sum(losses)
        model_ops['task_metrics'][
            'total_loss'] = model_ops['task_metrics']['loss'] * tf.cast(
                placeholders['num_graphs'], tf.float32)
Example #6
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int64, [],
                                                         name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
            tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                    )
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], 1, [],
                        self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], 1, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id])

                diff = computed_values - self.placeholders['target_values'][
                    internal_id, :]
                task_target_mask = self.placeholders['target_mask'][
                    internal_id, :]
                task_target_num = tf.reduce_sum(
                    task_target_mask) + SMALL_NUMBER
                diff = diff * task_target_mask  # Mask out unused values
                self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(
                    tf.abs(tf.round(tf.abs(diff)) - 1))

                task_loss = tf.reduce_sum(
                    0.5 * tf.square(diff)) / task_target_num
                # Normalise loss to account for fewer task-specific examples in batch:
                task_loss = task_loss * (
                    1.0 /
                    (self.params['task_sample_ratios'].get(task_id) or 1.0))
                self.ops['losses'].append(task_loss)
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
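
Every GGNN-style make_model above relies on self.gated_regression, which none of the snippets show. Example #5 contains the same computation written out inline, so a standalone sketch consistent with that version (graph_nodes_list and num_graphs are illustrative arguments; in the class they come from self.placeholders) is:

import tensorflow as tf

def gated_regression(last_h, initial_x, regression_gate, regression_transform,
                     graph_nodes_list, num_graphs):
    # A sigmoid gate over [final; initial] node states scales a per-node regression,
    # and the gated per-node outputs are summed per graph.
    gate_input = tf.concat([last_h, initial_x], axis=-1)            # [V, 2D]
    gated_outputs = tf.nn.sigmoid(regression_gate(gate_input)) \
                    * regression_transform(last_h)                  # [V, 1]
    per_graph = tf.unsorted_segment_sum(data=gated_outputs,
                                        segment_ids=graph_nodes_list,
                                        num_segments=num_graphs)    # [G, 1]
    return tf.squeeze(per_graph)                                    # [G]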
Example #7
    def __init__(self, config):
        super(Local_MP, self).__init__()
        self.dim = config.dim

        self.h_mlp = MLP([self.dim, self.dim])

        self.mlp_kj = MLP([3 * self.dim, self.dim])
        self.mlp_ji_1 = MLP([3 * self.dim, self.dim])
        self.mlp_ji_2 = MLP([self.dim, self.dim])
        self.mlp_jj = MLP([self.dim, self.dim])

        self.mlp_sbf1 = MLP([self.dim, self.dim, self.dim])
        self.mlp_sbf2 = MLP([self.dim, self.dim, self.dim])
        self.lin_rbf1 = nn.Linear(self.dim, self.dim, bias=False)
        self.lin_rbf2 = nn.Linear(self.dim, self.dim, bias=False)

        self.res1 = Res(self.dim)
        self.res2 = Res(self.dim)
        self.res3 = Res(self.dim)

        self.lin_rbf_out = nn.Linear(self.dim, self.dim, bias=False)

        self.h_mlp = MLP([self.dim, self.dim])

        self.y_mlp = MLP([self.dim, self.dim, self.dim, self.dim])
        self.y_W = nn.Linear(self.dim, 1)
Example #8
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                            name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                          name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [], name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')
        self.placeholders['pre_id_vector'] = tf.placeholder(tf.float32, [None, len(self.params['task_ids'])],
                                                             name='pre_id_vector')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops['final_node_representations'] = self.compute_final_node_representations()
            else:
                self.ops['final_node_representations'] = tf.zeros_like(self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        self.ops['predicted_values'] = []

        with tf.variable_scope("regression_gate"):
            self.weights['regression_gate_task'] = MLP(2 * self.params['hidden_size'], 1, [],
                                                                   self.placeholders['out_layer_dropout_keep_prob'])

        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                # with tf.variable_scope("regression_gate"):
                #     self.weights['regression_gate_task%i' % task_id] = MLP(2 * self.params['hidden_size'], 1, [],
                #                                                            self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights['regression_transform_task%i' % task_id] = MLP(self.params['hidden_size'], 1, [],
                                                                                self.placeholders['out_layer_dropout_keep_prob'])
                    self.weights['context_embedding_task%i' % task_id] = MLP(len(self.params['task_ids']), 1, [],
                                                                                self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(self.ops['final_node_representations'],
                                                        self.weights['regression_gate_task'],
                                                        self.weights['regression_transform_task%i' % task_id])
                context_values = tf.squeeze(self.weights['context_embedding_task%i' % task_id](self.placeholders['pre_id_vector']))
                computed_values = computed_values + context_values
                predictions = tf.nn.sigmoid(computed_values)
                self.ops['predicted_values'].append(predictions)
                diff = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.placeholders['target_values'][internal_id,:],logits=computed_values)
                task_target_mask = self.placeholders['target_mask'][internal_id,:]
                task_target_num = tf.reduce_sum(task_target_mask) + SMALL_NUMBER
                diff = diff * task_target_mask  # Mask out unused values

                self.ops['accuracy_task%i' % task_id] = self.masked_accuracy(predictions,
                                                        self.placeholders['target_values'][internal_id,:], task_target_mask)

                task_loss = tf.reduce_sum(diff) / task_target_num
                # Normalise loss to account for fewer task-specific examples in batch:
                task_loss = task_loss * (1.0 / (self.params['task_sample_ratios'].get(task_id) or 1.0))
                self.ops['losses'].append(task_loss)
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
Example #9
    def __init__(self,
                 input_dims,
                 hid_dim=32,
                 kernel_size=(3, 3),
                 bn_kwargs={}):
        super(Core, self).__init__()

        # preparation
        C, H, W = input_dims
        assert H == W
        fc_dim = C * H * W  # flatten dimensions

        # padding to retain the layer size
        padding = [int((ks - 1) / 2) for ks in kernel_size]

        self.flatten = nn.Flatten()
        # value network
        self.value_net = MLP([fc_dim, hid_dim, H],
                             batch_norm=True,
                             bn_kwargs=bn_kwargs)

        # internal abstraction
        self.conv_net = nn.Sequential(
            nn.Conv2d(hid_dim,
                      hid_dim,
                      kernel_size=kernel_size,
                      padding=padding), nn.BatchNorm2d(hid_dim), nn.ReLU())

        # MRP model
        self.reward_net = MLP([fc_dim, hid_dim, H],
                              batch_norm=True,
                              bn_kwargs=bn_kwargs)

        # sigmoid to ensure the gammas and lambdas are in [0, 1]
        self.gamma_net = MLP([fc_dim, hid_dim, H],
                             batch_norm=True,
                             activ_out=nn.Sigmoid,
                             bn_kwargs=bn_kwargs)
        self.lambda_net = MLP([fc_dim, hid_dim, H],
                              batch_norm=True,
                              activ_out=nn.Sigmoid,
                              bn_kwargs=bn_kwargs)

        # internal transition network
        self.state_net = nn.Sequential(
            nn.Conv2d(hid_dim,
                      hid_dim,
                      kernel_size=kernel_size,
                      padding=padding), nn.BatchNorm2d(hid_dim, **bn_kwargs),
            nn.ReLU(),
            nn.Conv2d(hid_dim,
                      hid_dim,
                      kernel_size=kernel_size,
                      padding=padding), nn.BatchNorm2d(hid_dim, **bn_kwargs),
            nn.ReLU())
Example #10
    def make_model(self):
        num_task_id = len(self.params['task_ids'])
        self.placeholders['target_values'] = tf.placeholder(tf.float32, [num_task_id, None, 2*num_task_id], name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(tf.float32, [num_task_id, None, 2*num_task_id], name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [], name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops['final_node_representations'] = self.compute_final_node_representations()
            else:
                self.ops['final_node_representations'] = tf.zeros_like(self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(2 * self.params['hidden_size'], 2, [],
                                                                           self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights['regression_transform_task%i' % task_id] = MLP(self.params['hidden_size'], 2, [],
                                                                                self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(self.ops['final_node_representations'],
                                                        self.weights['regression_gate_task%i' % task_id],
                                                        self.weights['regression_transform_task%i' % task_id])

                #computed_values = tf.Print(computed_values-0.5, [computed_values-0.5, tf.shape(computed_values)], 'computed_values', summarize = 150)
                tv = self.placeholders['target_values'][internal_id,:] #tf.squeeze(
                #tv = tf.Print(tv, [tv, tf.shape(tv)], 'tv', summarize = 150)
                # if computed_values.shape.as_list() == tv.shape.as_list():
                #     tv = tf.squeeze(tv)
                labels = tf.argmax(tv, 1)
                prediction = tf.argmax(computed_values, 1)
                accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, labels), tf.float32))
                task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=computed_values, labels=tv))

                TP = tf.reduce_sum(prediction*labels)
                TN = tf.reduce_sum((1-prediction)*(1-labels))
                FP = tf.reduce_sum(prediction*(1-labels))
                FN = tf.reduce_sum((1-prediction)*labels)
                precision = TP / (TP + FP)
                recall = TP / (TP + FN)
                f1 = 2 * precision * recall / (precision + recall)

                self.ops['accuracy_task%i' % task_id] = accuracy
                self.ops['losses'].append(task_loss)

                self.ops['precision_task%i' % task_id] = precision
                self.ops['recall_task%i' % task_id] = recall
                self.ops['f1_task%i' % task_id] = f1
                
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
Example #11
    def __init__(self, config):
        super(Global_MP, self).__init__()
        self.dim = config.dim

        self.h_mlp = MLP([self.dim, self.dim])

        self.res1 = Res(self.dim)
        self.res2 = Res(self.dim)
        self.res3 = Res(self.dim)
        self.mlp = MLP([self.dim, self.dim])

        self.x_edge_mlp = MLP([self.dim * 3, self.dim])
        self.linear = nn.Linear(self.dim, self.dim, bias=False)
Example #12
 def __init__(self, pc, layers, emb_dim, hidden_dim, vocab_size, tied):
     self.spec = (layers, emb_dim, hidden_dim, vocab_size)
     self.pc = pc.add_subcollection()
     self.rnn = dy.LSTMBuilder(layers, emb_dim, hidden_dim, self.pc)
     self.initial_state_params = [
         self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
     ]
     self.output_mlp = MLP(self.pc, [hidden_dim, hidden_dim, vocab_size])
     self.tied = tied
     if not self.tied:
         self.word_embs = self.pc.add_lookup_parameters(
             (vocab_size, emb_dim))
     self.dropout_rate = 0.0
Example #13
 def make_model(self):
     self.placeholders['num_graphs'] = tf.placeholder(tf.int64, [], name='num_graphs')
     self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')
     
     with tf.variable_scope("graph_model", reuse = tf.AUTO_REUSE):
         self.prepare_specific_graph_model()
         # This does the actual graph work:
         support_final_node_representations = self.get_feature(self.placeholders['support_x'],self.placeholders['support_roi'],self.placeholders['support_adj'],self.placeholders['is_training'])
         #[5,v,4096]
         target_final_node_representations = self.get_feature(self.placeholders['target_x'],self.placeholders['target_roi'],self.placeholders['target_adj'],self.placeholders['is_training'])
         #[75,v,4096]
     with tf.variable_scope("out_layer", reuse = tf.AUTO_REUSE):
         
         with tf.variable_scope("regression_gate"):
             self.weights['regression_node'] = MLP(self.params['out_size'], 5, [],
                                                   self.placeholders['out_layer_dropout_keep_prob'])
     
     node_loss = tf.constant(0.0)
     
     print('Node supervision')
     tv = self.placeholders['num_vertices']-1
     #sv = self.placeholders['support_v']
     v = self.placeholders['num_vertices']
     #qv = self.placeholders['target_v']
     support_node = support_final_node_representations[:, 1:, :]
     node_loss = []
     for i in range(5):
         last_h1 = tf.reshape(support_node[i], [-1, self.params['out_size']])
         node_out = self.weights['regression_node'](last_h1)
         node_out = tf.reshape(node_out, [-1, 5])
         node_labels = tf.tile(tf.reshape(self.placeholders['support_label'][i],(1,1)),[1, tv])
         print(node_labels.get_shape().as_list())
         node_labels = tf.reshape(node_labels, [-1])
         node_loss.append(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=node_labels,logits=node_out))
     target_node = target_final_node_representations[:, 1:, :]
     for i in range(50):
         last_h1 = tf.reshape(target_node[i], [-1, self.params['out_size']])
         node_out = self.weights['regression_node'](last_h1)
         node_out = tf.reshape(node_out, [-1, 5])
         node_labels = tf.tile(tf.reshape(self.placeholders['target_label'][i],(1,1)),[1, tv])
         node_labels = tf.reshape(node_labels, [-1])
         node_loss.append(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=node_labels,logits=node_out))
     node_loss = tf.reduce_mean(tf.concat(node_loss,0)) * self.params['node_lambda']
     
     support_final_node_representations = tf.reshape(support_final_node_representations[:,0,:],[5,self.params['out_size']])
     target_final_node_representations = tf.reshape(target_final_node_representations[:,0,:],[50,self.params['out_size']])
     
     similarities = cosine_d(target_final_node_representations,support_final_node_representations)
     
     similarities = tf.reshape(similarities, [50, 5])         
     support_set_labels = tf.one_hot(self.placeholders['support_label'], 5)
     preds = tf.squeeze(tf.matmul(tf.nn.softmax(similarities), support_set_labels))
     correct_prediction = tf.equal(tf.argmax(preds, 1), tf.cast(self.placeholders['target_label'], tf.int64))
     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
     targets = tf.one_hot(self.placeholders['target_label'], 5)
     print("preds ",preds.get_shape().as_list())   
     print("targets ",targets.get_shape().as_list())   
     mean_square_error_loss = tf.reduce_mean(tf.square((preds-1)*targets + preds*(1-targets)))
     self.ops['accuracy'] = accuracy
     self.ops['loss'] = mean_square_error_loss + node_loss
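
The cosine_d helper used above is not shown. Given how similarities is consumed (softmax over the support dimension, then a weighted sum of one-hot support labels), it behaves like a pairwise cosine similarity between target and support embeddings; whether the real helper negates it into a distance is an assumption. A minimal sketch:

import tensorflow as tf

def cosine_d(target, support):
    # target: [T, D], support: [S, D] -> [T, S] cosine similarities.
    target = tf.nn.l2_normalize(target, axis=-1)
    support = tf.nn.l2_normalize(support, axis=-1)
    return tf.matmul(target, support, transpose_b=True)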
Example #14
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(
            tf.float32, [None], name='target_values')
        # self.placeholders['target_mask'] = tf.placeholder(tf.float32, [None],
        #                                                   name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [],
                                                         name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
            tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            self.ops[
                'final_node_representations'] = self.compute_final_node_representations(
                )

        with tf.variable_scope("out_layer_task"):
            with tf.variable_scope("regression"):
                self.weights['regression_transform_task'] = MLP(
                    2 * self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            accuracy, task_loss = self.regression(
                self.ops['final_node_representations'],
                self.weights['regression_transform_task'])
            # task_target_mask = self.placeholders['target_mask'][internal_id, :]
            # task_target_num = tf.reduce_sum(task_target_mask) + SMALL_NUMBER
            # diff = diff * task_target_mask  # Mask out unused values
            self.ops['accuracy_task'] = accuracy
            # Normalise loss to account for fewer task-specific examples in batch:
            task_loss = task_loss * (1.0 / self.params['task_sample_ratios'])
            self.ops['loss'] = task_loss
Example #15
    def __init__(self, action_set, reward_function, feature_extractor, 
        hidden_dims=[50, 50], learning_rate=5e-4, buffer_size=50000, 
        batch_size=64, num_batches=100, starts_learning=5000, final_epsilon=0.02, 
        discount=0.99, target_freq=10, verbose=False, print_every=1, 
        test_model_path=None):

        Agent.__init__(self, action_set, reward_function)
        self.feature_extractor = feature_extractor
        self.feature_dim = self.feature_extractor.dimension

        # build Q network
        # we use a multilayer perceptron
        dims = [self.feature_dim] + hidden_dims + [len(self.action_set)]
        self.model = MLP(dims)

        if test_model_path is None:
            self.test_mode = False
            self.learning_rate = learning_rate
            self.buffer_size = buffer_size
            self.batch_size = batch_size
            self.num_batches = num_batches
            self.starts_learning = starts_learning
            self.epsilon = 1.0  # anneals starts_learning/(starts_learning + t)
            self.final_epsilon = final_epsilon
            self.timestep = 0
            self.discount = discount
            
            self.buffer = Buffer(self.buffer_size)

            self.target_net = MLP(dims)
            self.target_net.load_state_dict(self.model.state_dict())
            self.target_net.eval()

            self.target_freq = target_freq # target nn updated every target_freq episodes
            self.num_episodes = 0

            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
            
            # for debugging purposes
            self.verbose = verbose
            self.running_loss = 1.
            self.print_every = print_every

        else:
            self.test_mode = True
            self.model.load_state_dict(torch.load(test_model_path))
            self.model.eval()
Example #16
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int64, [],
                                                         name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
            tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                    )
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], 1, [],
                        self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], 1, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id])
                diff = computed_values - self.placeholders['target_values'][
                    internal_id, :]
                self.ops['accuracy_task%i' % task_id] = tf.reduce_mean(
                    tf.abs(diff))
                self.ops['losses'].append(tf.reduce_mean(0.5 * diff**2))
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
Example #17
    def computeLoc(self, tokenRep, finalRep):
        name = "colsplit"
        hidden_size = self.params['hidden_size']
        with tf.variable_scope(name):
            with tf.variable_scope("W1"):
                W1 = MLP(hidden_size, hidden_size, [], 1.0)
            with tf.variable_scope("W2"):
                W2 = MLP(hidden_size, hidden_size, [], 1.0)
            with tf.variable_scope("W3"):
                W3 = MLP(hidden_size, 2, [], 1.0)

            mask = self.placeholders['nodeMask']
            nodeIndexInGraph = self.placeholders[
                'nodeIndexInGraph']  #[#Nodes,1]
            H2 = tf.gather_nd(finalRep, nodeIndexInGraph)  #[#Nodes, 100]
            H2 = tf.boolean_mask(H2, mask)

            H1 = tf.boolean_mask(tokenRep, mask)

            E1 = W1(H1) + W2(H2)  # [#Nodes, 100]
            E2 = W3(E1)  #[#Nodes, 2]
            newE2 = tf.transpose(E2)  #[2, #Nodes]
            return newE2
Example #18
    def __init__(self, T):
        super().__init__()
        self.T = T

        self.vert_encoder = MLP([256, 128, 32])
        self.edge_encoder = MLP([6, 18, 18, 16])

        self.edge_mlp = MLP([96 + 32 + 32, 80, 16])
        self.vert_mlp = MLP([64, 32, 32])
        self.flow_in_mlp = MLP([48, 56, 32])
        self.flow_out_mlp = MLP([48, 56, 32])

        self.edge_classifier = MLP([16, 16, 1], last_act=None)
Example #19
    def __init__(self,
                 pc,
                 action_vocab,
                 word_vocab_size,
                 rel_vocab_size,
                 layers,
                 hidden_dim,
                 labelled=True,
                 tied=False):
        self.labelled = labelled
        self.tied = tied
        self.action_vocab = action_vocab
        self.pc = pc.add_subcollection()
        action_vocab_size = len(action_vocab)

        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (word_vocab_size, hidden_dim))
        self.action_mlp = MLP(self.pc,
                              [hidden_dim, hidden_dim, action_vocab_size])
        self.word_mlp = MLP(self.pc, [hidden_dim, hidden_dim, word_vocab_size])

        self.combine_mlp = MLP(self.pc,
                               [2 * hidden_dim, hidden_dim, hidden_dim])

        self.stack_lstm = dy.LSTMBuilder(layers, hidden_dim, hidden_dim,
                                         self.pc)
        self.initial_state_params = [
            self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
        ]
        self.stack_embs = []

        if labelled:
            self.rel_embs = self.pc.add_lookup_parameters(
                (rel_vocab_size, hidden_dim))
            self.rel_mlp = MLP(self.pc,
                               [hidden_dim, hidden_dim, rel_vocab_size])
Example #20
class RNNLM:
    def __init__(self, pc, layers, emb_dim, hidden_dim, vocab_size, tied):
        self.spec = (layers, emb_dim, hidden_dim, vocab_size)
        self.pc = pc.add_subcollection()
        self.rnn = dy.LSTMBuilder(layers, emb_dim, hidden_dim, self.pc)
        self.initial_state_params = [
            self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
        ]
        self.output_mlp = MLP(self.pc, [hidden_dim, hidden_dim, vocab_size])
        self.tied = tied
        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (vocab_size, emb_dim))
        self.dropout_rate = 0.0

    def new_graph(self):
        self.output_mlp.new_graph()
        self.initial_state = [
            dy.parameter(p) for p in self.initial_state_params
        ]
        #self.exp = dy.scalarInput(-0.5)

    def set_dropout(self, r):
        self.dropout_rate = r
        self.output_mlp.set_dropout(r)
        self.rnn.set_dropout(r)

    def embed_word(self, word):
        if self.tied:
            word_embs = self.output_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)

        # Normalize word vectors to have length one
        #word_emb_norm = dy.pow(dy.dot_product(word_emb, word_emb), self.exp)
        #word_emb = word_emb * word_emb_norm
        return word_emb

    def build_graph(self, sent):
        state = self.rnn.initial_state()
        state = state.set_s(self.initial_state)

        losses = []
        for word in sent:
            assert state != None
            so = state.output()
            assert so != None
            output_dist = self.output_mlp(so)
            loss = dy.pickneglogsoftmax(output_dist, word)
            losses.append(loss)
            word_emb = self.embed_word(word)
            if self.dropout_rate > 0.0:
                word_emb = dy.dropout(word_emb, self.dropout_rate)

            state = state.add_input(word_emb)
        return dy.esum(losses)

    def sample(self, eos, max_len):
        #dy.renew_cg()
        #self.new_graph()
        state = self.rnn.initial_state()
        state = state.set_s(self.initial_state)
        sent = []
        while len(sent) < max_len:
            assert state != None
            so = state.output()
            assert so != None
            output_dist = dy.softmax(self.output_mlp(so))
            output_dist = output_dist.vec_value()
            word = sample(output_dist)
            sent.append(word)
            if word == eos:
                break
            word_emb = self.embed_word(word)
            state = state.add_input(word_emb)
        return sent

    def param_collection(self):
        return self.pc

    @staticmethod
    def from_spec(spec, pc):
        rnnlm = RNNLM(pc, *spec)
        return rnnlm
Example #21
class TopDownDepLM:
    def __init__(self, pc, vocab, layers, state_dim, final_hidden_dim, tied,
                 residual):
        self.vocab = vocab
        self.layers = layers
        self.state_dim = state_dim
        self.tied = tied
        self.residual = residual
        self.done_with_left = vocab.convert('</LEFT>')
        self.done_with_right = vocab.convert('</RIGHT>')
        vocab_size = len(self.vocab)

        self.pc = pc.add_subcollection()
        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (vocab_size, state_dim))

        self.top_lstm = dy.LSTMBuilder(layers, state_dim, state_dim, self.pc)
        self.vertical_lstm = dy.LSTMBuilder(layers, state_dim, state_dim,
                                            self.pc)
        self.gate_mlp = MLP(self.pc, [2 * state_dim, state_dim, state_dim])
        self.open_constit_lstms = []
        self.debug_stack = []
        self.spine = []
        self.final_mlp = MLP(self.pc,
                             [state_dim, final_hidden_dim, vocab_size])

        self.top_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]
        self.open_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]

    def set_dropout(self, r):
        self.dropout_rate = r
        self.top_lstm.set_dropout(r)
        self.vertical_lstm.set_dropout(r)
        self.final_mlp.set_dropout(r)

    def new_graph(self):
        # Do LSTM builders need reset?
        self.final_mlp.new_graph()
        self.gate_mlp.new_graph()

    def embed_word(self, word):
        if self.tied:
            word_embs = self.final_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)
        return word_emb

    def add_to_last(self, word):
        assert len(self.open_constit_lstms) > 0
        word_emb = self.embed_word(word)
        new_rep = self.open_constit_lstms[-1].add_input(word_emb)
        self.open_constit_lstms[-1] = new_rep

        self.debug_stack[-1].append(self.vocab.to_word(word))

    def pop_and_add(self, word):
        assert len(self.open_constit_lstms) >= 1
        word_emb = self.embed_word(word)
        child_state = self.open_constit_lstms[-1].add_input(word_emb)
        child_emb = child_state.output()
        self.open_constit_lstms.pop()
        if len(self.open_constit_lstms) > 0:
            self.open_constit_lstms[-1] = self.open_constit_lstms[
                -1].add_input(child_emb)
        self.spine.pop()

        self.debug_stack[-1].append(self.vocab.to_word(word))
        debug_child = self.debug_stack.pop()
        if len(self.debug_stack) > 0:
            self.debug_stack[-1].append(debug_child)

    def push(self, word):
        word_emb = self.embed_word(word)

        new_state = self.vertical_lstm.initial_state()
        new_state = new_state.set_s(self.open_initial_state)
        new_state = new_state.add_input(word_emb)
        self.open_constit_lstms.append(new_state)
        self.spine.append(word)

        self.debug_stack.append([self.vocab.to_word(word)])

    def add_input(self, state, word):
        word_emb = self.embed_word(word)
        if word == self.done_with_left:
            self.add_to_last(word)
        elif word == self.done_with_right:
            self.pop_and_add(word)
        else:
            self.push(word)
        #print('After:', self.debug_stack)
        assert len(self.debug_stack) == len(self.open_constit_lstms)
        return ParserState(self.open_constit_lstms, self.spine)

    def new_sent(self):
        new_state = self.vertical_lstm.initial_state()
        new_state = new_state.set_s(self.open_initial_state)
        self.open_constit_lstms = [new_state]
        self.spine = [-1]
        self.debug_stack = [[]]
        return ParserState(self.open_constit_lstms, self.spine)

    def debug_embed_vertical(self, vertical):
        state = self.vertical_lstm.initial_state()
        state = state.set_s(self.open_initial_state)
        for word in vertical:
            if type(word) == list:
                emb = self.debug_embed_vertical(word)
            else:
                emb = self.embed_word(self.vocab.convert(word))
            state = state.add_input(emb)
        return state.output()

    def debug_embed(self):
        top_state = self.top_lstm.initial_state()
        top_state = top_state.set_s(self.top_initial_state)

        assert len(self.open_constit_lstms) == len(self.debug_stack)
        for i, open_constit in enumerate(self.debug_stack):
            emb = self.debug_embed_vertical(open_constit)
            top_state = top_state.add_input(emb)
            alt = self.open_constit_lstms[i]
            #c = 'O' if np.isclose(emb.npvalue(), alt.output().npvalue()).all() else 'X'
            #print(c, emb.npvalue(), alt.output().npvalue())
            #assert np.isclose(emb.npvalue(), alt.output().npvalue()).all()
        #print()
        return top_state

    warned = False

    def compute_loss(self, state, word):
        top_state = self.top_lstm.initial_state()
        top_state = top_state.set_s(self.top_initial_state)
        assert len(state.open_constits) == len(state.spine)
        for open_constit, spine_word in zip(state.open_constits, state.spine):
            constit_emb = open_constit.output()
            if self.residual and spine_word != -1:
                spine_word_emb = self.embed_word(spine_word)
                if False:
                    constit_emb += spine_word_emb
                else:
                    inp = dy.concatenate([constit_emb, spine_word_emb])
                    mask = self.gate_mlp(inp)
                    mask = dy.logistic(mask)
                    constit_emb = dy.cmult(1 - mask, constit_emb)
                    constit_emb = constit_emb + dy.cmult(mask, spine_word_emb)
            top_state = top_state.add_input(constit_emb)
        #debug_top_state = self.debug_embed()
        #assert np.isclose(top_state.output().npvalue(), debug_top_state.output().npvalue()).all()

        logits = self.final_mlp(top_state.output())
        loss = dy.pickneglogsoftmax(logits, word)

        #if not self.warned:
        #  sys.stderr.write('WARNING: compute_loss hacked to not include actual terminals.\n')
        #  self.warned = True
        #if word != 0 and word != 1:
        #  probs = -dy.softmax(logits)
        #  left_prob = dy.pick(probs, 0)
        #  right_prob = dy.pick(probs, 1)
        #  loss = dy.log(1 - left_prob - right_prob)
        #else:
        #  loss = dy.pickneglogsoftmax(logits, word)

        return loss

    def build_graph(self, sent):
        state = self.new_sent()

        losses = []
        for word in sent:
            loss = self.compute_loss(state, word)
            losses.append(loss)
            state = self.add_input(state, word)

        return dy.esum(losses)
Example #22
def sparse_gnn_edge_mlp_layer(node_embeddings: tf.Tensor,
                              adjacency_lists: List[tf.Tensor],
                              type_to_num_incoming_edges: tf.Tensor,
                              state_dim: Optional[int],
                              num_timesteps: int = 1,
                              activation_function: Optional[str] = "ReLU",
                              message_aggregation_function: str = "sum",
                              normalize_by_num_incoming: bool = False,
                              use_target_state_as_input: bool = True,
                              num_edge_hidden_layers: int = 1) -> tf.Tensor:
    """
    Compute new graph states by neural message passing using an edge MLP.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sum_\ell
                     \sum_{(u, v) \in A_\ell}
                        \sigma(1/c_{v,\ell} * MLP(h^t_u || h^t_v))
        c_{v,\ell} is usually 1 (but could also be the number of incoming edges).
    The learnable parameters of this are the per-edge-type MLPs.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number
            of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v]
            is the number of edge of type l connecting to node v.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        message_aggregation_function: Type of aggregation function used for messages.
        normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number
            of incoming edges).
        use_target_state_as_input: Flag indicating if the edge MLP should consume both
            source and target state (True) or only source state (False).
        num_edge_hidden_layers: Number of hidden layers of the edge MLP.
        message_weights_dropout_ratio: Dropout ratio applied to the weights used
            to compute message passing functions.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(
        message_aggregation_function)
    edge_type_to_edge_mlp = []  # MLPs to compute the edge messages
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(
            adjacency_lists):
        edge_type_to_edge_mlp.append(
            MLP(out_size=state_dim,
                hidden_layers=num_edge_hidden_layers,
                activation_fun=tf.nn.elu,
                name="Edge_%i_MLP" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets,
                                axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, D]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(
                adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states,
                                       ids=edge_sources)  # Shape [E, D]

            edge_mlp_inputs = edge_source_states
            if use_target_state_as_input:
                edge_target_states = \
                    tf.nn.embedding_lookup(params=cur_node_states,
                                           ids=edge_targets)  # Shape [E, D]
                edge_mlp_inputs = tf.concat(
                    [edge_source_states, edge_target_states],
                    axis=1)  # Shape [E, 2*D]

            messages = edge_type_to_edge_mlp[edge_type_idx](
                edge_mlp_inputs)  # Shape [E, D]

            if normalize_by_num_incoming:
                per_message_num_incoming_edges = \
                    tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :],
                                           ids=edge_targets)  # Shape [E]
                messages = tf.expand_dims(
                    1.0 / (per_message_num_incoming_edges + SMALL_NUMBER),
                    axis=-1) * messages
            messages_per_type.append(messages)

        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
        all_messages = activation_fn(
            all_messages
        )  # Shape [M, D]  (Apply nonlinearity to Edge-MLP outputs as well)
        aggregated_messages = \
            message_aggregation_fn(data=all_messages,
                                   segment_ids=message_targets,
                                   num_segments=num_nodes)  # Shape [V, D]

        new_node_states = aggregated_messages
        cur_node_states = new_node_states

    return cur_node_states
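
To make the expected input format concrete, the following illustrative construction (not part of the original code) builds adjacency_lists and type_to_num_incoming_edges for a toy graph with V=3 nodes and L=2 edge types, matching the shapes described in the docstring:

import numpy as np

# Edges are (source, target) pairs, one int32 array of shape [E, 2] per edge type.
adjacency_lists_np = [
    np.array([[0, 1], [1, 2]], dtype=np.int32),  # edge type 0: 0->1, 1->2
    np.array([[2, 0]], dtype=np.int32),          # edge type 1: 2->0
]

# type_to_num_incoming_edges[l, v] counts edges of type l arriving at node v.
num_nodes = 3
type_to_num_incoming_edges_np = np.zeros((len(adjacency_lists_np), num_nodes),
                                         dtype=np.float32)
for l, adj in enumerate(adjacency_lists_np):
    for _, v in adj:
        type_to_num_incoming_edges_np[l, v] += 1.0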
Example #23
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [],
                                                         name='num_graphs')

        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
            tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                    )
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['process_raw_graphs'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], 1, [],
                        self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], 1, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                computed_values, sigm_val = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id])

                def f(x):
                    x = 1 * x
                    x = x.astype(np.float32)
                    return x

                new_computed_values = tf.nn.sigmoid(computed_values)
                new_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=computed_values,
                        labels=self.placeholders['target_values'][
                            internal_id, :]))
                a = tf.math.greater_equal(new_computed_values, self.threshold)
                a = tf.py_func(f, [a], tf.float32)
                correct_pred = tf.equal(
                    a, self.placeholders['target_values'][internal_id, :])
                self.ops['new_computed_values'] = new_computed_values
                self.ops['sigm_val'] = sigm_val
                self.ops['accuracy_task%i' % task_id] = tf.reduce_mean(
                    tf.cast(correct_pred, tf.float32))

                b = tf.multiply(
                    self.placeholders['target_values'][internal_id, :], 2)
                b = tf.py_func(f, [b], tf.float32)
                c = tf.cast(a, tf.float32)
                d = tf.math.add(b, c)
                self.ops['sigm_c'] = correct_pred

                d_TP = tf.math.equal(d, 3)
                TP = tf.reduce_sum(tf.cast(d_TP, tf.float32))  # true positive
                d_FN = tf.math.equal(d, 2)
                FN = tf.reduce_sum(tf.cast(d_FN, tf.float32))  # false negative
                d_FP = tf.math.equal(d, 1)
                FP = tf.reduce_sum(tf.cast(d_FP, tf.float32))  # false positive
                d_TN = tf.math.equal(d, 0)
                TN = tf.reduce_sum(tf.cast(d_TN, tf.float32))  # true negative
                self.ops['sigm_sum'] = tf.add_n([TP, FN, FP, TN])
                self.ops['sigm_TP'] = TP
                self.ops['sigm_FN'] = FN
                self.ops['sigm_FP'] = FP
                self.ops['sigm_TN'] = TN

                R = tf.cast(tf.divide(TP, tf.add(TP, FN)),
                            tf.float32)  # Recall
                P = tf.cast(tf.divide(TP, tf.add(TP, FP)),
                            tf.float32)  # Precision
                FPR = tf.cast(tf.divide(FP, tf.add(TN, FP)),
                              tf.float32)  # FPR: false positive rate
                D_TP = tf.add(TP, TP)
                F1 = tf.cast(tf.divide(D_TP, tf.add_n([D_TP, FP, FN])),
                             tf.float32)  # F1
                self.ops['sigm_Recall'] = R
                self.ops['sigm_Precision'] = P
                self.ops['sigm_F1'] = F1
                self.ops['sigm_FPR'] = FPR
                self.ops['losses'].append(new_loss)
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
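
The block above derives the confusion-matrix counts by encoding each position as d = 2 * label + prediction, so the values 3/2/1/0 correspond to TP/FN/FP/TN. A minimal NumPy sketch of that encoding (illustrative values only, not part of the original code):

import numpy as np

labels      = np.array([1, 1, 0, 0], dtype=np.float32)
predictions = np.array([1, 0, 1, 0], dtype=np.float32)  # already thresholded

d = 2 * labels + predictions        # 3 = TP, 2 = FN, 1 = FP, 0 = TN
TP = np.sum(d == 3)
FN = np.sum(d == 2)
FP = np.sum(d == 1)
TN = np.sum(d == 0)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * TP / (2 * TP + FP + FN)    # same F1 form as 'sigm_F1' above
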
Example #24
0
    def make_model(self):
        #TODO: refactor
        if self.args['--pr'] == 'molecule':
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [len(self.params['task_ids']), None],
                name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [len(self.params['task_ids']), None],
                name='target_mask')
        elif self.args['--pr'] in ['identity']:
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [None, None, self.num_edge_types, None],
                name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [self.num_edge_types, None], name='target_mask')
        elif self.args['--pr'] in ['btb']:
            self.placeholders['target_values_head'] = tf.compat.v1.placeholder(
                tf.float32, [None, None], name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [self.output_size_edges, None], name='target_mask')
            self.placeholders[
                'target_values_edges'] = tf.compat.v1.placeholder(
                    tf.float32, [None, None], name='target_values')

        else:
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [None, len(self.params['task_ids']), None],
                name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [len(self.params['task_ids']), None],
                name='target_mask')
        self.placeholders['num_graphs'] = tf.compat.v1.placeholder(
            tf.int32, [], name='num_graphs')
        self.placeholders[
            'out_layer_dropout_keep_prob'] = tf.compat.v1.placeholder(
                tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.compat.v1.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            self.ops[
                'initial_node_representations'] = self.get_initial_node_representation(
                )
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                        self.ops['initial_node_representations'])
                self.ops[
                    'second_node_representations'] = self.compute_final_node_representations(
                        self.ops['initial_node_representations'], 1)
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        self.ops['losses_edges'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.compat.v1.variable_scope("out_layer_task%i" % task_id):
                output_size = self.params['output_size']
                hidden = []
                with tf.compat.v1.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], output_size, hidden,
                        self.placeholders['out_layer_dropout_keep_prob'])
                    self.weights[
                        'regression_gate_task_edges%i' % task_id] = MLP(
                            2 * self.params['hidden_size'],
                            self.output_size_edges, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                with tf.compat.v1.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], output_size, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                    self.weights[
                        'regression_transform_task_edges%i' % task_id] = MLP(
                            self.params['hidden_size'], self.output_size_edges,
                            [],
                            self.placeholders['out_layer_dropout_keep_prob'])

                computed_values = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.ops['initial_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id],
                    None)
                # BTB [b, v * o] ID [e * v * o,  b]  o is 1 for BTB
                if self.args['--pr'] in ['btb']:
                    computed_values_edges = self.gated_regression(
                        self.ops['final_node_representations'],
                        self.ops['initial_node_representations'],
                        self.weights['regression_gate_task_edges%i' % task_id],
                        self.weights['regression_transform_task_edges%i' %
                                     task_id],
                        None,
                        is_edge_regr=True)
                    # [b, v * e]

                task_target_mask = self.placeholders['target_mask'][
                    internal_id, :]
                # ID [b] else: [b]
                task_target_num = tf.reduce_sum(
                    input_tensor=task_target_mask) + SMALL_NUMBER
                # ID and else: b
                if self.args['--pr'] == 'molecule':
                    labels = self.placeholders['target_values'][internal_id, :]
                    mask = tf.transpose(a=self.placeholders['node_mask'])
                elif self.args['--pr'] in ['identity']:
                    labels = self.placeholders['target_values']  # [o, v, e, b]
                    labels = tf.transpose(a=labels, perm=[2, 1, 0,
                                                          3])  # [e, v, o, b]
                    labels = tf.reshape(labels,
                                        [-1, self.placeholders['num_graphs']
                                         ])  # [e * v * o, b]
                    # node_mask ID [b, e * v * o]
                    mask = tf.transpose(
                        a=self.placeholders['node_mask'])  # [e * v * o,b]
                    # ID: [e * v * o,b]
                elif self.args['--pr'] in ['btb']:
                    labels = self.placeholders[
                        'target_values_head']  # [b, v * o]
                    mask = self.placeholders['node_mask']  #[b, v * o]
                    labels_edges = self.placeholders[
                        'target_values_edges']  # [b, v * e]
                    mask_edges = self.placeholders[
                        'node_mask_edges']  # [b, v * e]
                else:
                    labels = self.placeholders['target_values'][:,
                                                                internal_id, :]
                    mask = tf.transpose(a=self.placeholders['node_mask'])
                # diff = computed_values - labels
                # diff = diff * task_target_mask  # Mask out unused values
                # self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(tf.abs(diff)) / task_target_num
                # task_loss = tf.reduce_sum(0.5 * tf.square(diff)) / task_target_num
                # # Normalise loss to account for fewer task-specific examples in batch:
                # task_loss = task_loss * (1.0 / (self.params['task_sample_ratios'].get(task_id) or 1.0))

                # diff =  tf.math.argmax(computed_values, axis = 1) - tf.math.argmax(self.placeholders['target_values'][internal_id, :], axis = 1)
                # diff = tf.dtypes.cast(diff, tf.float32)
                #TODO: FIX THIS

                # computed_values *= task_target_mask
                # we need to redo accuracy
                # diff = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                #                                                logits=computed_values)
                # task_loss = diff
                if self.args['--pr'] == 'molecule':
                    self.calculate_losses_for_molecules(
                        computed_values, internal_id, task_id)
                else:
                    if self.args['--pr'] == 'btb':
                        task_loss_heads = tf.reduce_sum(-tf.reduce_sum(
                            labels * tf.math.log(computed_values), axis=1)
                                                        ) / task_target_num
                        task_loss_edges = tf.reduce_sum(-tf.reduce_sum(
                            labels_edges * tf.math.log(computed_values_edges),
                            axis=1)) / task_target_num
                        # task_loss = (task_loss_heads + task_loss_edges) * tf.cast(self.placeholders['num_vertices'], tf.float32)
                        task_loss = (task_loss_heads + task_loss_edges)
                    else:
                        if self.args.get('--no_labels'):
                            computed_values, labels, mask = self.reduce_edge_dimension(
                                computed_values=computed_values,
                                labels=labels,
                                mask=mask)
                        new_mask = tf.cast(mask, tf.bool)
                        masked_loss = tf.boolean_mask(
                            tensor=labels * tf.math.log(computed_values),
                            mask=new_mask)
                        task_loss = tf.reduce_sum(
                            input_tensor=-1 * masked_loss) / task_target_num
                    self.ops['accuracy_task%i' % task_id] = task_loss
                    self.ops['losses'].append(task_loss)
                    if self.args['--pr'] == 'btb':
                        # Edge-level outputs only exist in the 'btb' setting.
                        self.ops['losses_edges'].append(task_loss_edges)
                        self.ops[
                            'computed_values_edges'] = computed_values_edges
                    self.ops['computed_values'] = computed_values
                    self.ops['labels'] = labels
                    self.ops['node_mask'] = tf.transpose(
                        mask) if self.args['--pr'] != 'btb' else mask
                    self.ops['task_target_mask'] = task_target_mask

        self.ops['loss'] = tf.reduce_sum(input_tensor=self.ops['losses'])
        self.ops['loss_edges'] = tf.reduce_sum(
            input_tensor=self.ops['losses_edges'])
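
The non-'btb', non-'molecule' branch above computes a masked cross-entropy: labels * log(computed_values) is kept only where node_mask is set, and the sum is divided by the number of valid targets (task_target_num, which in the real code comes from target_mask). A small NumPy sketch of that reduction under those assumptions, with illustrative shapes and values:

import numpy as np

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]], dtype=np.float32)    # computed_values (post-softmax)
labels = np.array([[1.0, 0.0, 0.0],
                   [0.0, 0.0, 1.0]], dtype=np.float32)   # one-hot targets
mask = np.array([[1.0, 1.0, 1.0],
                 [0.0, 0.0, 0.0]], dtype=np.float32)     # second example masked out

masked_ll = (labels * np.log(probs))[mask.astype(bool)]  # tf.boolean_mask equivalent
n_valid = mask.any(axis=1).sum() + 1e-7                   # stands in for task_target_num
task_loss = -masked_ll.sum() / n_valid
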
Example #25
0
def init_STRFNet(sample_batch,
                 num_classes,
                 num_kernels=32,
                 residual_channels=[32, 32],
                 embedding_dimension=1024,
                 num_rnn_layers=2,
                 frame_rate=None,
                 bins_per_octave=None,
                 time_support=None,
                 frequency_support=None,
                 conv2d_sizes=(3, 3),
                 mlp_hiddims=[],
                 activate_out=nn.LogSoftmax(dim=1)):
    """Initialize a STRFNet for multi-class classification.

    This is a one-stop solution to create STRFNet and its variants.

    Parameters
    ----------
    sample_batch: [Batch,Time,Frequency] torch.FloatTensor
        A batch of training examples that is used for training.
        Some dimension parameters of the network are inferred from it and
        cannot be changed afterwards.
    num_classes: int
        Number of classes for the classification task.

    Keyword Parameters
    ------------------
    num_kernels: int, 32
        2*num_kernels is the number of STRF/2D kernels.
        Doubling is due to the two orientations of the STRFs.
    residual_channels: list(int), [32, 32]
        Specify the number of conv2d channels for each residual block.
    embedding_dimension: int, 1024
        Dimension of the learned embedding (RNN output).
    frame_rate: float, None
        Sampling rate [samples/second] / hop size [samples].
        No STRF kernels by default.
    bins_per_octave: int, None
        Frequency bins per octave in CQT sense. (TODO: extend for non-CQT rep.)
        No STRF kernels by default.
    time_support: float, None
        Number of seconds spanned by each STRF kernel.
        No STRF kernels by default.
    frequency_support: int/float, None
        If frame_rate or bins_per_octave is None, interpret as GaborSTRFConv.
            - Number of frequency bins (int) spanned by each STRF kernel.
        Otherwise, interpret as STRFConv.
            - Number of octaves spanned by each STRF kernel.
        No STRF kernels by default.
    conv2d_sizes: (int, int), (3, 3)
        nn.Conv2d kernel dimensions.
    mlp_hiddims: list(int), []
        Final MLP hidden layer dimensions.
        Default has no hidden layers.
    activate_out: callable, nn.LogSoftmax(dim=1)
        Activation function at the final layer.
        Default uses LogSoftmax for multi-class classification.
    """
    if all(p is not None for p in (time_support, frequency_support)):
        is_strfnet = True
        if all(p is not None for p in (frame_rate, bins_per_octave)):
            kernel_type = 'wavelet'
        else:
            assert all(
                type(p) is int for p in (time_support, frequency_support))
            kernel_type = 'gabor'
    else:
        is_strfnet = False
    is_cnn = conv2d_sizes is not None
    is_hybrid = is_strfnet and is_cnn
    if is_hybrid:
        print(f"Preparing for Hybrid STRFNet; kernel type is {kernel_type}.")
    elif is_strfnet:
        print(f"Preparing for STRFNet; kernel type is {kernel_type}.")
    elif is_cnn:
        print("Preparing for CNN.")
    else:
        raise ValueError("Insufficient parameters. Check example_STRFNet.")

    if not is_strfnet:
        strf_layer = None
    elif kernel_type == 'wavelet':
        strf_layer = STRFConv(frame_rate, bins_per_octave, time_support,
                              frequency_support, num_kernels)
    else:
        strf_layer = GaborSTRFConv(time_support, frequency_support,
                                   num_kernels)

    if is_cnn:
        d1, d2 = conv2d_sizes
        if d1 % 2 == 0:
            d1 += 1
            print("Enforcing odd conv2d dimension.")
        if d2 % 2 == 0:
            d2 += 1
            print("Enforcing odd conv2d dimension.")
        conv2d_layer = nn.Conv2d(
            1,
            2 * num_kernels,  # Double to match the total number of STRFs
            (d1, d2),
            padding=(d1 // 2, d2 // 2))
    else:
        conv2d_layer = None

    residual_layer = ModResnet((4 if is_hybrid else 2) * num_kernels,
                               residual_channels, False)
    with torch.no_grad():
        flattened_dimension = STRFNet.cnn_forward(sample_batch, strf_layer,
                                                  conv2d_layer,
                                                  residual_layer).shape[-1]

    linear_layer = nn.Linear(flattened_dimension, embedding_dimension)
    rnn = nn.GRU(embedding_dimension,
                 embedding_dimension,
                 batch_first=True,
                 num_layers=num_rnn_layers,
                 bidirectional=True)

    mlp = MLP(2 * embedding_dimension,
              num_classes,
              hiddims=mlp_hiddims,
              activate_hid=nn.LeakyReLU(),
              activate_out=activate_out,
              batchnorm=[True] * len(mlp_hiddims))

    return STRFNet(strf_layer, conv2d_layer, residual_layer, linear_layer, rnn,
                   mlp)
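
A hedged usage sketch for the factory above, assuming Gabor-style STRF kernels (integer supports, frame_rate and bins_per_octave left as None) and that the returned STRFNet consumes the same [Batch, Time, Frequency] tensors; the shapes below are illustrative only:

import torch

sample_batch = torch.randn(8, 200, 64)          # [Batch, Time, Frequency]
net = init_STRFNet(sample_batch,
                   num_classes=10,
                   num_kernels=32,
                   time_support=11,              # int -> GaborSTRFConv interpretation
                   frequency_support=5,
                   conv2d_sizes=(3, 3))          # hybrid: STRF kernels + 2D conv
# log_probs = net(sample_batch)                  # forward signature assumed; LogSoftmax output by default
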
Example #26
0
    def make_model(self):
        num_task_id = len(self.params['task_ids'])
        self.placeholders['target_values'] = tf.placeholder(tf.float32, [num_task_id, None, 2*num_task_id], name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(tf.float32, [num_task_id, None, 2*num_task_id], name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [], name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            set_session(self.sess)
            if self.params['use_graph']:
                self.ops['final_node_representations'] = self.compute_final_node_representations()
                #print(self.ops['final_node_representations'].shape)
                #zero_array = np.zeros(self.ops['final_node_representations'].shape)
                #sess=tf.Session()
                #x = self.ops['final_node_representations'].eval(session=sess,feed_dict={self.ops['final_node_representations']:zero_array})
                #print(x)
                #with tf.Session() as sess:
                #    vector = self.ops['final_node_representations'].eval(session=sess)
                #    print(vector)
                # with open ('./outputs/ggnn_vector.txt', 'a') as f:
                #     f.write(str(tf.Session().run(self.ops['final_node_representations'])))
            else:
                self.ops['final_node_representations'] = tf.zeros_like(self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(2 * self.params['hidden_size'], 2, [],
                                                                           self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights['regression_transform_task%i' % task_id] = MLP(self.params['hidden_size'], 2, [],
                                                                                self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(self.ops['final_node_representations'],
                                                        self.weights['regression_gate_task%i' % task_id],
                                                        self.weights['regression_transform_task%i' % task_id])

                #computed_values = tf.Print(computed_values-0.5, [computed_values-0.5, tf.shape(computed_values)], 'computed_values', summarize = 150)
                tv = self.placeholders['target_values'][internal_id,:] #tf.squeeze(
                #tv = tf.Print(tv, [tv, tf.shape(tv)], 'tv', summarize = 150)
                # if computed_values.shape.as_list() == tv.shape.as_list():
                #     tv = tf.squeeze(tv)
                #with open('labels_computedValues.txt','a') as f:
                #    f.write('target_values:'+str(self.sess.run(self.tv))+'\ncomputed_values:'+str(self.sess.run(self.computed_values))+'\n')
                labels = tf.argmax(tv, 1)
                prediction = tf.argmax(computed_values, 1)
                accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, labels), tf.float32))
                task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=computed_values, labels=tv))

                self.tv = tv
                self.computed_values = computed_values
                self.labels = labels
                self.prediction = prediction

                TP = tf.reduce_sum(prediction*labels)
                TN = tf.reduce_sum((1-prediction)*(1-labels))
                FP = tf.reduce_sum(prediction*(1-labels))
                FN = tf.reduce_sum((1-prediction)*labels)
                precision = TP / (TP + FP)
                recall = TP / (TP + FN)
                f1 = 2 * precision * recall / (precision + recall)

                self.ops['TP%i' % task_id] = TP
                self.ops['TN%i' % task_id] = TN
                self.ops['FP%i' % task_id] = FP
                self.ops['FN%i' % task_id] = FN

                self.ops['accuracy_task%i' % task_id] = accuracy
                self.ops['losses'].append(task_loss)

                self.ops['precision_task%i' % task_id] = precision
                self.ops['recall_task%i' % task_id] = recall
                self.ops['f1_task%i' % task_id] = f1
                
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
Example #27
0
class BottomUpDepLM:
    def __init__(self,
                 pc,
                 action_vocab,
                 word_vocab_size,
                 rel_vocab_size,
                 layers,
                 hidden_dim,
                 labelled=True,
                 tied=False):
        self.labelled = labelled
        self.tied = tied
        self.action_vocab = action_vocab
        self.pc = pc.add_subcollection()
        action_vocab_size = len(action_vocab)

        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (word_vocab_size, hidden_dim))
        self.action_mlp = MLP(self.pc,
                              [hidden_dim, hidden_dim, action_vocab_size])
        self.word_mlp = MLP(self.pc, [hidden_dim, hidden_dim, word_vocab_size])

        self.combine_mlp = MLP(self.pc,
                               [2 * hidden_dim, hidden_dim, hidden_dim])

        self.stack_lstm = dy.LSTMBuilder(layers, hidden_dim, hidden_dim,
                                         self.pc)
        self.initial_state_params = [
            self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
        ]
        self.stack_embs = []

        if labelled:
            self.rel_embs = self.pc.add_lookup_parameters(
                (rel_vocab_size, hidden_dim))
            self.rel_mlp = MLP(self.pc,
                               [hidden_dim, hidden_dim, rel_vocab_size])

    def new_graph(self):
        self.action_mlp.new_graph()
        self.word_mlp.new_graph()
        self.combine_mlp.new_graph()
        if self.labelled:
            self.rel_mlp.new_graph()
        self.initial_state = [
            dy.parameter(p) for p in self.initial_state_params
        ]

    def new_sent(self):
        self.stack_embs = []
        self.stack = []
        state = self.stack_lstm.initial_state()
        state = state.set_s(self.initial_state)
        self.stack_embs.append(state)

    def set_dropout(self, r):
        self.action_mlp.set_dropout(r)
        self.word_mlp.set_dropout(r)
        self.combine_mlp.set_dropout(r)
        self.stack_lstm.set_dropout(r)
        if self.labelled:
            self.rel_mlp.set_dropout(r)

    def combine(self, head, child, direction):
        head_and_child = dy.concatenate([head, child])
        return self.combine_mlp(head_and_child)

    def embed_word(self, word):
        if self.tied:
            word_embs = self.word_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)
        return word_emb

    def embed_stack_naive(self):
        state = self.stack_lstm.initial_state()
        state = state.set_s(self.initial_state)
        for item in self.stack:
            state = state.add_input(item)
        return state.output()

    def embed_stack(self):
        return self.stack_embs[-1].output()

    def pop(self):
        self.stack.pop()
        self.stack_embs.pop()

    def push(self, v):
        self.stack.append(v)
        state = self.stack_embs[-1]
        state = state.add_input(v)
        self.stack_embs.append(state)

    def shift(self, word):
        word_emb = self.embed_word(word)
        self.push(word_emb)

    def reduce_right(self):
        assert len(self.stack) >= 2
        head = self.stack[-1]
        child = self.stack[-2]
        self.pop()
        self.pop()
        combined = self.combine(head, child, 'right')
        self.push(combined)

    def reduce_left(self):
        assert len(self.stack) >= 2
        head = self.stack[-2]
        child = self.stack[-1]
        self.pop()
        self.pop()
        combined = self.combine(head, child, 'left')
        self.push(combined)

    warned = False

    def build_graph(self, sent):
        losses = []
        self.new_sent()
        for action, subtype in sent:
            action_str = self.action_vocab.to_word(action)

            # predict action
            hidden_state = self.embed_stack()
            action_logits = self.action_mlp(hidden_state)
            action_nlp = dy.pickneglogsoftmax(action_logits, action)

            loss = action_nlp
            if action_str == 'shift':
                if not self.warned:
                    sys.stderr.write(
                        'WARNING: Hacked to not include terminal losses')
                    self.warned = True
                #word_logits = self.word_mlp(hidden_state)
                #word_nlp = dy.pickneglogsoftmax(word_logits, subtype)
                #loss += word_nlp
            elif self.labelled:
                rel_logits = self.rel_mlp(hidden_state)
                rel_nlp = dy.pickneglogsoftmax(rel_logits, subtype)
                #loss += rel_nlp
            losses.append(loss)

            # Do the reference action
            if action_str == 'shift':
                self.shift(subtype)
            elif action_str == 'right':
                self.reduce_right()
            elif action_str == 'left':
                self.reduce_left()
            else:
                assert False, 'Unknown action: %s' % action_str

        return dy.esum(losses)
Example #28
0
def sparse_rgin_layer(
    node_embeddings: tf.Tensor,
    adjacency_lists: List[tf.Tensor],
    state_dim: Optional[int],
    num_timesteps: int = 1,
    activation_function: Optional[str] = "ReLU",
    message_aggregation_function: str = "sum",
    use_target_state_as_input: bool = False,
    num_edge_MLP_hidden_layers: Optional[int] = 1,
    num_aggr_MLP_hidden_layers: Optional[int] = None,
) -> tf.Tensor:
    """
    Compute new graph states by neural message passing using MLPs for state updates
    and message computation.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sigma(MLP_{aggr}(\sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u)))
    The learnable parameters of this are the MLPs MLP_\ell.
    This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi
    with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of
    arXiv:1810.00826, as we want to be able to distinguish graphs of the form
     G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)})
    and
     G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)})
    from each other. If we treated all edges the same,
    G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs
    become indistinguishable.
    Hence, we introduce per-edge-type MLPs, which also means that we have to drop
    the optimisation of modelling f \circ \phi by a single MLP used in the original
    GIN formulation.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        message_aggregation_function: Type of aggregation function used for messages.
        use_target_state_as_input: Flag indicating if the edge MLP should consume both
            source and target state (True) or only source state (False).
        num_edge_MLP_hidden_layers: Number of hidden layers of the MLPs used to transform
            messages from neighbouring nodes. If None, the raw states are used directly.
        num_aggr_MLP_hidden_layers: Number of hidden layers of the MLPs used on the
            aggregation of messages from neighbouring nodes. If None, the aggregated messages
            are used directly.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(
        message_aggregation_function)

    if num_aggr_MLP_hidden_layers is not None:
        aggregation_MLP = MLP(out_size=state_dim,
                              hidden_layers=num_aggr_MLP_hidden_layers,
                              activation_fun=activation_fn,
                              name="Aggregation_MLP")  # type: Optional[MLP]
    else:
        aggregation_MLP = None

    if num_edge_MLP_hidden_layers is not None:
        edge_type_to_edge_mlp = [
        ]  # type: Optional[List[MLP]]  # MLPs to compute the edge messages
    else:
        edge_type_to_edge_mlp = None
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(
            adjacency_lists):
        if edge_type_to_edge_mlp is not None and num_edge_MLP_hidden_layers is not None:
            edge_type_to_edge_mlp.append(
                MLP(out_size=state_dim,
                    hidden_layers=num_edge_MLP_hidden_layers,
                    activation_fun=activation_fn,
                    name="Edge_%i_MLP" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets,
                                axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, D]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(
                adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states,
                                       ids=edge_sources)  # Shape [E, D]

            edge_mlp_inputs = edge_source_states
            if use_target_state_as_input:
                edge_target_states = \
                    tf.nn.embedding_lookup(params=cur_node_states,
                                           ids=edge_targets)  # Shape [E, D]
                edge_mlp_inputs = tf.concat(
                    [edge_source_states, edge_target_states],
                    axis=1)  # Shape [E, 2*D]

            if edge_type_to_edge_mlp is not None:
                messages = edge_type_to_edge_mlp[edge_type_idx](
                    edge_mlp_inputs)  # Shape [E, D]
            else:
                messages = edge_mlp_inputs
            messages_per_type.append(messages)

        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
        if edge_type_to_edge_mlp is not None:
            all_messages = activation_fn(
                all_messages
            )  # Shape [M, D]  (Apply nonlinearity to Edge-MLP outputs as well)
        aggregated_messages = \
            message_aggregation_fn(data=all_messages,
                                   segment_ids=message_targets,
                                   num_segments=num_nodes)  # Shape [V, D]

        new_node_states = aggregated_messages
        if aggregation_MLP is not None:
            new_node_states = aggregation_MLP(new_node_states)
        new_node_states = activation_fn(
            new_node_states
        )  # Note that the final MLP layer has no activation, so we do that here explicitly
        new_node_states = tf.contrib.layers.layer_norm(new_node_states)
        cur_node_states = new_node_states

    return cur_node_states
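
A short usage sketch for sparse_rgin_layer, assuming TF 1.x graph mode and the MLP/activation helpers imported by this module; the tensors and sizes below are illustrative only:

import tensorflow as tf

node_states = tf.random.normal([4, 8])                   # V = 4 nodes, D = 8
edges_type_0 = tf.constant([[0, 1], [2, 1]], tf.int32)   # type-0 edges: 0->1, 2->1
edges_type_1 = tf.constant([[3, 2]], tf.int32)           # type-1 edge:  3->2

new_states = sparse_rgin_layer(node_embeddings=node_states,
                               adjacency_lists=[edges_type_0, edges_type_1],
                               state_dim=8,               # keep the input dimension
                               num_timesteps=2)           # two message-passing rounds
# new_states: float32 tensor of shape [4, 8]
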
Example #29
0
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                            name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                          name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [], name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_mode"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops['final_node_representations'] = self.compute_final_node_representations()
            else:
                self.ops['final_node_representations'] = tf.zeros_like(self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(2 * self.params['hidden_size'], 1, [],
                                                                           self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights['regression_transform_task%i' % task_id] = MLP(self.params['hidden_size'], 1, [],
                                                                                self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(self.ops['final_node_representations'],
                                                        self.weights['regression_gate_task%i' % task_id],
                                                        self.weights['regression_transform_task%i' % task_id])
                print(computed_values)


# LOOK HERE
                # computed values -> tensor with values between 0 and 1 <- a sigmoid was applied here,
                # although strictly they are not between 0 and 1... -> would it be better not to apply a
                # sigmoid? On the other hand, the values soon approach this range anyway.
                # target values -> 0 or 1: these are the classification labels

                # As I understand it, the accuracy should be computed with the following formula:
                # Acc = 1 - 1/n * sum(label - prediction)^2
                #
                # Mapped onto the code at hand (roughly):
                # accuracy_task = 1 - 1/task_target_num * tf.reduce_sum(tf.square(diff))
                # (see the short sketch after this example)

                print_in = computed_values - self.placeholders['target_values'][internal_id,:]
                diff = tf.Print(print_in, [print_in], "DIFF: ")

                # not interesting: just the None / unused-value comparison
                task_target_mask = self.placeholders['target_mask'][internal_id,:]
                task_target_num = tf.reduce_sum(task_target_mask) + SMALL_NUMBER
                diff = diff * task_target_mask  # Mask out unused values

                # if val_acc < best_val_acc -> val_acc is the accumulation of the accuracy values, pretty
                # much as they end up in 'accuracy_task', and then the model is declared to have improved.
                # I am somehow not sure that this is correct...

                # IF abs is used with binary classification -> DMG
                # The abs puzzles me here, because with classifications such as:
                    # 0.5, label 1 -> diff = -0.5
                    # 0.5, label 0 -> diff = 0.5
                    # the abs then makes the two values exactly the same again,
                    # so nothing at all is gained by it...

                # Actually mean squared ERROR
                # actual:
                # self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(tf.abs(diff)) / task_target_num
                # test:
                self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(tf.square(diff)) / task_target_num

                # the sign is lost again here, but for the loss that does not matter, right?
                # task_loss = tf.reduce_sum(0.5 * tf.square(diff)) / task_target_num
                task_loss = tf.reduce_sum(tf.square(diff)) / task_target_num
                # Normalise loss to account for fewer task-specific examples in batch:
                task_loss = task_loss * (1.0 / (self.params['task_sample_ratios'].get(task_id) or 1.0))
                self.ops['losses'].append(task_loss)
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
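
The comments above suggest Acc = 1 - 1/n * sum(label - prediction)^2 rather than the mean absolute difference; a minimal NumPy sketch of both quantities for a masked binary task (illustrative values, not part of the original code):

import numpy as np

predictions = np.array([0.9, 0.4, 0.2], dtype=np.float32)  # sigmoid outputs
labels      = np.array([1.0, 1.0, 0.0], dtype=np.float32)
mask        = np.array([1.0, 1.0, 1.0], dtype=np.float32)

diff = (predictions - labels) * mask
n = mask.sum() + 1e-7                          # SMALL_NUMBER
mse_style = (diff ** 2).sum() / n              # what the 'test' accuracy line computes
acc_proposed = 1.0 - mse_style                 # the accuracy proposed in the comments
mae_style = np.abs(diff).sum() / n             # the original 'accuracy_task' value
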
Example #30
0
    def make_model(self):
        self.placeholders['target_values'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['target_mask'] = tf.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_mask')
        self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [],
                                                         name='num_graphs')
        self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
            tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                    )
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.variable_scope("out_layer_task%i" % task_id):
                with tf.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], 1, [],
                        self.placeholders['out_layer_dropout_keep_prob'])
                with tf.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], 1, [],
                            self.placeholders['out_layer_dropout_keep_prob'])
                computed_values = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id])
                # with tf.Session() as my_sess:
                #     print("This batch produced " + str(computed_values.shape) + " results, namely:\n" + my_sess.run(computed_values) + "\n")
                #     print("The target results number " + str(self.placeholders['target_values'][internal_id,:].shape) + ", namely:\n" + my_sess.run(self.placeholders['target_values'][internal_id,:]))
                #     correct = 0
                #     for i in range(computed_values.shape):
                #         if (computed_values[i] > 0 and self.placeholders['target_values'][internal_id,:][i] > 0) or (computed_values[i] < 0 and self.placeholders['target_values'][internal_id,:][i] < 0):
                #             correct = correct + 1
                #     print("Number of correctly predicted items in this batch: " + str(correct))
                diff = computed_values - self.placeholders['target_values'][
                    internal_id, :]
                task_target_mask = self.placeholders['target_mask'][
                    internal_id, :]
                task_target_num = tf.reduce_sum(
                    task_target_mask) + SMALL_NUMBER
                diff = diff * task_target_mask  # Mask out unused values
                self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(
                    tf.abs(diff)) / task_target_num
                self.ops['predict_task%i' % task_id] = computed_values
                task_loss = tf.reduce_sum(
                    0.5 * tf.square(diff)) / task_target_num
                # Normalise loss to account for fewer task-specific examples in batch:
                task_loss = task_loss * (
                    1.0 /
                    (self.params['task_sample_ratios'].get(task_id) or 1.0))
                self.ops['losses'].append(task_loss)
        self.ops['loss'] = tf.reduce_sum(self.ops['losses'])