Example #1
    def apply(self, sent1, sent2):
        eL = dy.parameter(self.linear)
        sent1 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent1)) * eL
        sent2 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent2)) * eL

        out1, out2 = self.feed_F(sent1, sent2)
        e_out = out1 * dy.transpose(out2)
        prob_f_1 = dy.softmax(e_out)
        score = dy.transpose(e_out)
        prob_f_2 = dy.softmax(score)

        sent1_allign = dy.concatenate_cols([sent1, prob_f_1 * sent2])
        sent2_allign = dy.concatenate_cols([sent2, prob_f_2 * sent1])

        out_g_1, out_g_2 = self.feed_G(sent1_allign, sent2_allign)

        sent1_out_g = dy.sum_dim(out_g_1, [0])
        sent2_out_g = dy.sum_dim(out_g_2, [0])

        concat = dy.transpose(dy.concatenate([sent1_out_g, sent2_out_g]))

        h_step_1 = dy.parameter(self.h_step_1)
        sent_h = dy.rectify(dy.dropout(concat, 0.2) * h_step_1)
        h_step_2 = dy.parameter(self.h_step_2)
        sent_h = dy.rectify(dy.dropout(sent_h, 0.2) * h_step_2)

        final = dy.parameter(self.linear2)
        final = dy.transpose(sent_h * final)
        return final
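
In the sum-pooling step above, dy.sum_dim(out_g, [0]) collapses the token dimension of a (length x hidden) matrix into a single vector. A minimal, self-contained sketch of that behavior (shapes and values are illustrative, not taken from the model above):

import dynet as dy
import numpy as np

dy.renew_cg()
# Hypothetical (4 tokens x 3 features) matrix standing in for out_g_1.
g = dy.inputTensor(np.arange(12, dtype=float).reshape(4, 3))
pooled = dy.sum_dim(g, [0])       # sum over dimension 0 (the token axis)
print(g.dim())                    # ((4, 3), 1)
print(pooled.dim())               # ((3,), 1)
print(pooled.npvalue())           # [18. 22. 26.]
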
Example #2
    def __call__(self, sent1, sent2):
        """
        :param sent1: np matrix.
        :param sent2: np matrix.
        :return: DyNet expression holding the log-probabilities of the 3 classes.
        """
        sent1_linear, sent2_linear = self.apply_linear_embed(sent1, sent2)
        f1, f2 = self.apply_f(sent1_linear, sent2_linear)

        score1 = f1 * dy.transpose(f2)
        prob1 = dy.softmax(score1)
        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        sent1_combine = dy.concatenate_cols(
            [sent1_linear, prob1 * sent2_linear])
        sent2_combine = dy.concatenate_cols(
            [sent2_linear, prob2 * sent1_linear])

        # sum
        g1, g2 = self.apply_g(sent1_combine, sent2_combine)
        sent1_output = dy.sum_dim(g1, [0])
        sent2_output = dy.sum_dim(g2, [0])

        input_combine = dy.transpose(
            dy.concatenate([sent1_output, sent2_output]))
        h = self.apply_h(input_combine)

        linear_final = dy.parameter(self.linear_final)
        h = h * linear_final

        output = dy.log_softmax(dy.transpose(h))
        return output
Example #3
    def forward(self, s1, s2, label=None):
        eL = dy.parameter(self.embeddingLinear)
        s1 = dy.inputTensor(s1) * eL
        s2 = dy.inputTensor(s2) * eL

        # F step
        Lf1 = dy.parameter(self.mlpF1)
        Fs1 = dy.rectify(dy.dropout(s1, 0.2) * Lf1)
        Fs2 = dy.rectify(dy.dropout(s2, 0.2) * Lf1)
        Lf2 = dy.parameter(self.mlpF2)
        Fs1 = dy.rectify(dy.dropout(Fs1, 0.2) * Lf2)
        Fs2 = dy.rectify(dy.dropout(Fs2, 0.2) * Lf2)

        # Attention scoring
        score1 = Fs1 * dy.transpose(Fs2)
        prob1 = dy.softmax(score1)

        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        # Align pairs using attention
        s1Pairs = dy.concatenate_cols([s1, prob1 * s2])
        s2Pairs = dy.concatenate_cols([s2, prob2 * s1])

        # G step
        Lg1 = dy.parameter(self.mlpG1)
        Gs1 = dy.rectify(dy.dropout(s1Pairs, 0.2) * Lg1)
        Gs2 = dy.rectify(dy.dropout(s2Pairs, 0.2) * Lg1)
        Lg2 = dy.parameter(self.mlpG2)
        Gs1 = dy.rectify(dy.dropout(Gs1, 0.2) * Lg2)
        Gs2 = dy.rectify(dy.dropout(Gs2, 0.2) * Lg2)

        # Sum
        Ss1 = dy.sum_dim(Gs1, [0])
        Ss2 = dy.sum_dim(Gs2, [0])

        concatS12 = dy.transpose(dy.concatenate([Ss1, Ss2]))

        # H step
        Lh1 = dy.parameter(self.mlpH1)
        Hs = dy.rectify(dy.dropout(concatS12, 0.2) * Lh1)
        Lh2 = dy.parameter(self.mlpH2)
        Hs = dy.rectify(dy.dropout(Hs, 0.2) * Lh2)

        # Final layer
        final_layer = dy.parameter(self.final_layer)
        final = dy.transpose(Hs * final_layer)

        # Label can legitimately be 0, so compare against None explicitly.
        if label is not None:
            return dy.pickneglogsoftmax(final, label)
        else:
            out = dy.softmax(final)
            return np.argmax(out.npvalue())
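
The training branch above returns dy.pickneglogsoftmax(final, label). A quick, self-contained check of what that expression computes, using made-up class scores (not part of the model above):

import dynet as dy
import numpy as np

dy.renew_cg()
scores = dy.inputTensor(np.array([1.0, -0.5, 2.0]))   # hypothetical 3-class scores
label = 2
loss = dy.pickneglogsoftmax(scores, label)
# The same quantity computed by hand from the softmax:
manual = -np.log(dy.softmax(scores).npvalue()[label])
print(loss.value(), manual)   # both print the same negative log-likelihood
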
Example #4
def aggregate_v1_v2(v1, v2, model_params):
    H_w1 = model_params['H_w1']
    H_b1 = model_params['H_b1']
    H_w2 = model_params['H_w2']
    H_b2 = model_params['H_b2']

    v1_sum = dy.sum_dim(v1, [1])
    v2_sum = dy.sum_dim(v2, [1])

    con = dy.concatenate([v1_sum, v2_sum])
    #con = dy.dropout(con, DROPOUT_RATE)

    y_hat = dy.softmax(H_w2 * (dy.rectify((H_w1 * con) + H_b1)) + H_b2)

    return y_hat
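
A runnable sketch of how the parameter dictionary and inputs for aggregate_v1_v2 might be set up. The sizes and the assumption that v1/v2 are (dim x length) matrices are illustrative, and it assumes DyNet >= 2.1, where Parameter objects can be used directly in expressions:

import dynet as dy
import numpy as np

DIM, HIDDEN, CLASSES = 8, 16, 3
pc = dy.ParameterCollection()
model_params = {
    'H_w1': pc.add_parameters((HIDDEN, 2 * DIM)),
    'H_b1': pc.add_parameters(HIDDEN),
    'H_w2': pc.add_parameters((CLASSES, HIDDEN)),
    'H_b2': pc.add_parameters(CLASSES),
}

dy.renew_cg()
v1 = dy.inputTensor(np.random.rand(DIM, 5))   # (dim x sentence-a length)
v2 = dy.inputTensor(np.random.rand(DIM, 7))   # (dim x sentence-b length)
y_hat = aggregate_v1_v2(v1, v2, model_params)
print(y_hat.npvalue())                        # a 3-way probability vector
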
Example #5
 def get_scores_logsoftmax(self, mlp_dec_state):
     score = super().get_scores(mlp_dec_state)
     lex_prob = self.lexicon_prob * self.attender.get_last_attention()
      # Note: dimension 1 has size 1 here, so this sum merely squeezes that dimension,
      # ensuring the shape of the returned tensor matches the vanilla decoder's output.
     return dy.sum_dim(self.lexicon_method(mlp_dec_state, score, lex_prob),
                       [1])
Example #6
 def calc_probs(self, x: dy.Expression) -> dy.Expression:
   model_score = dy.softmax(self.calc_scores(x))
   if self.lexicon_type == 'linear':
     coeff = self.calculate_coeff(x)
     return dy.sum_dim(dy.cmult(coeff, model_score) + dy.cmult((1-coeff), self.calculate_dict_prob(x)), [1])
   else:
     return model_score
Example #7
 def cross_entropy_structbag(self, P, Q):
     """
     P (K x m) represents a distribution over STRUCTURED labels where each
     label is a BAG of K INDEPENDENT symbols taking values in {1 ... m}.
     That is, z = (z1 ... zK) is assigned probability P1(z1) * ... * PK(zK).
     (Similarly for Q.) By the independence, H(P, Q) = sum_k H(Pk, Qk).
     """
     return -dy.sum_dim(dy.cmult(P, self.log2(Q)), [0, 1])
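
A NumPy-only check of the identity stated in the docstring, H(P, Q) = sum_k H(P_k, Q_k), for a made-up K = 2, m = 3 case (purely illustrative; not part of the class above):

import itertools
import numpy as np

P = np.array([[0.2, 0.3, 0.5],
              [0.6, 0.1, 0.3]])   # K = 2 rows, each a distribution over m = 3 symbols
Q = np.array([[0.1, 0.4, 0.5],
              [0.3, 0.3, 0.4]])
K, m = P.shape

# Full cross entropy over all m**K structured outcomes z = (z1, z2).
full = -sum(np.prod([P[k, z[k]] for k in range(K)]) *
            np.log2(np.prod([Q[k, z[k]] for k in range(K)]))
            for z in itertools.product(range(m), repeat=K))

# What the method above computes: sum_k H(P_k, Q_k) = -sum(P * log2(Q)).
factored = -(P * np.log2(Q)).sum()
print(np.isclose(full, factored))   # True
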
Example #8
        def __call__(self, query, options, gold, lengths, query_no):
            if len(options) == 1:
                return None, 0

            final = []
            if args.word_vectors:
                qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
                qvec_max = dy.emax(qvecs)
                qvec_mean = dy.average(qvecs)
            for otext, features in options:
                if not args.no_features:
                    inputs = dy.inputTensor(features)
                if args.word_vectors:
                    ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
                    ovec_max = dy.emax(ovecs)
                    ovec_mean = dy.average(ovecs)
                    if args.no_features:
                        inputs = dy.concatenate(
                            [qvec_max, qvec_mean, ovec_max, ovec_mean])
                    else:
                        inputs = dy.concatenate(
                            [inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
                if args.drop > 0:
                    inputs = dy.dropout(inputs, args.drop)
                h = inputs
                for pH, pB in zip(self.hidden, self.bias):
                    h = dy.affine_transform([pB, pH, h])
                    if args.nonlin == "linear":
                        pass
                    elif args.nonlin == "tanh":
                        h = dy.tanh(h)
                    elif args.nonlin == "cube":
                        h = dy.cube(h)
                    elif args.nonlin == "logistic":
                        h = dy.logistic(h)
                    elif args.nonlin == "relu":
                        h = dy.rectify(h)
                    elif args.nonlin == "elu":
                        h = dy.elu(h)
                    elif args.nonlin == "selu":
                        h = dy.selu(h)
                    elif args.nonlin == "softsign":
                        h = dy.softsign(h)
                    elif args.nonlin == "swish":
                        h = dy.cmult(h, dy.logistic(h))
                final.append(dy.sum_dim(h, [0]))

            final = dy.concatenate(final)
            nll = -dy.log_softmax(final)
            dense_gold = []
            for i in range(len(options)):
                dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
            answer = dy.inputTensor(dense_gold)
            loss = dy.transpose(answer) * nll
            predicted_link = np.argmax(final.npvalue())

            return loss, predicted_link
Example #9
 def log_sum_exp(tag_score_arr):
     argmax = np.argmax(tag_score_arr.value())
     max_score = tag_score_arr[argmax]
     score = max_score
     max_arr = dynet.concatenate(
         [max_score for i in range(len(self.pos) + 2)])
     score += dynet.log(
         dynet.sum_dim(dynet.exp(tag_score_arr - max_arr), [0]))
     return score
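
The helper above is the standard max-shifted log-sum-exp trick. A self-contained version of the same computation, with the closure over self.pos replaced by a fixed-length score vector (values are made up):

import dynet as dy
import numpy as np

dy.renew_cg()
vals = np.array([2.0, -1.0, 0.5, 3.0])            # hypothetical tag scores
scores = dy.inputTensor(vals)

max_score = dy.max_dim(scores)
shifted = scores - dy.concatenate([max_score] * len(vals))
lse = max_score + dy.log(dy.sum_dim(dy.exp(shifted), [0]))

print(lse.value())                                # stable log-sum-exp via DyNet
print(np.log(np.sum(np.exp(vals))))               # same value computed directly
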
Example #10
  def generate(self, src, forced_trg_ids):
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size()==1, "batched generation not fully implemented"
    src = src[0]
    # Generating outputs
    outputs = []
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.generate_per_step:
        assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
        scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
      else:
        if self.mode == "avg_mlp":
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
        elif self.mode == "final_mlp":
          encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
        enc_lin.append(step_linear)
      if self.generate_per_step:
        scores = [dy.logistic(enc_i) for enc_i in enc_lin]
      else:
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                        dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    if self.generate_per_step:
      output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
      score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    else:
      scores_arr = scores.npvalue()
      output_actions = list(np.nonzero(scores_arr > 0.5)[0])
      score = np.sum(scores_arr[scores_arr > 0.5])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    return outputs
Example #11
 def log_sum_exp(scores):
     npval = scores.npvalue()
     argmax_score = np.argmax(npval)
     max_score_expr = dy.pick(scores, argmax_score)
     max_score_expr_broadcast = dy.concatenate([max_score_expr] *
                                               self.tagset_size)
     return max_score_expr + dy.log(
         dy.sum_dim(
             dy.transpose(dy.exp(scores - max_score_expr_broadcast)),
             [1]))
Example #12
 def log_sum_exp(scores):
     npval = scores.npvalue()
     argmax_score = np.argmax(npval)
     max_score_expr = dynet.pick(scores, argmax_score)
     max_score_expr_broadcast = dynet.concatenate([max_score_expr] *
                                                  (self.n_tags + 2))
     return max_score_expr + dynet.log(
         dynet.sum_dim(
             dynet.transpose(
                 dynet.exp(scores - max_score_expr_broadcast)), [1]))
Example #13
def aggregate(sentence_a, sentence_b):
    w1 = dy.parameter(decide_w1)
    b1 = dy.parameter(decide_b1)
    w2 = dy.parameter(decide_w2)
    b2 = dy.parameter(decide_b2)

    sentence_a = dy.sum_dim(sentence_a, [1])
    logging.debug("Sentence a reduction shape: " + str(sentence_a.dim()))
    sentence_b = dy.sum_dim(sentence_b, [1])
    logging.debug("Sentence b reduction shape: " + str(sentence_b.dim()))

    combined = dy.concatenate([sentence_a, sentence_b])
    logging.debug("Combined representations shape: " + str(combined.dim()))

    x = (w1 * combined) + b1
    x = dy.rectify(x)
    logits = (w2 * x) + b2

    return logits
Example #14
  def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg): trg = batchers.mark_as_batch([trg])

    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.mode=="avg_mlp":
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]),
                                 dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
      elif self.mode=="final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode=="lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:,step_i])>0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:,step_i], batched=True))
        enc_lin.append(step_linear)
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)

    else: raise ValueError(f"unknown mode '{self.mode}'")

    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
      for word in set(trg[batch_i]):
        if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
          idxs[0].append(word)
          idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(idxs, values=np.ones(len(idxs[0])),
                                       shape=scores.dim()[0] + (scores.dim()[1],), batched=True)
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
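
The "avg_mlp" branch averages encoder states over time by summing dimension 1 and dividing by the sequence length (with the mask handling the batched case). An unbatched, unmasked sketch of that average (sizes are illustrative):

import dynet as dy
import numpy as np

dy.renew_cg()
states = dy.inputTensor(np.random.rand(4, 6))     # (hidden = 4) x (timesteps = 6)
avg = dy.sum_dim(states, [1]) * (1.0 / states.dim()[0][1])
print(avg.dim())                                  # ((4,), 1)
print(np.allclose(avg.npvalue(), states.npvalue().mean(axis=1)))   # True
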
Example #15
def sum(x, dim=None, include_batch_dim=False):
    if isinstance(x, list):
        return dy.esum(x)
    head_shape, batch_size = x.dim()
    if dim is None:
        x =  dy.sum_elems(x)
        if include_batch_dim and batch_size > 1:
            return dy.sum_batches(x)
        else:
            return x
    else:
        if dim == -1:
            dim = len(head_shape) - 1
        return dy.sum_dim(x, d=[dim], b=include_batch_dim)
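
Example calls to the wrapper above (inputs are illustrative; include_batch_dim only matters for batched expressions):

import dynet as dy
import numpy as np

dy.renew_cg()
x = dy.inputTensor(np.ones((3, 5)))
print(sum(x).value())              # dim=None: sum of all elements -> 15.0
print(sum(x, dim=0).npvalue())     # sum over dimension 0 -> vector of five 3s
print(sum(x, dim=-1).npvalue())    # dim=-1 resolves to the last dimension -> vector of three 5s
print(sum([x, x]).npvalue())       # a list of expressions falls back to dy.esum
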
Example #16
    def _featurize_sentence(self, sentence, is_train, elmo_embeddings):
        # assert len(sentence) == elmo_embeddings.dim()[1], (elmo_embeddings.dim(), len(sentence))
        if is_train:
            self.lstm.set_dropout(self.dropout)
        else:
            self.lstm.disable_dropout()


        embeddings = []
        cur_word_index = 0
        for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
            if word not in (START, STOP):
                count = self.word_vocab.count(word)
                if self.use_elmo:
                    unk_word = (np.random.rand() < 1 / (1 + count)) or (np.random.rand() < 0.1)
                else:
                    unk_word = np.random.rand() < 1 / (1 + count)
                if not count or (is_train and unk_word):
                    word = UNK
                    # if random.random() < 0.5:
                    #     word = UNK
                    # else:
                    #     word = random.choice(self.word_vocab.values)
            word_embedding = self.word_embeddings[self.word_vocab.index(word)]
            input_components = [word_embedding]
            if self.use_elmo:
                if tag == START or tag == STOP:
                    elmo_embedding = dy.zeros(1024)
                else:
                    elmo_weights = dy.parameter(self.elmo_weights)
                    elmo_embedding = dy.sum_dim(dy.cmult(elmo_weights, dy.pick(elmo_embeddings,
                                                                               index=cur_word_index,
                                                                               dim=1)), [0])
                    cur_word_index += 1
                input_components.append(elmo_embedding)
            # else:
                # input_components[-1] = dy.rectify(self.projection(input_components[-1]))


            raw_input = dy.concatenate(input_components)
            if is_train:
                input = dy.dropout(raw_input, p=0.4)
            else:
                input = raw_input
            embeddings.append(input)
        return self.lstm.transduce(embeddings)
Example #17
    def on_calc_additional_loss(self, *args, **kwargs):
        seq_len = len(self.last_output)

        loss_expr = 0
        for pos_i in range(seq_len):
            input_i = self.last_output[pos_i]
            affine = self.linear_layer(input_i)
            softmax_out = dy.softmax(affine)
            if self.mode == "entropy":
                loss_expr = loss_expr - dy.sum_dim(
                    dy.cmult(dy.log(softmax_out), softmax_out), d=[0])
            elif self.mode == "max":
                loss_expr = loss_expr - dy.log(dy.max_dim(softmax_out))
            else:
                raise ValueError(f"unknown mode {self.mode}")
        # loss_expr = loss_expr * (self.scale / seq_len)
        loss_expr = loss_expr * self.scale

        return losses.FactoredLossExpr({"enc_entropy": loss_expr})
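
A standalone check of the per-position entropy term used in the "entropy" branch, against the same quantity computed in NumPy (logits are made up):

import dynet as dy
import numpy as np

dy.renew_cg()
logits = dy.inputTensor(np.array([0.5, 2.0, -1.0]))
p = dy.softmax(logits)
neg_entropy = dy.sum_dim(dy.cmult(dy.log(p), p), d=[0])   # sum_i p_i * log p_i

p_np = p.npvalue()
print(-neg_entropy.value())                               # entropy via DyNet
print(-(p_np * np.log(p_np)).sum())                       # same value via NumPy
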
Example #18
 def _featurize_sentence(self, sentence, is_train, elmo_embeddings, cur_word_index):
     if is_train:
         self.lstm.set_dropout(self.dropout)
     else:
         self.lstm.disable_dropout()
     embeddings = []
     for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
         tag_embedding = self.tag_embeddings[self.tag_vocab.index(tag)]
         if word not in (START, STOP):
             count = self.word_vocab.count(word)
             if not count or (
                 is_train and (np.random.rand() < 1 / (1 + count) or np.random.rand() < 0.1)):
                 word = UNK
         word_embedding = self.word_embeddings[self.word_vocab.index(word)]
         if tag == START or tag == STOP:
             concatenated_embeddings = [tag_embedding, word_embedding, dy.zeros(1024)]
         else:
             elmo_weights = dy.parameter(self.elmo_weights)
             embedding = dy.sum_dim(dy.cmult(elmo_weights, elmo_embeddings[cur_word_index]),
                                    [0])
             concatenated_embeddings = [tag_embedding, word_embedding, embedding]
             cur_word_index += 1
         embeddings.append(dy.concatenate(concatenated_embeddings))
     return self.lstm.transduce(embeddings)
Example #19
    def _featurize_sentence(self, sentence, is_train, elmo_embeddings):
        if is_train:
            self.lstm.set_dropout(self.dropout)
        else:
            self.lstm.disable_dropout()

        embeddings = []
        cur_word_index = 0
        for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
            if word not in (START, STOP):
                count = self.word_vocab.count(word)
                unk_word = (np.random.rand() < 1 /
                            (1 + count)) or (np.random.rand() < 0.1)
                if not count or (is_train and unk_word):
                    word = UNK
            word_embedding = self.word_embeddings[self.word_vocab.index(word)]
            input_components = [word_embedding]
            if self.use_elmo:
                if tag == START or tag == STOP:
                    elmo_embedding = dy.zeros(1024)
                else:
                    elmo_weights = dy.parameter(self.elmo_weights)
                    elmo_embedding = dy.sum_dim(
                        dy.cmult(
                            elmo_weights,
                            dy.pick(elmo_embeddings,
                                    index=cur_word_index,
                                    dim=1)), [0])
                    cur_word_index += 1
                input_components.append(elmo_embedding)

            embedding = dy.concatenate(input_components)
            if is_train:
                embedding = dy.dropout(embedding, p=0.4)
            embeddings.append(embedding)
        return self.lstm.transduce(embeddings)
Example #20
 def transduce(self, embeds):
     return dy.sum_dim(embeds, [1])
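
With embeds laid out as (features x timesteps), the transducer above simply collapses the time axis. A toy illustration:

import dynet as dy
import numpy as np

dy.renew_cg()
embeds = dy.inputTensor(np.ones((5, 3)))       # 5 features, 3 timesteps
print(dy.sum_dim(embeds, [1]).npvalue())       # [3. 3. 3. 3. 3.]
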
Example #21
    def get_bert_embed(self, passage, lang, train=False):
        orig_tokens = passage
        bert_tokens = []
        # Token map will be an int -> int mapping between the `orig_tokens` index and
        # the `bert_tokens` index.
        orig_to_tok_map = []

        # Example:
        # orig_tokens = ["John", "Johanson", "'s",  "house"]
        # bert_tokens == ["[CLS]", "john", "johan", "##son", "'", "s", "house", "[SEP]"]
        # orig_to_tok_map == [(1), (2,3), (4,5), (6)]

        bert_tokens.append("[CLS]")
        for orig_token in orig_tokens:
            start_token = len(bert_tokens)
            bert_token = self.tokenizer.tokenize(orig_token)
            bert_tokens.extend(bert_token)
            end_token = start_token + len(bert_token)
            orig_to_tok_map.append(slice(start_token, end_token))
        bert_tokens.append("[SEP]")

        indexed_tokens = self.tokenizer.convert_tokens_to_ids(bert_tokens)
        tokens_tensor = self.torch.tensor([indexed_tokens])
        if self.config.args.bert_gpu:
            tokens_tensor = tokens_tensor.to('cuda')

        with self.torch.no_grad():
            encoded_layers, _ = self.bert_model(tokens_tensor)
        assert len(
            encoded_layers
        ) == self.bert_layers_count, "Invalid BERT layer count %s" % len(
            encoded_layers)

        aligned_layer = []
        for layer in range(self.bert_layers_count):
            aligned_layer.append([])
            for mapping_range in orig_to_tok_map:
                token_embeddings = encoded_layers[layer][0][mapping_range]
                if self.config.args.bert_token_align_by == "mean":
                    aligned_layer[layer].append(
                        self.torch.mean(token_embeddings,
                                        dim=(0, )).cpu().data.numpy())
                elif self.config.args.bert_token_align_by == "sum":
                    aligned_layer[layer].append(
                        self.torch.sum(token_embeddings,
                                       dim=(0, )).cpu().data.numpy())
                elif self.config.args.bert_token_align_by == "first":
                    aligned_layer[layer].append(
                        token_embeddings[0].cpu().data.numpy())
                else:
                    raise ValueError("Invalid BERT token align option '%s'" %
                                     self.config.args.bert_token_align_by)

        layer_list_to_use = self.config.args.bert_layers
        aligned_layer = [aligned_layer[i] for i in layer_list_to_use]

        if self.config.args.bert_layers_pooling == "weighted":
            bert_softmax = dy.softmax(self.params["bert_weights"])
            embeds = dy.cmult(dy.inputTensor(np.asarray(aligned_layer)),
                              bert_softmax)
            embeds = dy.sum_dim(embeds, [0])
        elif self.config.args.bert_layers_pooling == "concat":
            embeds = dy.inputTensor(np.concatenate(aligned_layer, axis=1))
        elif self.config.args.bert_layers_pooling == "sum":
            embeds = dy.inputTensor(np.sum(aligned_layer, axis=0))
        else:
            raise ValueError("Invalid BERT pooling option '%s'" %
                             self.config.args.bert_layers_pooling)

        if self.config.args.bert_multilingual == 0:
            assert lang
            if (lang + "_embed") in self.params:
                lang_embed = self.params[lang + "_embed"]
            else:
                lang_embed = self.model.add_parameters(50, init='glorot')
                self.params[lang + "_embed"] = lang_embed

            multilingual_embeds = []
            for embed in embeds:
                multilingual_embeds.append(dy.concatenate([lang_embed, embed]))

            embeds = dy.transpose(dy.concatenate_cols(multilingual_embeds))

        if self.config.args.bert_layers_pooling == "weighted":
            single_token_embed_len = self.bert_embedding_len
        elif self.config.args.bert_layers_pooling == "concat":
            single_token_embed_len = self.bert_embedding_len * len(
                layer_list_to_use)
        elif self.config.args.bert_layers_pooling == "sum":
            single_token_embed_len = self.bert_embedding_len
        else:
            raise ValueError("Invalid BERT pooling option '%s'" %
                             self.config.args.bert_layers_pooling)
        if self.config.args.bert_multilingual == 0:
            single_token_embed_len += 50

        # TODO: try dropout strategies like dropping at the per layer embeddings or dropping entire layers.
        assert embeds.dim() == ((len(passage), single_token_embed_len),
                                1), "Invalid BERT dim %s" % embeds.dim()

        assert 0 <= self.config.args.bert_dropout < 1, "Invalid BERT dropout %s" % self.config.args.bert_dropout
        if train:
            embeds = dy.dropout(embeds, self.config.args.bert_dropout)

        return embeds
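
In the "weighted" pooling branch above, the learned layer weights are softmax-normalized, multiplied into the stacked layer tensor, and the layer axis (dimension 0) is collapsed with dy.sum_dim. A NumPy-only illustration of that pooling with made-up sizes:

import numpy as np

layers = np.random.rand(4, 7, 768)              # (layers, tokens, hidden), all made up
w = np.random.rand(4)
w = np.exp(w) / np.exp(w).sum()                 # softmax over the 4 layers
pooled = (layers * w[:, None, None]).sum(axis=0)
print(pooled.shape)                             # (7, 768): one pooled vector per token
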
Example #22
def main():
    dy.renew_cg()
    
    try:
        train_file = open("%s" %(sys.argv[1]))
        test_file = open("%s" %(sys.argv[2]))
    except:
        print("python classification_dynet.py <train_file> <test_file>")
        sys.exit(1)

    train_text_set, train_content_label_set, train_type_label_set, unique_content, unique_type = extract_from_json(train_file)
    test_text_set, test_content_label_set, test_type_label_set, _, _ = extract_from_json(test_file)
    

    word_dict = {}
    word_dict = extract_dictionary(train_text_set, word_dict)
    word_dict = extract_dictionary(test_text_set, word_dict)

    train_feature_matrix = generate_feature_matrix(train_text_set, word_dict)
    test_feature_matrix = generate_feature_matrix(test_text_set, word_dict)


    features_total = len(train_feature_matrix[0])
    para_collec = dy.ParameterCollection()
    pW1 = para_collec.add_parameters((150, 200), dy.NormalInitializer())
    pBias1 = para_collec.add_parameters((150), dy.ConstInitializer(0))
    pW2_content = para_collec.add_parameters((100, 150), dy.NormalInitializer())
    pBias2_content = para_collec.add_parameters((100), dy.ConstInitializer(0))
    pW3_content = para_collec.add_parameters((len(unique_content), 100), dy.NormalInitializer())
    pBias3_content = para_collec.add_parameters((len(unique_content)), dy.ConstInitializer(0))
    pW2_type = para_collec.add_parameters((50, 150), dy.NormalInitializer())
    pBias2_type = para_collec.add_parameters((50), dy.ConstInitializer(0))
    pW3_type = para_collec.add_parameters((len(unique_type), 50), dy.NormalInitializer())
    pBias3_type = para_collec.add_parameters((len(unique_type)), dy.ConstInitializer(0))
    lookup = para_collec.add_lookup_parameters((features_total, 200), dy.NormalInitializer())

    trainer = dy.SimpleSGDTrainer(para_collec)
    
    for i in range(0, 1):
        # resample minority and majority classes
        majority, majority_content_label, majority_type_label, minority, minority_content_label, minority_type_label = label_separator("type", train_feature_matrix, train_content_label_set, train_type_label_set)
        minority_u_text, minority_u_content_label, minority_u_type_label = resample(minority, minority_content_label, minority_type_label, replace=True, n_samples=int(len(majority) * 3), random_state=123)

        X_train = train_feature_matrix
        y_train_content = train_content_label_set
        y_train_type = train_type_label_set

        for index in range(0, 500):

            w1 = dy.parameter(pW1)
            bias1 = dy.parameter(pBias1)
            w2_content = dy.parameter(pW2_content)
            bias2_content = dy.parameter(pBias2_content)
            w3_content = dy.parameter(pW3_content)
            bias3_content = dy.parameter(pBias3_content)
            w2_type = dy.parameter(pW2_type)
            bias2_type = dy.parameter(pBias2_type)
            w3_type = dy.parameter(pW3_type)
            bias3_type = dy.parameter(pBias3_type)
            
            input_text = []
            input_array = X_train[index]
            
            for i in range(0, X_train[index].size):
                if X_train[index][i] > 0:
                    input_text.append(lookup[X_train[index][i]])

            x = dy.concatenate(input_text, 1)
            e_in = dy.sum_dim(x, [1])/features_total
            e_affin1 = dy.affine_transform([bias1, w1, e_in])
            e_affin1 = dy.rectify(e_affin1)
            e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
            e_content_affin2 = dy.dropout(e_content_affin2, 0.5)
            e_content_affin2 = dy.rectify(e_content_affin2)
            e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
            e_content_affin3 = dy.dropout(e_content_affin3, 0.5)
            e_content_affin3 = dy.rectify(e_content_affin3)
            e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
            e_type_affin2 = dy.dropout(e_type_affin2, 0.5)
            e_type_affin2 = dy.rectify(e_type_affin2)
            e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
            e_type_affin3 = dy.dropout(e_type_affin3, 0.5)
            e_type_affin3 = dy.rectify(e_type_affin3)
            content_output = dy.pickneglogsoftmax(e_content_affin3, y_train_content[index])
            content_loss = content_output.scalar_value()
            type_output = dy.pickneglogsoftmax(e_type_affin3, y_train_type[index])
            type_loss = type_output.scalar_value()
            
            if index % 100 == 0:
                print(index, ": content_loss: ", content_loss, "type_loss", type_loss)
            
            content_output.backward()
            trainer.update()
            type_output.backward()
            trainer.update()

            dy.cg_checkpoint()

    print("testing...")
    pred_content = []
    pred_type = []

    w1 = dy.parameter(pW1)
    bias1 = dy.parameter(pBias1)
    w2_content = dy.parameter(pW2_content)
    bias2_content = dy.parameter(pBias2_content)
    w3_content = dy.parameter(pW3_content)
    bias3_content = dy.parameter(pBias3_content)
    w2_type = dy.parameter(pW2_type)
    bias2_type = dy.parameter(pBias2_type)
    w3_type = dy.parameter(pW3_type)
    bias3_type = dy.parameter(pBias3_type)

    for index in range(0, len(test_feature_matrix)):
       
        input_text = []
        line = test_text_set[index]
        for word in line:
            # check if RT
            if word == "RT":
                input_text.append(lookup[len(word_dict)])
            # check if hashtag
            if word[0] == "#":
                input_text.append(lookup[len(word_dict) + 1])

            # check if mention
            if word[0] == "@":
                input_text.append(lookup[len(word_dict) + 2])

            # just word itself 
            if word in word_dict:
                input_text.append(lookup[word_dict[word]])

            try: 
                # lower capiticalization of the word
                lower_word = str(word).lower()
                input_text.append(lookup[word_dict[lower_word]])
                # no punctuation 
                replace_punctuation = str.maketrans('', '', string.punctuation)
                clean_word = str(word).translate(replace_punctuation)
                input_text.append(lookup[word_dict[clean_word]])
            except:
                continue

        # Build the input matrix for this test example (x must be rebuilt here,
        # not reused from the training loop).
        x = dy.concatenate(input_text, 1)
        e_in = dy.sum_dim(x, [1])/features_total
        e_affin1 = dy.affine_transform([bias1, w1, e_in])
        e_affin1 = dy.rectify(e_affin1)
        e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
        e_content_affin2 = dy.rectify(e_content_affin2)
        e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
        e_content_affin3 = dy.rectify(e_content_affin3)
        e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
        e_type_affin2 = dy.rectify(e_type_affin2)
        e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
        e_type_affin3 = dy.rectify(e_type_affin3)
        content_output = np.argmax(e_content_affin3.npvalue())
        pred_content.append(content_output)
        type_output = np.argmax(e_type_affin3.npvalue())
        pred_type.append(type_output)

    misclassification_content = 0
    misclassification_type = 0
    for index in range(0, len(pred_content)):
        if pred_content[index] != test_content_label_set[index]:
            misclassification_content += 1
        if pred_type[index] != test_type_label_set[index]:
            misclassification_type += 1
    
    print("content acc: ", (1 - float(misclassification_content/len(pred_content))))
    print("type acc: ", (1 - float(misclassification_type/len(pred_type))))
Example #23
 def __call__(self, src_encodings, trg_encodings):
     src_avg = dy.sum_dim(src_encodings.as_tensor(),
                          [1]) / (src_encodings.as_tensor().dim()[0][1])
     trg_avg = dy.sum_dim(trg_encodings.as_tensor(),
                          [1]) / (trg_encodings.as_tensor().dim()[0][1])
     return self.dist_op(src_avg - trg_avg)
Example #24
 def mi_zero(self, joint):
     prior1 = dy.sum_dim(joint, [1])
     prior2 = dy.sum_dim(joint, [0])
     return self.mi_zero_with_priors(joint, prior1, prior2)
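
Summing a joint probability table over one dimension yields the marginal of the other variable, which is what the two sum_dim calls above compute. A quick self-contained check with a made-up 2x3 joint:

import dynet as dy
import numpy as np

dy.renew_cg()
joint_np = np.array([[0.10, 0.05, 0.15],
                     [0.20, 0.30, 0.20]])       # 2 x 3, entries sum to 1
joint = dy.inputTensor(joint_np)
prior1 = dy.sum_dim(joint, [1])                 # sum over columns -> marginal of the row variable
prior2 = dy.sum_dim(joint, [0])                 # sum over rows -> marginal of the column variable
print(prior1.npvalue(), joint_np.sum(axis=1))   # [0.3 0.7] both times
print(prior2.npvalue(), joint_np.sum(axis=0))   # [0.3 0.35 0.35] both times
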
Example #25
def log_sum_exp(scores, num_labels):
    max_score_expr = dy.max_dim(scores)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * num_labels)
    return max_score_expr + dy.log(
        dy.sum_dim(dy.exp(scores - max_score_expr_broadcast), [0]))
Example #26
    def forward(self, sent1, sent2, label=None):
        """
        :param sent1: np matrix (converted to a DyNet tensor internally).
        :param sent2: np matrix (converted to a DyNet tensor internally).
        :param label: integer in range [0, 2], or None at prediction time.
        :return: loss expression if label is given, otherwise the predicted class index.
        """
        # Fix embedding
        eL = dy.parameter(self.embeddingLinear)
        sent1 = dy.inputTensor(sent1) * eL
        sent2 = dy.inputTensor(sent2) * eL

        # F step
        Lf1 = dy.parameter(self.mlpF1)
        Fsent1 = dy.rectify(dy.dropout(sent1, 0.2) * Lf1)
        Fsent2 = dy.rectify(dy.dropout(sent2, 0.2) * Lf1)
        Lf2 = dy.parameter(self.mlpF2)
        Fsent1 = dy.rectify(dy.dropout(Fsent1, 0.2) * Lf2)
        Fsent2 = dy.rectify(dy.dropout(Fsent2, 0.2) * Lf2)

        # Attention scoring
        score1 = Fsent1 * dy.transpose(Fsent2)
        prob1 = dy.softmax(score1)

        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        # Align pairs using attention
        sent1Pairs = dy.concatenate_cols([sent1, prob1 * sent2])
        sent2Pairs = dy.concatenate_cols([sent2, prob2 * sent1])

        # G step
        Lg1 = dy.parameter(self.mlpG1)
        Gsent1 = dy.rectify(dy.dropout(sent1Pairs, 0.2) * Lg1)
        Gsent2 = dy.rectify(dy.dropout(sent2Pairs, 0.2) * Lg1)
        Lg2 = dy.parameter(self.mlpG2)
        Gsent1 = dy.rectify(dy.dropout(Gsent1, 0.2) * Lg2)
        Gsent2 = dy.rectify(dy.dropout(Gsent2, 0.2) * Lg2)

        # Sum
        Ssent1 = dy.sum_dim(Gsent1, [0])
        Ssent2 = dy.sum_dim(Gsent2, [0])

        concat = dy.transpose(dy.concatenate([Ssent1, Ssent2]))

        # H step
        Lh1 = dy.parameter(self.mlpH1)
        Hsent = dy.rectify(dy.dropout(concat, 0.2) * Lh1)
        Lh2 = dy.parameter(self.mlpH2)
        Hsent = dy.rectify(dy.dropout(Hsent, 0.2) * Lh2)

        # Final layer
        finalLayer = dy.parameter(self.finaLinear)
        # final = dy.softmax(dy.transpose(Hsent * finalLayer))
        final = dy.transpose(Hsent * finalLayer)

        if label is not None:  # Label can legitimately be 0, so compare against None explicitly.
            return dy.pickneglogsoftmax(final, label)
        else:
            out = dy.softmax(final)
            chosen = np.argmax(out.npvalue())
            return chosen
Example #27
 def calc_scores(self, x: dy.Expression) -> dy.Expression:
   model_score = self.output_projector.transform(x)
   if self.lexicon_type == 'bias':
     model_score += dy.sum_dim(dy.log(self.calculate_dict_prob(x) + self.lexicon_alpha), [1])
   return model_score