def build_graph(self, x):
        conv_W_1 = dy.parameter(self.params['conv_W_1'])
        conv_b_1 = dy.parameter(self.params['conv_b_1'])
        conv_W_2 = dy.parameter(self.params['conv_W_2'])
        conv_b_2 = dy.parameter(self.params['conv_b_2'])
        conv_W_3 = dy.parameter(self.params['conv_W_3'])
        conv_b_3 = dy.parameter(self.params['conv_b_3'])
        W = dy.parameter(self.params['W'])
        b = dy.parameter(self.params['b'])

        (n, d), _ = x.dim()
        x = dy.reshape(x, (1, n, d))

        # 1-D convolutional network
        conv_1 = dy.tanh(
            dy.conv2d_bias(x, conv_W_1, conv_b_1, (1, 1), is_valid=False))
        conv_2 = dy.tanh(
            dy.conv2d_bias(x, conv_W_2, conv_b_2, (1, 1), is_valid=False))
        conv_3 = dy.tanh(
            dy.conv2d_bias(x, conv_W_3, conv_b_3, (1, 1), is_valid=False))

        pool_1 = dy.max_dim(dy.reshape(conv_1, (n, self.options['channel_1'])))
        pool_2 = dy.max_dim(dy.reshape(conv_2, (n, self.options['channel_2'])))
        pool_3 = dy.max_dim(dy.reshape(conv_3, (n, self.options['channel_3'])))

        # fully-connected classification
        pool = dy.concatenate([pool_1, pool_2, pool_3], 0)
        logit = dy.dot_product(pool, W) + b
        return logit
Example #2
 def transduce(self, encodings):
   inp = encodings
   dim = inp.dim()
   if dim[0][1] < self.ngram_size:
     pad = dy.zeros((self.embed_dim, self.ngram_size-dim[0][1]))
     inp = dy.concatenate([inp, pad], d=1)
     dim = inp.dim()
   inp = dy.reshape(inp, (1, dim[0][1], dim[0][0]))
   encodings = dy.rectify(dy.conv2d_bias(inp, dy.parameter(self.filter), dy.parameter(self.bias), stride=(1, 1), is_valid=True))
   return dy.max_dim(dy.max_dim(encodings, d=1), d=0)
Example #3
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    src = src.as_tensor()

    src_height = src.dim()[0][0]
    src_width = src.dim()[0][1]
    # src_channels = 1
    batch_size = src.dim()[1]

    # convolution and pooling layers
    # src dim is ((40, 1000), 128)
    src = padding(src, self.filter_width[0]+3)
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1), stride = [self.stride[0], self.stride[0]], is_valid = True)) # ((1, 1000, 64), 128)
    pool1 = dy.maxpooling2d(l1, (1, 4), (1,2), is_valid = True) #((1, 499, 64), 128)

    pool1 = padding(pool1, self.filter_width[1]+3)
    l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2), stride = [self.stride[1], self.stride[1]], is_valid = True))# ((1, 499, 512), 128)
    pool2 = dy.maxpooling2d(l2, (1, 4), (1,2), is_valid = True)#((1, 248, 512), 128)

    pool2 = padding(pool2, self.filter_width[2])
    l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3), stride = [self.stride[2], self.stride[2]], is_valid = True))# ((1, 248, 1024), 128)
    pool3 = dy.max_dim(l3, d = 1)

    my_norm = dy.l2_norm(pool3) + 1e-6
    output = dy.cdiv(pool3,my_norm)
    output = dy.reshape(output, (self.num_filters[2],), batch_size = batch_size)

    return ExpressionSequence(expr_tensor=output)
Example #4
def calc_scores(words):
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    Waux_sm_express = dy.parameter(Waux_sm)
    baux_sm_express = dy.parameter(baux_sm)
    # WIN_SIZE (the filter width) tells you how many words/chars/pixels we look at in each step.
    # Here, 1 unit is 1 word. If a sample has fewer words than the window size, we need some padding.
    # Pad with index 0 (so the pad positions are treated like UNK).
    if len(words) < WIN_SIZE:
        words += [0] * (WIN_SIZE-len(words))

    # Convolution + pooling layer
    cnn_in = dy.concatenate([W_emb[x] for x in words], d=1) # concat repr of all words
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)  # max pooling over the time dimension
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)  # ReLU activation

    # get scores for either task
    scores_main = W_sm_express * pool_out + b_sm_express
    scores_aux = Waux_sm_express * pool_out + baux_sm_express
    return scores_main, scores_aux
Example #5
    def _build_computation_graph(self, words, train_mode=True):
        """
        Builds the computational graph.
        """
        dy.renew_cg()
        # turn parameters into expressions
        softmax_weight_exp = dy.parameter(self.softmax_weight)
        softmax_bias_exp = dy.parameter(self.softmax_bias)

        word_reps = [self._word_rep(word) for word in words]
        embs = dy.concatenate(word_reps, d=1)

        if self.pooling_method == "average":
            average_emb = dy.mean_dim(embs, d=1)
        elif self.pooling_method == "max":
            average_emb = dy.max_dim(embs, d=1)
        else:
            raise NotImplementedError

        average_emb = dy.reshape(average_emb, (self.word_embedding_size,))

        if self.average_dropout is not None:
            average_emb = dy.dropout(average_emb, p=self.average_dropout)

        return softmax_weight_exp * average_emb + softmax_bias_exp
Example #6
 def transduce(self, embeds):
     expr_seq = []
     seq_len = embeds.dim()[0][1]
     for i in range(seq_len):
         expr_seq.append(dy.max_dim(dy.select_cols(embeds, [i]), 1))
     encodings = self.seq_transducer.transduce(ExpressionSequence(expr_seq))
     return self.seq_transducer.get_final_states()[-1].main_expr()
Example #7
    def decode(self, emissions):
        """Viterbi decode to find the best sequence.

        :param emissions: List[dy.Expression]

        Returns:
            List[int], dy.Expression ((1,), B)
        """
        if self.add_ends:
            emissions = CRF._prep_input(emissions)
        backpointers = []
        transitions = self.transitions

        inits = [-1e4] * self.n_tags
        inits[self.start_idx] = 0
        alphas = dy.inputVector(inits)

        for emission in emissions:
            next_vars = dy.colwise_add(dy.transpose(transitions), alphas)
            best_tags = np.argmax(next_vars.npvalue(), 0)
            v_t = dy.max_dim(next_vars, 0)
            alphas = v_t + emission
            backpointers.append(best_tags)

        terminal_expr = alphas + dy.pick(transitions, self.end_idx)
        best_tag = np.argmax(terminal_expr.npvalue())
        path_score = dy.pick(terminal_expr, best_tag)

        best_path = [best_tag]
        for bp_t in reversed(backpointers):
            best_tag = bp_t[best_tag]
            best_path.append(best_tag)
        _ = best_path.pop()
        best_path.reverse()
        return best_path, path_score
Example #8
    def calculateScores(self, instance, vectors, network, scores, isTraining):
        dReprCache = {}
        for dId in range(-1, len(instance.sentence)):
            depReprs = self.__featReprBuilder.extractAndBuildFeatRepr(
                gfeatures.FeatId.DEP, dId, instance.sentence, vectors,
                isTraining)
            depRepr = dynet.esum(depReprs) if len(depReprs) > 0 else None
            dReprCache[dId] = (depRepr, len(depReprs))

        for hId in range(-1, len(instance.sentence)):
            headReprs = self.__featReprBuilder.extractAndBuildFeatRepr(
                gfeatures.FeatId.HEAD, hId, instance.sentence, vectors,
                isTraining)
            headRepr = dynet.esum(headReprs) if len(headReprs) > 0 else None

            for dId in range(-1, len(instance.sentence)):
                depRepr, depNr = dReprCache[dId]
                distRepr = self.__featReprBuilder.onlyBuildFeatRepr(
                    gfeatures.FeatId.DIST, (hId, dId), isTraining)

                featRepr = [headRepr, depRepr]
                featReprNr = len(headReprs) + depNr
                if distRepr is not None:
                    featRepr.append(distRepr)
                    featReprNr += 1

                assert featReprNr == self.__featReprBuilder.getNrOfFeatures()
                featRepr = dynet.esum([f for f in featRepr if f is not None])
                netOut = network.buildOutput(featRepr, isTraining=isTraining)

                scores.addOutput(hId, dId, netOut)
                scores.addScore(hId, dId, dynet.max_dim(netOut).scalar_value())
Example #9
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
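A minimal usage sketch for the viterbi function above; the tag inventory, indices, and scores below are hypothetical, not taken from the source repo:

import numpy as np
import dynet as dy

dy.renew_cg()
n_tags, start_idx, end_idx, seq_len = 5, 3, 4, 6
transition = dy.inputTensor(np.random.randn(n_tags, n_tags))  # (n_tags x n_tags) transition score matrix
emissions = [dy.inputVector(np.random.randn(n_tags)) for _ in range(seq_len)]
best_path, path_score = viterbi(emissions, transition, start_idx, end_idx)
print(best_path, path_score.value())  # best tag sequence and its (unnormalized) score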
Example #10
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print ('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print (display_activations(words, activations))
    print ('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print ('  bias=%s' % bias)
    contributions = W * features
    print (' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print ('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print ('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print ('     good (%.4f): %s' % (scores[3], contributions[3]))
    print ('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #11
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn,
                             b_cnn,
                             stride=(1, 1),
                             is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    contributions = W * features
    print(' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print('     good (%.4f): %s' % (scores[3], contributions[3]))
    print('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #12
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
Example #13
def softmax(x):
    """
    Compute the softmax function in tensorflow.

    You might find the tensorflow functions tf.exp, tf.reduce_max,
    tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may
    not need to use all of these functions). Recall also that many common
    tensorflow operations are sugared (e.g. x * y does a tensor multiplication
    if x and y are both tensors). Make sure to implement the numerical stability
    fixes as in the previous homework!

    Args:
        x:   tf.Tensor with shape (n_samples, n_features). Note feature vectors are
                  represented by row-vectors. (For simplicity, no need to handle 1-d
                  input as in the previous homework)
    Returns:
        out: tf.Tensor with shape (n_sample, n_features). You need to construct this
                  tensor in this problem.
    """

    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp))

    out = dy.cdiv(x_exp, sum_exp)
    ### END YOUR CODE

    return out
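A minimal usage sketch for this softmax (hypothetical input values; assumes a DyNet version that provides the dy.sum_cols / dy.zeroes ops used above), checking that each row is normalized:

import numpy as np
import dynet as dy

dy.renew_cg()
x = dy.inputTensor(np.random.randn(3, 5))  # (n_samples, n_features)
probs = softmax(x).npvalue()
assert np.allclose(probs.sum(axis=1), np.ones(3), atol=1e-5)  # rows sum to 1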
Example #14
def compute_loss(model, prev_state, current_state, action, reward, step_num):
    q = dy.pick(model.forward(prev_state), action)
    v = dy.max_dim(model.forward(current_state))

    expval = v * math.pow(GAMMA, step_num) + reward

    loss = q - expval
    return loss
Example #15
 def exprseq_pooling(self, exprseq):
     # Reduce to vector
     if exprseq.expr_tensor is not None:
         if len(exprseq.expr_tensor.dim()[0]) > 1:
             return dy.max_dim(exprseq.expr_tensor, d=1)
         else:
             return exprseq.expr_tensor
     else:
         return dy.emax(exprseq.expr_list)
Example #16
 def exprseq_pooling(self, exprseq):
   # Reduce to vector
   exprseq = ExpressionSequence(expr_tensor=exprseq.mask.add_to_tensor_expr(exprseq.as_tensor(),-1e10), mask=exprseq.mask)
   if exprseq.expr_tensor is not None:
     if len(exprseq.expr_tensor.dim()[0]) > 1:
       return dy.max_dim(exprseq.expr_tensor, d=1)
     else:
       return exprseq.expr_tensor
   else:
     return dy.emax(exprseq.expr_list)
Example #17
def softmax(x):
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    x_sum = dy.sum_cols(x_exp)
    x_tmp = dy.zeroes(x.dim()[0])
    x_tmp = dy.colwise_add(x_tmp, x_sum)
    out = dy.cdiv(x_exp, x_tmp)
    ### END YOUR CODE
    return out
Example #18
def calc_scores(wids):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Example #19
def calc_scores(wids):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Example #20
    def conv(input_):
        """Perform the 1D conv.

        :param input: dy.Expression ((1, T, dsz), B)

        Returns:
            dy.Expression ((cmotsz,), B)
        """
        c = dy.conv2d_bias(input_, weight, bias, strides, is_valid=False)
        activation = dy.rectify(c)
        mot = dy.reshape(dy.max_dim(activation, 1), (cmotsz, ))
        return mot
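A minimal shape sketch of the max-over-time pooling described in the docstring above, with assumed sizes and a random tensor, independent of the weight/bias captured by conv:

import numpy as np
import dynet as dy

dy.renew_cg()
T, cmotsz = 7, 4
act = dy.inputTensor(np.random.randn(1, T, cmotsz))   # ((1, T, cmotsz), 1)
mot = dy.reshape(dy.max_dim(act, 1), (cmotsz,))        # ((cmotsz,), 1)
assert np.allclose(mot.npvalue(), act.npvalue().max(axis=1).squeeze())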
Example #21
 def calc_loss(self, scores, axis, true, importance):
     ret = [
         i * dy.pickneglogsoftmax(scores, t)
         for t, i in zip(true, importance)
     ]
     if self.loss == "max_margin":
         ret.append(
             dy.max_dim(
                 dy.log_softmax(scores,
                                restrict=list(
                                    set(range(self.num_labels[axis])) -
                                    set(true)))))
     return ret
Example #22
  def optimize(self, environment, prev_pos, action, next_pos, reward):
    # Get Q(s_t, a_t): predictions of action taken in environment at
    # previous position
    q = dy.pick(self.forward(environment, prev_pos), action)

    # V: max of Q at next state
    v = dy.max_dim(self.forward(environment, next_pos))

    expval = v * GAMMA + reward

    loss = q - expval
    loss.backward()
    self.trainer.update()
Example #23
def log_sum_exp_dim_0(x):
    # numerically stable log_sum_exp
    dims = x.dim()
    max_score = dy.max_dim(x, 0)  # (dim_1, batch_size)
    if len(dims[0]) == 1:
        max_score_extend = max_score
    else:
        max_score_reshape = dy.reshape(max_score, (1, dims[0][1]), batch_size=dims[1])
        max_score_extend = dy.concatenate([max_score_reshape] * dims[0][0])
    x = x - max_score_extend
    exp_x = dy.exp(x)
    # (dim_1, batch_size), if no dim_1, return ((1,), batch_size)
    log_sum_exp_x = dy.log(dy.mean_dim(exp_x, d=[0], b=False) * dims[0][0])
    return log_sum_exp_x + max_score
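A minimal correctness sketch for log_sum_exp_dim_0 against a NumPy reference (hypothetical values; assumes a DyNet version with the mean_dim signature used above):

import numpy as np
import dynet as dy

dy.renew_cg()
x = dy.inputTensor(np.random.randn(4, 3))
lse = log_sum_exp_dim_0(x).npvalue()           # log-sum-exp over dim 0 -> shape (3,)
ref = np.log(np.exp(x.npvalue()).sum(axis=0))
assert np.allclose(lse, ref, atol=1e-5)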
Example #24
    def encode(self, word, training=False):
        W_cnn = dy.parameter(self.W_cnn)
        b_cnn = dy.parameter(self.b_cnn)

        embs = dy.concatenate(
            [dy.lookup(self.char_embeds, x) for x in word[:45]], d=1)
        if self.dropout > 0 and training:
            embs = dy.dropout(embs, self.dropout)
        cnn_out = dy.conv2d_bias(
            embs, W_cnn, b_cnn, stride=(1, 1),
            is_valid=False)  # maybe change this? diagram shows padding
        max_pool = dy.max_dim(cnn_out, d=1)
        rep = dy.reshape(dy.tanh(max_pool), (self.filter_size, ))
        return rep
Example #25
    def compose(self, embeds):
        if not isinstance(embeds, list):
            embeds = [
                dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
            ]

        if len(embeds) < self.ngram_size:
            embeds.extend([dy.zeros(self.embed_dim)] *
                          (self.ngram_size - len(embeds)))

        embeds = dy.transpose(
            dy.concatenate([dy.concatenate_cols(embeds)], d=2), [2, 1, 0])
        embeds = dy.conv2d_bias(embeds, self.filter, self.bias,
                                (self.embed_dim, 1))
        embeds = dy.max_dim(dy.pick(embeds, index=0), d=0)

        return self.transform.transform(embeds)
Example #26
    def _build_computation_graph(self, words, train_mode=True):
        """
        Builds the computational graph.
        """
        dy.renew_cg()
        # turn parameters into expressions
        softmax_weight_exp = dy.parameter(self.softmax_weight)
        softmax_bias_exp = dy.parameter(self.softmax_bias)

        # initialize the RNNs
        f_init = self.fwd_word_rnn.initial_state()
        b_init = self.bwd_word_rnn.initial_state()

        # cf_init = self.fwd_char_rnn.initial_state()
        # cb_init = self.bwd_char_rnn.initial_state()

        # only use word-level for now
        word_reps = [self._word_rep(word) for word in words]

        if train_mode and self.add_word_noise:
            word_reps = [dy.noise(word_rep, 0.05) for word_rep in word_reps]

        # feed word vectors into biLSTM
        fw_exps = f_init.transduce(word_reps)
        bw_exps = b_init.transduce(reversed(word_reps))

        if self.pooling_method == "last":
            average_lstm = dy.concatenate([fw_exps[-1], bw_exps[-1]])
        else:
            bi_exps = [
                dy.concatenate([f, b])
                for f, b in zip(fw_exps, reversed(bw_exps))
            ]
            bi_exps = dy.concatenate(bi_exps, d=1)

            if self.pooling_method == "average":
                average_lstm = dy.mean_dim(bi_exps, d=1)
            elif self.pooling_method == "max":
                average_lstm = dy.max_dim(bi_exps, d=1)
            else:
                raise NotImplementedError

        if self.average_dropout is not None:
            average_lstm = dy.dropout(average_lstm, p=self.average_dropout)

        return softmax_weight_exp * average_lstm + softmax_bias_exp
Example #27
    def _build_tagging_graph(self, words, train_mode=True):
        """
        Builds the computational graph.

        Model similar to http://aclweb.org/anthology/D/D14/D14-1181.pdf.
        """
        dy.renew_cg()
        # turn parameters into expressions
        mlp_output = dy.parameter(self.pO)

        W_cnn_expressions = []
        b_cnn_expressions = []

        for W_cnn, b_cnn in zip(self.W_cnns, self.b_cnns):
            W_cnn_expressions.append(dy.parameter(W_cnn))
            b_cnn_expressions.append(dy.parameter(b_cnn))

        if len(words) < self._cnn_window_size:
            pad_char = "<*>"
            words += [pad_char] * (self._cnn_window_size - len(words))

        if self._char_level:
            cnn_in = dy.concatenate(self._chars_rep(words), d=1)
        else:
            word_reps = [self._word_rep(word) for word in words]
            cnn_in = dy.concatenate(word_reps, d=1)

        pools_out = []
        for W_cnn_express, b_cnn_express in zip(W_cnn_expressions,
                                                b_cnn_expressions):
            cnn_out = dy.conv2d_bias(cnn_in,
                                     W_cnn_express,
                                     b_cnn_express,
                                     stride=(1, 1),
                                     is_valid=False)

            # max-pooling
            pool_out = dy.max_dim(cnn_out, d=1)
            pool_out = dy.reshape(pool_out, (self._cnn_filter_size, ))

            pools_out.append(pool_out)

        pools_concat = dy.concatenate(pools_out)

        return mlp_output * pools_concat
Example #28
def loss_upper_bound(gold_tags, idx, beam_costs_prev, scores, beam_size):
    beam_size_prev, num_tags = scores.dim()[0]
    next_beam_size = beam_size if idx < len(gold_tags) - 1 else 1

    scores_flat = dy.reshape(scores, (beam_size_prev * num_tags,))
    costs_flat = dynet_compute_costs_flat(gold_tags, idx, beam_costs_prev)

    sigma_star = np.argsort(costs_flat)
    gold_idx = sigma_star[0]
    scores_flat_np = scores_flat.npvalue()
    sigma_hat = np.argsort(scores_flat_np)[::-1]

    scores_delta = scores_flat - scores_flat[gold_idx] + 1.0
    costs_delta = costs_flat - costs_flat[gold_idx]
    # mask those that are inside the beam.
    costs_delta[sigma_star[:next_beam_size]] = 0.0
    deltas = dy.cmult(dy.inputTensor(costs_delta), scores_delta)
    return dy.max_dim(deltas)
Example #29
    def on_calc_additional_loss(self, *args, **kwargs):
        seq_len = len(self.last_output)

        loss_expr = 0
        for pos_i in range(seq_len):
            input_i = self.last_output[pos_i]
            affine = self.linear_layer(input_i)
            softmax_out = dy.softmax(affine)
            if self.mode == "entropy":
                loss_expr = loss_expr - dy.sum_dim(
                    dy.cmult(dy.log(softmax_out), softmax_out), d=[0])
            elif self.mode == "max":
                loss_expr = loss_expr - dy.log(dy.max_dim(softmax_out))
            else:
                raise ValueError(f"unknown mode {self.mode}")
        # loss_expr = loss_expr * (self.scale / seq_len)
        loss_expr = loss_expr * self.scale

        return losses.FactoredLossExpr({"enc_entropy": loss_expr})
Example #30
 def __buildErrorOutputs(self, scores, correctTree, predictedTree):
     result = [ ]
     for tPos in range(correctTree.nrOfTokens()):
         corrHead = correctTree.getHead(tPos)
         predHead = predictedTree.getHead(tPos)
     
         corrOutputs = scores.getOutput(corrHead, tPos)
         predOutputs = scores.getOutput(predHead, tPos)
         
         corrLblId = self.__lblDict.getLblId(correctTree.getLabel(tPos))
         predLblId = self.__lblDict.getLblId(predictedTree.getLabel(tPos))
         
         ### tree errors    
         if corrHead != predHead:
             result.append((predOutputs[predLblId], dynet.max_dim(corrOutputs)))
         
         ### lbl errors
         worstLblId = max((scr, lId) for (lId, scr) in enumerate(corrOutputs.value()) if lId != corrLblId)[1]
         result.append((corrOutputs[worstLblId], corrOutputs[corrLblId]))
                      
     return result
Example #31
    def run_classifier(self, common_top_recur, word_inputs, domain_flag):
        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        cnn_filter = []
        for filt in self.filter:
            cnn_filter.append(dy.parameter(filt))
        cnn_W = dy.parameter(self.class_W)

        cnn_input = dy.reshape(common_top_recur,
                               (1, seq_len, 2 * self.lstm_hiddens), batch_size)
        # print(cnn_input.npvalue().shape)
        cnn_out_list = []
        for i in range(len(cnn_filter)):
            cnn_out = dy.conv2d(cnn_input,
                                cnn_filter[i], [1, 1],
                                is_valid=False)  # len*batch*filter_num
            # print(cnn_out.npvalue().shape)
            pool_out = dy.max_dim(cnn_out, d=1)
            # print(pool_out.npvalue().shape)
            pool_out = dy.reshape(pool_out, (self.filter_size, ), batch_size)
            # print(pool_out.npvalue().shape)
            pool_out = dy.rectify(pool_out)
            cnn_out_list.append(pool_out)
        final_out = dy.concatenate(cnn_out_list)
        result = cnn_W * final_out

        predict = np.argmax(result.npvalue(), axis=0)
        # print(predict)
        cor = 0.
        for pre in predict:
            if int(pre) == domain_flag:
                cor += 1
        class_accurate = cor / batch_size

        target = [domain_flag] * batch_size  # [0,0,0,0]
        # print(result.npvalue().shape, np.array(target).shape)
        classes_loss = dy.pickneglogsoftmax_batch(result, target)
        class_loss = dy.sum_batches(classes_loss) / batch_size
        # print(class_loss.npvalue().shape)
        return class_loss, class_accurate
Example #32
def calc_scores(words):
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    # WIN_SIZE (the filter width) tells you how many words/chars/pixels we look at in each step.
    # Here, 1 unit is 1 word. If a sample has fewer words than the window size, we need some padding.
    # Pad with index 0 (so the pad positions are treated like UNK).
    if len(words) < WIN_SIZE:
        words += [0] * (WIN_SIZE - len(words))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words],
                            d=1)  # concat repr of all words
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn_express,
                             b_cnn_express,
                             stride=(1, 1),
                             is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)
    return W_sm_express * pool_out + b_sm_express
Example #33
 def calc_loss(self, scores, axis, true, importance):
     ret = [i * dy.pickneglogsoftmax(scores, t) for t, i in zip(true, importance)]
     if self.loss == "max_margin":
         ret.append(dy.max_dim(dy.log_softmax(scores, restrict=list(set(range(self.num_labels[axis])) - set(true)))))
     return ret
Example #34
 def attend(self, context, x):
     context_cols = dy.concatenate_cols(context)
     context_emb = dy.max_dim(context_cols, 1)
     return context_emb, None