def calculate_score(feature, count):
    # `self` and `span_feature` are captured from the enclosing scope.
    feature_score = self.dense_layer(
        dn.concatenate([span_feature, dn.inputTensor(np.array(feature))]))
    if self.options.use_count:
        # Log-squash the raw count: w2 * log(|w1 * count + 1|).
        count_score = self.count_scale_2.expr() * dn.log(
            dn.abs(self.count_scale.expr() * count + 1))
    else:
        count_score = 0
    return feature_score + count_score
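# A minimal numpy sketch (not from the source) of the count term above: it
# computes w2 * log(|w1 * count + 1|), a scaled log-squashing that keeps raw
# counts from dominating the learned feature score. The weights w1 and w2
# below are made-up stand-ins for count_scale / count_scale_2.
import numpy as np

w1, w2, count = 0.5, 2.0, 100
count_score = w2 * np.log(abs(w1 * count + 1))
print(count_score)  # ~7.86: a count of 100 contributes only a few points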
def backward(self, word_vectors, label):
    dy.renew_cg()
    x = dy.inputTensor(word_vectors)
    y = dy.inputTensor(label)
    logit = self.build_graph(x)
    # q is the weight applied to positive samples.
    # Formula: see https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    q = 15
    l = 1 + (q - 1) * y
    # Numerically stable weighted sigmoid cross-entropy:
    # (1 - y) * x + l * (log(1 + exp(-|x|)) + max(-x, 0))
    loss = (1 - y) * logit + l * (dy.log(1 + dy.exp(-dy.abs(logit))) +
                                  dy.rectify(-logit))
    res = loss.value()
    loss.backward()
    return res
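# Sanity-check sketch (an assumption, not part of the model): away from
# overflow, the stable form used above should match the naive weighted
# cross-entropy q*y*(-log sigmoid(x)) + (1-y)*(-log(1 - sigmoid(x))).
import numpy as np

def stable_wce(x, y, q=15):
    l = 1 + (q - 1) * y
    return (1 - y) * x + l * (np.log1p(np.exp(-abs(x))) + np.maximum(-x, 0.0))

x, y, q = 2.0, 1.0, 15
sig = 1.0 / (1.0 + np.exp(-x))
naive = q * y * -np.log(sig) + (1 - y) * -np.log(1 - sig)
print(stable_wce(x, y, q), naive)  # both ~1.904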
def __getExpr(self, sentence, i, j):
    # Cache the concatenated BiLSTM states for the head and the modifier.
    if sentence[i].headfov is None:
        sentence[i].headfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
    if sentence[j].modfov is None:
        sentence[j].modfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])
    # Matching features: [head; mod; |head - mod|; head * mod].
    _inputVector = concatenate(
        [sentence[i].headfov,
         sentence[j].modfov,
         dynet.abs(sentence[i].headfov - sentence[j].modfov),
         dynet.cmult(sentence[i].headfov, sentence[j].modfov)])
    if self.hidden_units > 0:
        # Two-layer MLP scorer.
        output = self.outLayer.expr() * self.activation(
            self.hid2Bias.expr() + self.hidLayer.expr() *
            self.activation(_inputVector + self.hidBias.expr()))
    else:
        # Single-layer scorer.
        output = self.outLayer.expr() * self.activation(
            _inputVector + self.hidBias.expr())
    return output
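# Illustrative numpy sketch (values assumed) of the matching-feature vector
# built in __getExpr: [head; mod; |head - mod|; head * mod] exposes both the
# absolute difference and the elementwise agreement of the two states.
import numpy as np

head = np.array([0.2, -0.5])
mod = np.array([0.1, 0.4])
features = np.concatenate([head, mod, np.abs(head - mod), head * mod])
print(features)  # [ 0.2  -0.5   0.1   0.4   0.1   0.9   0.02 -0.2 ]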
def __call__(self, premise, hypothesis, dropout=0.0):
    # Encode both sentences with the shared BiLSTM encoder.
    precode = self._ssbilstm(premise)
    hypcode = self._ssbilstm(hypothesis)
    # Matching features: [u; v], |u - v|, u * v.
    conc_pre_hyp = dy.concatenate([precode, hypcode])
    dist_pre_hyp = dy.abs(precode - hypcode)
    mult_pre_hyp = dy.cmult(precode, hypcode)
    x = dy.concatenate([conc_pre_hyp, dist_pre_hyp, mult_pre_hyp])
    expr = [dy.parameter(exp) for exp in self._params]
    # Parameters come in (weight, bias) pairs; the last pair is the output
    # layer. Passing `-relu` on the command line (requires `import sys`)
    # switches the hidden activation from tanh to ReLU.
    for i in range(0, len(expr) - 2, 2):
        if '-relu' in sys.argv:
            x = dy.rectify((expr[i] * x) + expr[i + 1])
        else:
            x = dy.tanh((expr[i] * x) + expr[i + 1])
        if dropout != 0.0:
            x = dy.dropout(x, dropout)
    output = dy.softmax((expr[-2] * x) + expr[-1])
    return output
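# Hypothetical usage sketch; `model`, `trainer`, `premise`, `hypothesis`, and
# `label` are assumed names, not from the source. Since __call__ returns
# softmax probabilities, a negative log-likelihood loss is one natural fit:
#
#   dy.renew_cg()
#   probs = model(premise, hypothesis, dropout=0.1)
#   loss = -dy.log(dy.pick(probs, label))
#   loss.backward()
#   trainer.update()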
def l2_normalize(x):
    # Numerically safe L2 normalization: clamp element magnitudes and the
    # norm away from zero with machine epsilon before dividing.
    epsilon = np.finfo(float).eps * dy.ones(x.dim()[0])
    norm = dy.sqrt(dy.sum_elems(dy.square(x)))
    # Recover the sign of each element while clamping its magnitude.
    sign = dy.cdiv(x, dy.bmax(dy.abs(x), epsilon))
    return dy.cdiv(dy.cmult(sign, dy.bmax(dy.abs(x), epsilon)),
                   dy.bmax(norm, epsilon[0]))
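# A plain-numpy mirror of the same safe normalization (a sketch for sanity
# checking, assuming a 1-D input): the sign/clamp dance avoids division by
# zero both for exactly-zero elements and for the all-zero vector.
import numpy as np

def l2_normalize_np(x):
    eps = np.finfo(float).eps
    norm = np.sqrt(np.sum(np.square(x)))
    sign = x / np.maximum(np.abs(x), eps)
    return sign * np.maximum(np.abs(x), eps) / np.maximum(norm, eps)

print(l2_normalize_np(np.array([3.0, 0.0, 4.0])))  # [0.6 0.  0.8]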
# Take y and remove seasonality and level.
for i in range(n, len(df) - h):
    inputs = y[i - n:i]  # n periods
    curr_season = s[i - n:i]
    inputs = dy.cdiv(inputs, l[i])         # remove the level
    inputs = dy.cdiv(inputs, curr_season)  # remove the seasonality
    inputs = dy.log(inputs)
    reseasonalize = s[i + 1]  # season for the one-step-ahead target
    preds.append(dy.exp(fcstr(inputs)) * l[i] * reseasonalize)
    outputs.append(y[i + 1])  # one step ahead
predictions = dy.concatenate(preds)
outputs = dy.concatenate(outputs)
#log_err = dy.mean_elems(dy.abs(dy.log(outputs)-dy.log(predictions)))
err = dy.mean_elems(dy.abs(outputs - predictions))  # MAE
loss = err + level_loss
trainer = dy.SimpleSGDTrainer(m, learning_rate=0.25)
loss_value = loss.value()
print(seasonInit.npvalue())
for i in range(2000):
    loss.backward()
    trainer.update()
    loss_value = loss.value(recalculate=True)
    trainer.learning_rate *= 0.992  # learning-rate decay
    if i % 50 == 0 or i < 10:
        print("the mae after step is:", err.value(recalculate=True))
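# A toy numpy sketch (all values assumed) of the normalization used above:
# the network sees log(y / (level * season)) and its output is mapped back
# with exp(.) * level * season, so forecasting happens on a normalized,
# log scale.
import numpy as np

y_t, level, season, season_next = 120.0, 100.0, 1.2, 0.9
net_in = np.log(y_t / (level * season))  # 0.0 here: y equals level * season
net_out = 0.05                           # pretend network output
forecast = np.exp(net_out) * level * season_next
print(net_in, forecast)                  # 0.0, ~94.6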
def build_graph(pre_words, hy_words, holder):
    # Fresh initial states for the three stacked BiLSTM layers.
    fl1_init = holder.fwdRNN_layer1.initial_state()
    bl1_init = holder.bwdRNN_layer1.initial_state()
    fl2_init = holder.fwdRNN_layer2.initial_state()
    bl2_init = holder.bwdRNN_layer2.initial_state()
    fl3_init = holder.fwdRNN_layer3.initial_state()
    bl3_init = holder.bwdRNN_layer3.initial_state()

    pre_wembs = [get_word_rep(w, holder) for w in pre_words]
    hy_wembs = [get_word_rep(w, holder) for w in hy_words]

    # Premise encoder: three BiLSTM layers, the first two re-concatenated
    # with the word embeddings (shortcut connections).
    pre_fws = fl1_init.transduce(pre_wembs)
    pre_bws = bl1_init.transduce(reversed(pre_wembs))
    pre_bi = [dy.concatenate([word, f, b])
              for word, f, b in zip(pre_wembs, pre_fws, reversed(pre_bws))]
    pre_fws2 = fl2_init.transduce(pre_bi)
    pre_bws2 = bl2_init.transduce(reversed(pre_bi))
    pre_b_tag = [dy.concatenate([word, f1, b1, f2, b2])
                 for word, f1, b1, f2, b2 in zip(pre_wembs, pre_fws,
                                                 reversed(pre_bws), pre_fws2,
                                                 reversed(pre_bws2))]
    pre_fws3 = fl3_init.transduce(pre_b_tag)
    pre_bws3 = bl3_init.transduce(reversed(pre_b_tag))
    pre_b_tagtag = [dy.concatenate([f3, b3])
                    for f3, b3 in zip(pre_fws3, reversed(pre_bws3))]

    # Stack the time steps into a (features x time) matrix and 1-max pool
    # over time for every feature.
    pre_v_elements_size = len(pre_b_tagtag[0].npvalue())
    pre_row_num = len(pre_b_tagtag)
    pre_vecs_concat = dy.concatenate(pre_b_tagtag)
    pre_mat = dy.reshape(pre_vecs_concat, (pre_v_elements_size, pre_row_num))
    pre_final = dy.concatenate([dy.kmax_pooling(v, 1, 0) for v in pre_mat])

    # Hypothesis encoder: identical structure with shared parameters.
    hy_fws = fl1_init.transduce(hy_wembs)
    hy_bws = bl1_init.transduce(reversed(hy_wembs))
    hy_bi = [dy.concatenate([word, f, b])
             for word, f, b in zip(hy_wembs, hy_fws, reversed(hy_bws))]
    hy_fws2 = fl2_init.transduce(hy_bi)
    hy_bws2 = bl2_init.transduce(reversed(hy_bi))
    hy_b_tag = [dy.concatenate([word, f1, b1, f2, b2])
                for word, f1, b1, f2, b2 in zip(hy_wembs, hy_fws,
                                                reversed(hy_bws), hy_fws2,
                                                reversed(hy_bws2))]
    hy_fws3 = fl3_init.transduce(hy_b_tag)
    hy_bws3 = bl3_init.transduce(reversed(hy_b_tag))
    hy_b_tagtag = [dy.concatenate([f3, b3])
                   for f3, b3 in zip(hy_fws3, reversed(hy_bws3))]

    hy_v_elements_size = len(hy_b_tagtag[0].npvalue())
    hy_row_num = len(hy_b_tagtag)
    hy_vecs_concat = dy.concatenate(hy_b_tagtag)
    hy_mat = dy.reshape(hy_vecs_concat, (hy_v_elements_size, hy_row_num))
    hy_final = dy.concatenate([dy.kmax_pooling(v, 1, 0) for v in hy_mat])

    # Matching features [u; v; |u - v|; u * v], then a one-hidden-layer MLP.
    final = dy.concatenate([pre_final, hy_final,
                            dy.abs(pre_final - hy_final),
                            dy.cmult(pre_final, hy_final)])
    W1 = dy.parameter(holder.W1)
    b1 = dy.parameter(holder.b1)
    W2 = dy.parameter(holder.W2)
    b2 = dy.parameter(holder.b2)
    mid = dy.rectify(W1 * final + b1)
    return W2 * mid + b2
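# Sketch (shapes assumed) of the pooling step in build_graph: stacking the
# per-time-step vectors into a (features x time) matrix and taking the max
# over time for each feature, i.e. 1-max pooling over the sequence.
import numpy as np

T, D = 4, 3
steps = [np.random.randn(D) for _ in range(T)]  # one vector per time step
mat = np.stack(steps, axis=1)                   # (D, T), like dy.reshape above
pooled = mat.max(axis=1)                        # per-feature max over time
print(pooled.shape)                             # (3,)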