def train(self, trainning_set):
    for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
        features = self.encode_sentence(sentence, pos, chars)
        loss = []
        entity_embeds = features[entity]
        attention, context = self.self_attend(features)
        ty = dy.vecInput(len(sentence))
        ty.set([0 if i != trigger else 1 for i in range(len(sentence))])
        loss.append(dy.binary_log_loss(dy.reshape(attention, (len(sentence),)), ty))
        h_t = dy.concatenate([context, entity_embeds])
        hidden = dy.tanh(self.lb.expr() * h_t + self.lb_bias.expr())
        out_vector = dy.reshape(dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
        label = dy.scalarInput(label)
        loss.append(dy.binary_log_loss(out_vector, label))
        pres = [0]
        for pattern in rule:
            probs = self.decoder(features, pres)
            loss.append(-dy.log(dy.pick(probs, pattern)))
            pres.append(pattern)
        loss = dy.esum(loss)
        loss.backward()
        self.trainer.update()
        dy.renew_cg()
def __train(model, data):
    tagged_loss = 0
    untagged_loss = 0
    for index, sentence_report in enumerate(data):
        for phrase in sentence_report.all_phrases:
            loss = None
            encoded_phrase = __encode_sequence(model, phrase)
            if model.options.external_info != "no_info":
                encoded_phrase = dy.concatenate(
                    [encoded_phrase, model.doclookup[sentence_report.app_id]])
            y_pred = dy.logistic((model.mlp_w * encoded_phrase) + model.mlp_b)
            if sentence_report.mark:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
            if sentence_report.mark:
                tagged_loss += loss.scalar_value() / (index + 1)
            else:
                untagged_loss += loss.scalar_value() / (index + 1)
            loss.backward()
            model.trainer.update()
            dy.renew_cg()
def __train(self, data):
    def encode_sequence(seq):
        rnn_forward = self.phrase_rnn[0].initial_state()
        for entry in seq:
            vec = self.wlookup[int(self.w2i.get(entry, 0))]
            rnn_forward = rnn_forward.add_input(vec)
        return rnn_forward.output()

    tagged_loss = 0
    untagged_loss = 0
    for index, sentence_report in enumerate(data):
        for phrase in sentence_report.all_phrases:
            loss = None
            encoded_phrase = encode_sequence(phrase)
            y_pred = dy.logistic((self.mlp_w * encoded_phrase) + self.mlp_b)
            if sentence_report.mark:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
            if index % 1000 == 0:
                print("Description : {}".format(index + 1))
                print("Marked {} Prediction Result {} : ".format(
                    sentence_report.mark, y_pred.scalar_value()))
                print("Tagged loss {} Untagged Loss {} Total loss {}".format(
                    tagged_loss, untagged_loss, tagged_loss + untagged_loss))
            if sentence_report.mark:
                tagged_loss += loss.scalar_value() / (index + 1)
            else:
                untagged_loss += loss.scalar_value() / (index + 1)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
def train_item(args, model, sentence):
    loss = None
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        last_output = encoded_sequence[-1]
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        context = dy.concatenate([last_output, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
        if sentence.permissions[args.permission_type]:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
        else:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
        loss.backward()
        model.trainer.update()
        loss_val = loss.scalar_value()
        dy.renew_cg()
        return loss_val
    return 0
def learn(self, seq):
    output, proj_x3 = self._predict(seq, runtime=False)
    # arcs
    for iSrc in range(len(seq)):
        for iDst in range(len(seq)):
            if iDst > iSrc:
                o = output[iSrc][iDst]  # the softmax portion
                t = get_link(seq, iSrc, iDst)
                # if t == 1:
                #     self.losses.append(-dy.log(dy.pick(o, t)))
                self.losses.append(dy.binary_log_loss(o, dy.scalarInput(t)))

    # labels
    gs_chains, labels = self._get_gs_chains(seq)
    for chain, label in zip(gs_chains, labels):
        label_rnn = self.label_decoder.initial_state()
        for index in chain:
            label_rnn = label_rnn.add_input(proj_x3[index])
        label_softmax = dy.softmax(
            self.label_w.expr(update=True) * label_rnn.output() +
            self.label_b.expr(update=True))
        self.losses.append(-dy.log(
            dy.pick(label_softmax, self.encodings.label2int[label])))
def run_model():
    data_in = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
    data_out = np.array([0, 1, 1, 0]).reshape(1, 4)
    m = dy.Model()
    sgd = dy.SimpleSGDTrainer(m, 0.5)
    W = m.add_parameters((2, 2))
    b = m.add_parameters(2)
    V = m.add_parameters((1, 2))
    a = m.add_parameters(1)
    errors = []
    for iter in range(ITERATIONS):
        dy.renew_cg()
        x = dy.inputTensor(data_in, batched=True)
        y = dy.inputTensor(data_out)
        h = dy.logistic(W * x + b)
        y_pred = dy.logistic((V * h) + a)
        y_pred = dy.reshape(y_pred, y.dim()[0])
        loss = dy.binary_log_loss(y_pred, y)
        errors.append(loss.scalar_value() / 4)
        loss.backward()
        sgd.update()
    return errors
def loss_function(recon_x, x, mu, logvar):
    # equiv to torch.nn.functional.binary_cross_entropy(?,?, size_average=False)
    BCE = dy.binary_log_loss(recon_x, x)
    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * dy.sum_elems(1 + logvar - dy.pow(mu, dy.scalarInput(2)) - dy.exp(logvar))
    return BCE + KLD
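# For context, a minimal sketch of exercising a VAE loss like the one above in
# isolation. The inputs here are hypothetical stand-ins (random tensors with an
# assumed latent size of 4), not outputs of the original encoder/decoder.
import dynet as dy
import numpy as np

dy.renew_cg()
x = dy.inputTensor(np.random.randint(0, 2, size=10).astype(float))  # binary reconstruction target
recon_x = dy.logistic(dy.inputTensor(np.random.randn(10)))           # decoder output squashed to (0, 1)
mu = dy.inputTensor(np.random.randn(4))                              # latent mean
logvar = dy.inputTensor(np.random.randn(4))                          # latent log-variance

loss = loss_function(recon_x, x, mu, logvar)
print(loss.scalar_value())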
def create_xor_network(pW, pV, pb, inputs, expected_answer):
    dy.renew_cg()  # new computation graph
    W = dy.parameter(pW)  # add parameters to graph as expressions
    V = dy.parameter(pV)
    b = dy.parameter(pb)
    x = dy.vecInput(len(inputs))
    x.set(inputs)
    y = dy.scalarInput(expected_answer)
    output = dy.logistic(V * (dy.tanh((W * x) + b)))
    loss = dy.binary_log_loss(output, y)
    return loss
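# The function above only builds the graph and returns the loss expression; it
# is presumably driven by an outer loop that owns the parameters and the
# trainer. A minimal sketch of such a driver (shapes and hyperparameters are
# assumptions, not taken from the original source):
import dynet as dy

m = dy.ParameterCollection()
pW = m.add_parameters((8, 2))   # hidden x input
pb = m.add_parameters(8)
pV = m.add_parameters((1, 8))   # output x hidden
trainer = dy.SimpleSGDTrainer(m)

xor_data = [([0, 0], 0), ([0, 1], 1), ([1, 0], 1), ([1, 1], 0)]
for epoch in range(100):
    epoch_loss = 0.0
    for inputs, answer in xor_data:
        loss = create_xor_network(pW, pV, pb, inputs, answer)
        epoch_loss += loss.value()  # runs the forward pass
        loss.backward()             # backpropagate
        trainer.update()            # apply the gradient update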
def train_item(args, model, document):
    loss = None
    word_lookups = []
    for preprocessed_sentence in document.preprocessed_sentences:
        seq = [
            model.wlookup[int(model.w2i.get(entry, 0))]
            for entry in preprocessed_sentence
        ]
        if len(seq) > 0:
            word_lookups.append(seq)
    sentences_lookups = []
    for seq in word_lookups:
        sentence_encode = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(sentence_encode)
        global_min = average_pooling(sentence_encode)
        if len(sentence_encode) > 0:
            last_out = sentence_encode[-1]
            context = dy.concatenate([last_out, global_max, global_min])
            sentences_lookups.append(context)
    document_encode = encode_sequence(model, sentences_lookups, model.document_rnn)
    global_max = max_pooling(document_encode)
    global_min = average_pooling(document_encode)
    if len(document_encode) > 0:
        last_out = document_encode[-1]  # last state of the document-level RNN
        context = dy.concatenate([last_out, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
        if document.permissions[args.permission_type]:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
        else:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
        loss.backward()
        model.trainer.update()
        loss_val = loss.scalar_value()
        dy.renew_cg()
        return loss_val
    return 0
def train_item(args, model, sentence):
    loss = None
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)
            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o, model.att_context))))
            sum_all = dy.esum(lst)
            probs = [dy.cdiv(e, sum_all) for e in lst]
            att_context = dy.esum(
                [dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
            context = dy.concatenate([att_context, global_max, global_min])
            # context = dy.concatenate([att_context])
            y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
            if sentence.permissions[args.permission_type]:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
            loss.backward()
            model.trainer.update()
            loss_val = loss.scalar_value()
            dy.renew_cg()
            return loss_val
    return 0
def create_xor_network(mW, mb, mV, input, expected_output):
    # For each input, create the computational graph and get the loss
    dy.renew_cg()
    W = dy.parameter(mW)
    b = dy.parameter(mb)
    V = dy.parameter(mV)
    x = dy.vecInput(len(input))
    x.set(input)
    y = dy.scalarInput(expected_output)
    graph_output = dy.logistic(V * (dy.tanh(W * x + b)))
    loss = dy.binary_log_loss(graph_output, y)
    return loss
def create_network(pWeight, inputs, expected_answer):
    # new computation graph
    dy.renew_cg()
    # add parameters to graph as expressions
    Weight = dy.parameter(pWeight)
    input_dy = dy.vecInput(len(inputs))
    input_dy.set(inputs)
    target_output = dy.scalarInput(expected_answer)
    output = dy.logistic(dy.tanh(Weight * input_dy))
    loss = dy.binary_log_loss(output, target_output)
    return loss
def batch_loss(self, sents, train=True):
    probas = self._predict(sents, train)
    # we pack all predicted probas into one vector of length batch_size
    probas = dy.concatenate(probas)
    # we make a dynet vector out of the true ys
    y_true = dy.inputVector([y for y, _ in sents])
    # classification loss: we use the logistic loss
    # this function automatically sums over all entries.
    total_loss = dy.binary_log_loss(probas, y_true)
    return total_loss
def get_loss(self, article_X, article_Y):
    dy.renew_cg()
    input_seqs_X = [[self.common.get_wemb(w) for w in sent] for sent in article_X]
    rep = self.get_article_rep(input_seqs_X)
    on_probs = dy.logistic(rep)
    answers_numpy = np.zeros(len(self._w2i))
    present = {self._w2i[w]: True for sent in article_Y for w in sent}
    for i in present:
        answers_numpy[i] = 1.0
    answers = dy.inputTensor(answers_numpy)
    loss = dy.binary_log_loss(on_probs, answers)
    return loss
def train(self, trainning_set):
    loss_chunk = 0
    loss_all = 0
    total_chunk = 0
    total_all = 0
    losses = []
    for datapoint in trainning_set:
        query = datapoint[0]
        eq = dy.average([
            self.word_embeddings[self.w2i[w]] if w in self.w2i
            else self.word_embeddings[0] for w in query
        ])
        hyper = datapoint[1]
        eh = dy.average([
            self.word_embeddings[self.w2i[w]] if w in self.w2i
            else self.word_embeddings[0] for w in hyper
        ])
        t = dy.scalarInput(datapoint[2])
        Ps = []
        for i in range(self.k):
            Ps.append(self.Phis[i].expr() * eq)
        P = dy.transpose(dy.concatenate_cols(Ps))
        s = P * eh
        y = dy.reshape(dy.logistic(self.W.expr() * s + self.b.expr()), (1,))
        losses.append(dy.binary_log_loss(y, t))
        # process losses in chunks
        if len(losses) > 50:
            loss = dy.esum(losses)
            l = loss.scalar_value()
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
            losses = []
            loss_chunk += l
            loss_all += l
            total_chunk += 1
            total_all += 1
    # consider any remaining losses
    if len(losses) > 0:
        loss = dy.esum(losses)
        loss.scalar_value()
        loss.backward()
        self.trainer.update()
        dy.renew_cg()
    print(f'loss: {loss_all/total_all:.4f}')
def train(self, tweets):
    loss_all = 0
    total_all = 0
    start_all = time.time()
    for i, tweet in enumerate(tweets):
        v = self.extract_features(tweet)
        output = self.forward(v)
        gold = tweet.emotions
        loss = dy.binary_log_loss(output, dy.inputTensor(gold))
        loss_all += loss.npvalue()[0]
        total_all += 1
        loss.scalar_value()
        loss.backward()
        self.trainer.update()
        dy.renew_cg()
    end = time.time()
    print(f'loss: {loss_all/total_all:.4f}\ttime: {end-start_all:,.2f} secs')
def compute_loss(self, pred):
    """Adds Ops for the loss function to the computational graph.

    In this case we are using cross entropy loss. The loss should be averaged
    over all examples in the current minibatch.

    Args:
        pred: A tensor of shape (batch_size, n_classes) containing the output of
            the neural network before the softmax layer.
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    y = dy.inputTensor(np.transpose(self.labels), batched=True)
    losses = dy.binary_log_loss(pred, y)
    loss = dy.sum_batches(losses) / self.config.batch_size
    ### END YOUR CODE
    return loss
def calc_sent_loss(sent):
    # Create a computation graph
    dy.renew_cg()
    # Get embeddings for the sentence
    emb = [W_w_p[x] for x in sent]
    # Step through the sentence and calculate binary prediction losses
    all_losses = []
    for i, my_emb in enumerate(emb):
        scores = dy.logistic(W_c * my_emb)
        pos_words = ([sent[x] if x >= 0 else S for x in range(i - N, i)] +
                     [sent[x] if x < len(sent) else S for x in range(i + 1, i + N + 1)])
        word_repr = [[float(y) for y in np.binary_repr(x).zfill(nbits)]
                     for x in pos_words]
        word_repr = [dy.inputVector(x) for x in word_repr]
        all_losses.extend([dy.binary_log_loss(scores, x) for x in word_repr])
    return dy.esum(all_losses)
def main():
    # Read csv data into a dataframe
    data = pd.read_csv("Reviews.csv").sample(1000)
    # data = pd.read_csv("Reviews.csv").sample(frac=1)
    split = round(data.shape[0] * 0.8)
    print(split)

    # Set up the training parameters
    vocab_size = 30000  # cutoff for num of most common words for text
    # max_len = 300  # cutoff for length of sequence for text

    # Convert training text to sequences
    t = Tokenizer(num_words=vocab_size)
    t.fit_on_texts(data['Text'])
    x = t.texts_to_sequences(data['Text'].astype(str))
    y = data['Score'].apply(lambda x: int(x >= 4))

    m = dy.Model()
    trainer = dy.AdamTrainer(m)
    embeds = m.add_lookup_parameters((vocab_size, embed_size))
    acceptor = LstmAcceptor(embed_size, hidden_size, 1, m)

    sum_of_losses = 0.0
    for epoch in range(10):
        for sequence, label in zip(x[:split], y[:split]):
            dy.renew_cg()
            label = dy.scalarInput(label)
            vecs = [embeds[i] for i in sequence]
            preds = acceptor(vecs)
            loss = dy.binary_log_loss(preds, label)
            sum_of_losses += loss.npvalue()
            loss.backward()
            trainer.update()
        print(sum_of_losses / split)
        sum_of_losses = 0.0

    print("\n\nPrediction time!\n")
    for sequence, label in zip(x[split:], y[split:]):
        dy.renew_cg()
        vecs = [embeds[i] for i in sequence]
        preds = acceptor(vecs).value()
        print(preds, label)
def calculate_loss_classification(self, input, output):
    # dy.renew_cg()
    weight_matrix_array = []
    biases_array = []
    for (W, b) in zip(self.weight_matrix_array, self.biases_array):
        weight_matrix_array.append(dy.parameter(W))
        biases_array.append(dy.parameter(b))
    acts = self.act
    w = weight_matrix_array[0]
    b = biases_array[0]
    act = acts[0]
    intermediate = act(dy.affine_transform([b, w, input]))
    activations = [intermediate]
    for (W, b, g) in zip(weight_matrix_array[1:], biases_array[1:], acts[1:]):
        pred = g(dy.affine_transform([b, W, activations[-1]]))
        activations.append(pred)
    # print output.value(), pred.value()
    losses = dy.binary_log_loss(pred, output)
    return losses
def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])

    if self.mode in ["avg_mlp", "final_mlp"]:
        if self.mode == "avg_mlp":
            if encodings.mask:
                encoding_fixed_size = dy.cdiv(
                    dy.sum_dim(encodings.as_tensor(), [1]),
                    dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
            else:
                encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
        elif self.mode == "final_mlp":
            encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
        enc_lin = []
        for step_i, enc_i in enumerate(encodings):
            step_linear = self.output_layer.transform(enc_i)
            if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
                step_linear = dy.cmult(
                    step_linear,
                    dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
            enc_lin.append(step_linear)
        if encodings.mask:
            encoding_fixed_size = dy.cdiv(
                dy.esum(enc_lin),
                dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
            encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
        raise ValueError(f"unknown mode '{self.mode}'")

    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
        for word in set(trg[batch_i]):
            if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
                idxs[0].append(word)
                idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(
        idxs,
        values=np.ones(len(idxs[0])),
        shape=scores.dim()[0] + (scores.dim()[1],),
        batched=True,
    )
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
def train(self, trainning_set):
    for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
        features = self.encode_sentence(sentence, pos, chars)
        loss = []
        entity_embeds = dy.average([features[word] for word in entity])
        attention, context = self.attend(features)
        # loss.append(-dy.log(dy.pick(attention, trigger)))
        h_t = dy.concatenate([context, entity_embeds])
        hidden = dy.tanh(self.lb * h_t + self.lb_bias)
        out_vector = dy.reshape(
            dy.logistic(self.lb2 * hidden + self.lb2_bias), (1,))
        # probs = dy.softmax(out_vector)
        label = dy.scalarInput(label)
        loss.append(dy.binary_log_loss(out_vector, label))

        # Get decoding losses
        last_output_embeddings = self.pattern_embeddings[0]
        s = self.decoder_lstm.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.hidden_dim), last_output_embeddings]))
        rule.append(1)
        for pattern in rule:
            h_t = s.output()
            context = self.attend(features, h_t)
            out_vector = self.pt.expr() * dy.concatenate(
                [context, h_t]) + self.pt_bias.expr()
            probs = dy.softmax(out_vector)
            loss.append(-dy.log(dy.pick(probs, pattern)))
            last_output_embeddings = self.pattern_embeddings[pattern]
            s = s.add_input(
                dy.concatenate([context, last_output_embeddings]))

        loss = dy.esum(loss)
        loss.backward()
        self.trainer.update()
        dy.renew_cg()
def _create_network(self, inputs, expected_answer):
    dy.renew_cg()  # new computation graph
    self.weights = {
        'h1': dy.parameter(self._pw1),
        'h2': dy.parameter(self._pw2),
        'out': dy.parameter(self._pw3)
    }
    self.biases = {
        'b1': dy.parameter(self._pb1),
        'b2': dy.parameter(self._pb2),
        'out': dy.parameter(self._pb3)
    }
    x = dy.vecInput(len(inputs))
    # x = dy.vecInput(len(X_train[1]))
    x.set(inputs)
    x = dy.reshape(x, (1, X_train.shape[1]))
    y = dy.inputTensor(expected_answer)
    yy = dy.reshape(y, (20, 1))
    output = self._multilayer_perceptron(x)
    loss = dy.binary_log_loss(output, yy)
    return loss
# `autobatching` allows us to feed each datapoint in one at a time, and `dyNet`
# will figure out how to "optimize" the operations. Let's iterate through our
# training data.

# In[23]:

# iterate through the dataset
losses = []
for i in range(trainX.shape[0]):
    # prepare input
    x = np.expand_dims(trainX[i], axis=0)  # must make it a vector with dimensions (1 x voc_size)
    # prepare output
    y = dy.scalarInput(trainY[i])
    # make a forward pass
    pred = forward_pass(x)
    # calculate loss for each example
    loss = dy.binary_log_loss(pred, y)
    losses.append(loss)

# Now let's accumulate the loss and backpropagate it.

# In[24]:

# get total loss for dataset
total_loss = dy.esum(losses)
# apply the calculations of the computational graph
total_loss.forward()  # calculate loss to backpropagate
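# The cell above stops right after the forward pass; the backward pass and
# parameter update presumably follow in the next cell. A minimal sketch of that
# missing step (the `trainer` object is assumed to have been created earlier,
# e.g. with dy.SimpleSGDTrainer):
total_loss.backward()  # backpropagate the accumulated loss through the autobatched graph
trainer.update()       # update the model parameters
dy.renew_cg()          # start a fresh computation graph before the next pass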
e = dy.concatenate_cols([e1, e2, ...])  # e1, e2, ... are column vectors. returns a matrix. (similar to np.hstack([e1, e2, ...]))
e = dy.concatenate([e1, e2, ...])       # concatenate

e = dy.affine_transform([e0, e1, e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...)

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = dy.binary_log_loss(e1, e2)

# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = dy.pairwise_rank_loss(e1, e2, m=1.0)

# Convolutions
# e1 \in R^{d x s} (input)
# e2 \in R^{d x m} (filter)
e = dy.conv1d_narrow(e1, e2)    # e = e1 *conv e2
e = dy.conv1d_wide(e1, e2)      # e = e1 *conv e2
e = dy.filter1d_narrow(e1, e2)  # e = e1 *filter e2
e = dy.kmax_pooling(e1, k)      # kmax-pooling operation (Kalchbrenner et al 2014)
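# A quick sanity check of the binary_log_loss semantics described above, on a
# tiny throwaway graph (the values are arbitrary):
import dynet as dy

dy.renew_cg()
e1 = dy.scalarInput(0.8)  # predicted probability
ty = dy.scalarInput(1.0)  # gold label
loss = dy.binary_log_loss(e1, ty)
print(loss.value())       # -log(0.8), roughly 0.223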
def calculateLoss(self, output, y):
    # function to calculate the loss value
    loss = dy.binary_log_loss(output, y)
    return loss
if len(sys.argv) == 2:
    m.populate_from_textfile(sys.argv[1])

W = dy.parameter(pW)
b = dy.parameter(pb)
V = dy.parameter(pV)
a = dy.parameter(pa)

x = dy.vecInput(2, "GPU:0")
y = dy.scalarInput(0, "CPU")
h = dy.tanh((W * x) + b)
h_cpu = dy.to_device(h, "CPU")
if xsent:
    y_pred = dy.logistic((V * h_cpu) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V * h_cpu) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1

for iter in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
        y.set(T if x1 != x2 else F)
dy.renew_cg()  # new computation graph. not strictly needed here, but good practice.

# associate the parameters with cg Expressions
# creates a computation graph and adds the parameters to it,
# transforming them into Expressions.
# The need to distinguish model parameters from "expressions" will become clearer later.
W = dy.parameter(pW)
V = dy.parameter(pV)
b = dy.parameter(pb)

x = dy.vecInput(2)  # an input vector of size 2. Also an expression.
output = dy.logistic(V * (dy.tanh((W * x) + b)))
y = dy.scalarInput(0)
loss = dy.binary_log_loss(output, y)

print(x.value())
print(b.value())

trainer = dy.SimpleSGDTrainer(m, learning_rate=.001)
for i in range(1000):
    x.set([1, 1])
    y.set(0)
    # loss.value()
    loss.backward()
    trainer.update()

print(x.value())
print(b.value())
trainer = dy.SimpleSGDTrainer(m)

W = m.add_parameters((HIDDEN_SIZE, 2))
b = m.add_parameters(HIDDEN_SIZE)
V = m.add_parameters((1, HIDDEN_SIZE))
a = m.add_parameters(1)

if len(sys.argv) == 2:
    m.populate_from_textfile(sys.argv[1])

x = dy.vecInput(2)
y = dy.scalarInput(0)
h = dy.tanh((W * x) + b)
if xsent:
    y_pred = dy.logistic((V * h) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V * h) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1

for iter in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
def _compute_binary_divergence(self, pred, target):
    return dy.binary_log_loss(pred, target)
losses = []
batch_size = 1000
dynet.renew_cg()
for j in tqdm(range(int(X.shape[0] / batch_size) + 1)):
    for k in range(batch_size):
        index = (j * batch_size) + k
        if index > X.shape[0] - 1:
            break
        # prepare input
        little_x = X[index].reshape(1, -1)  # must make it a vector with dimensions (1 x voc_size)
        # prepare output
        little_y = dynet.inputTensor(y[index])
        # make a forward pass
        pred = m.one_pass(little_x)
        # calculate loss for each example
        loss = dynet.binary_log_loss(pred, little_y)
        if not numpy.isnan(loss.npvalue()):
            losses.append(loss)
        else:
            print(i, j, 'loss was nan!')

    total_loss = dynet.esum(losses)
    # apply the calculations of the computational graph
    total_loss.forward()  # calculate loss to backpropagate
    total_loss.backward()
    # update parameters with backpropagated error
    m.trainer.update()

    #### end of batch
    if last_loss:
        cur_loss = total_loss.npvalue()[0]
        print('cur loss:', cur_loss)