import logging
import math
import pprint

import editdistance
import numpy as np
from mxnet import nd
from sklearn import metrics
from tqdm import tqdm

logger = logging.getLogger(__name__)


# AsyncDataLoader is the project's asynchronous loader type, defined elsewhere in the codebase.
def evaluate_edit_distance(data_loader: AsyncDataLoader, model):
    '''
    Measures the mean (over instances) of the characterwise edit distance
    (Levenshtein distance) between predicted and true names
    '''
    logged_example = False
    with data_loader as data_loader:
        cum_edit_distance = 0
        for split_batch, batch_length in tqdm(data_loader, total=data_loader.total_batches):
            batches_outputs = [(batch, model(batch.data)) for batch in split_batch]
            for batch, output in batches_outputs:
                predictions_labels = model.unbatchify(batch, output)
                for prediction, label in predictions_labels:
                    if not logged_example:
                        logger.info('Some example predictions:\n{}'.format(
                            pprint.pformat(predictions_labels[:10])))
                        logged_example = True
                    pred_name = ''.join(prediction)
                    real_name = ''.join(label)
                    cum_edit_distance += editdistance.eval(pred_name, real_name)
    return cum_edit_distance / len(data_loader)


def evaluate_subtoken_f1(dataset, model, ctx, n_batch):
    '''
    Weighted F1 score over predicted vs. true subtokens
    (function name and signature inferred from the variables this body uses).
    '''
    pred = []
    true = []
    for i in tqdm(range(0, math.ceil(len(dataset) / n_batch))):
        data = dataset[n_batch * i:n_batch * (i + 1)]
        graph, label = model.batchify(data, ctx)
        output = model(graph)
        predictions = nd.argmax(output, axis=2)
        # Masking output to max(length_of_output, length_of_label)
        output_preds = predictions.asnumpy()
        output_lengths = []
        for row in output_preds:
            end_token_idxs = np.where(row == 0)[0]
            if len(end_token_idxs):
                output_lengths.append(int(min(end_token_idxs)))
            else:
                output_lengths.append(model.max_name_length)
        output_lengths = nd.array(output_lengths, ctx=ctx)
        mask_lengths = nd.maximum(output_lengths, label.value_lengths)
        output = nd.SequenceMask(predictions,
                                 value=-1,
                                 use_sequence_length=True,
                                 sequence_length=mask_lengths,
                                 axis=1).asnumpy().astype('int32')
        labels = nd.SequenceMask(label.values,
                                 value=-1,
                                 use_sequence_length=True,
                                 sequence_length=mask_lengths.astype('int32'),
                                 axis=1).asnumpy()
        pred += [i for i in output.flatten().tolist() if i >= 0]
        true += [i for i in labels.flatten().tolist() if i >= 0]
    return metrics.f1_score(true, pred, average='weighted')

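# Illustrative sketch (not from the original source): the same mask-with-a-sentinel
# trick used above, on toy data. Padded positions are set to -1 by nd.SequenceMask
# and then dropped before calling sklearn's f1_score; all values here are assumptions.
from mxnet import nd
from sklearn import metrics

toy_preds = nd.array([[3, 5, 9], [7, 2, 4]])
toy_labels = nd.array([[3, 5, 1], [7, 7, 7]])
toy_lengths = nd.array([2, 3])  # only the first 2 / 3 positions are real
masked_p = nd.SequenceMask(toy_preds, value=-1, use_sequence_length=True,
                           sequence_length=toy_lengths, axis=1).asnumpy()
masked_l = nd.SequenceMask(toy_labels, value=-1, use_sequence_length=True,
                           sequence_length=toy_lengths, axis=1).asnumpy()
flat_p = [int(v) for v in masked_p.flatten() if v >= 0]
flat_l = [int(v) for v in masked_l.flatten() if v >= 0]
print(metrics.f1_score(flat_l, flat_p, average='weighted'))
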
def RMSE_many_to_many(predictions, labels, data_lengths):
    loss = (labels.expand_dims(axis=2) - predictions).square()
    loss = nd.SequenceMask(loss, data_lengths, use_sequence_length=True, axis=1)
    weight = 1 / (labels + 1)
    loss_no_weight = loss.sum(axis=1).squeeze() / data_lengths.astype('float32')
    loss_weighted = ((loss.squeeze() * weight).sum(axis=1).squeeze()
                     / data_lengths.astype('float32'))
    return loss_weighted, loss_no_weight

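# Usage sketch (illustrative; the toy values below are assumptions): labels is
# (batch, T), predictions is (batch, T, 1), data_lengths holds the number of valid
# time steps per sequence. Masked steps contribute nothing to either loss. Note that,
# despite the name, the function returns mean squared errors (no square root).
from mxnet import nd

toy_labels = nd.array([[1., 2., 3.], [4., 5., 0.]])       # (2, 3)
toy_preds = toy_labels.expand_dims(axis=2) + 0.5          # (2, 3, 1), off by 0.5
toy_lengths = nd.array([3, 2])
weighted, unweighted = RMSE_many_to_many(toy_preds, toy_labels, toy_lengths)
print(unweighted)  # per-sequence mean squared error over the unmasked steps
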
def forward(self, pred, label, valid_length):  # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    pred : Symbol or NDArray
        Shape (batch_size, length, V)
    label : Symbol or NDArray
        Shape (batch_size, length)
    valid_length : Symbol or NDArray
        Shape (batch_size, )

    Returns
    -------
    loss : Symbol or NDArray
        Shape (batch_size,)
    """
    if self._sparse_label:
        sample_weight = nd.cast(nd.expand_dims(nd.ones_like(label), axis=-1),
                                dtype=np.float32)
    else:
        sample_weight = nd.ones_like(label)
    # Zero the weights of padded positions so they do not contribute to the cross-entropy.
    sample_weight = nd.SequenceMask(sample_weight,
                                    sequence_length=valid_length,
                                    use_sequence_length=True,
                                    axis=1)
    return super(SoftmaxCEMaskedLoss, self).forward(pred, label, sample_weight)

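# Usage sketch (assumption: SoftmaxCEMaskedLoss subclasses
# mxnet.gluon.loss.SoftmaxCrossEntropyLoss, as the super().forward call suggests;
# all toy values are made up).
from mxnet import nd

loss_fn = SoftmaxCEMaskedLoss()
toy_pred = nd.random.uniform(shape=(2, 4, 5))           # (batch, length, vocab)
toy_label = nd.array([[1, 2, 3, 0], [4, 0, 0, 0]])      # (batch, length)
toy_valid = nd.array([3, 1])                            # real tokens per sequence
print(loss_fn(toy_pred, toy_label, toy_valid))          # padded positions are ignored
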
def hybrid_forward(self, F, output, *args, **kwargs):
    '''
    Returns the Softmax Cross Entropy loss of a model with a graph vocab,
    in the style of a sentinel pointer network.
    Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of
    output to be probabilities summing to 1
    '''
    (label, _), data_encoder = args
    joint_label, label_lengths = label.values, label.value_lengths

    # We're using pick and not just sparse labels for XEnt because there can be
    # multiple ways to point to the correct subtoken
    loss = nd.pick(output, joint_label, axis=2)

    # Masking outputs to max(length_of_output (based on emitting value 0), length_of_label).
    # (The asnumpy()/shape calls mean this loss only works in imperative NDArray mode.)
    output_preds = nd.argmax(output, axis=2).asnumpy()
    output_lengths = []
    for row in output_preds:
        end_token_idxs = np.where(row == 0)[0]
        if len(end_token_idxs):
            output_lengths.append(int(min(end_token_idxs)) + 1)
        else:
            output_lengths.append(output.shape[1])
    output_lengths = nd.array(output_lengths, ctx=output.context)
    mask_lengths = nd.maximum(output_lengths, label_lengths)
    # Positions beyond the mask length get probability 1.0 so that -log(1.0) = 0.
    loss = nd.SequenceMask(loss,
                           value=1.0,
                           use_sequence_length=True,
                           sequence_length=mask_lengths,
                           axis=1)
    return nd.mean(-nd.log(loss), axis=0, exclude=True)

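# Why value=1.0 above: masked positions are filled with probability 1.0, so their
# negative log contributes exactly zero loss. A minimal illustration with toy values:
from mxnet import nd

picked = nd.array([[0.9, 0.8, 0.7], [0.6, 0.5, 0.4]])   # picked probabilities
lengths = nd.array([2, 3])
masked = nd.SequenceMask(picked, value=1.0, use_sequence_length=True,
                         sequence_length=lengths, axis=1)
print(-nd.log(masked))   # the third entry of the first row is exactly 0
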
def check_sequence_mask():
    # Sequence Mask input [max_sequence_length, batch_size]
    # test with input batch_size = 2
    # (LARGE_X is the large first-dimension constant defined by the test module)
    a = nd.arange(0, LARGE_X * 2).reshape(LARGE_X, 2)
    # test as identity operator
    b = nd.SequenceMask(a)
    assert b[-1][0] == a[-1][0]
    assert b.shape == a.shape
    # test with default mask
    b = nd.SequenceMask(a, sequence_length=nd.array([1, 1]),
                        use_sequence_length=True)
    assert b[0][1] == a[0][1]      # first time step of each batch item kept
    assert b[-1][-1] != a[-1][-1]  # remaining time steps masked
    assert b[-1][-1] == 0
    # test with mask value
    b = nd.SequenceMask(a, sequence_length=nd.array([1, 1]),
                        use_sequence_length=True, value=-1)
    assert b[-1][-1] == -1

def masked_softmax(X, valid_length):
    if valid_length is None:
        return X.softmax()
    else:
        shape = X.shape
        if valid_length.ndim == 1:
            valid_length = valid_length.repeat(shape[1], axis=0)
        else:
            valid_length = valid_length.reshape((-1,))
        X = nd.SequenceMask(X.reshape((-1, shape[-1])), valid_length, True,
                            axis=1, value=-1e6)
        return X.softmax().reshape(shape)

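# Usage sketch (toy values assumed): X holds attention scores of shape
# (batch, queries, keys) and valid_length gives the number of real keys per batch
# item. Padded keys receive -1e6 before the softmax, so their weights come out ~0.
from mxnet import nd

scores = nd.random.uniform(shape=(2, 2, 4))
weights = masked_softmax(scores, nd.array([2, 3]))
print(weights[0, 0])  # the last two entries are (numerically) zero
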
def forward(self, x, length, hidden=None):
    # Feed forward; outputs: [batch, seq_length, 2*num_hiddens]
    if hidden is not None:
        outputs, state = self.rnn(x, hidden)
    else:
        outputs = self.rnn(x)
    # SequenceMask expects the sequence axis first, so transpose, zero out the
    # padded time steps, and transpose back.
    outputs = nd.transpose(outputs, (1, 0, 2))
    outputs = nd.SequenceMask(outputs, sequence_length=length,
                              use_sequence_length=True, value=0)
    outputs = nd.transpose(outputs, (1, 0, 2))
    # Take the output at the last valid time step of each sequence.
    hidden = [output[i - 1] for (output, i) in zip(outputs, length)]
    hidden = nd.stack(*hidden).squeeze()
    return outputs, hidden

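# Alternative sketch (not from the original source): nd.SequenceLast performs the
# same "last valid time step" extraction in a single call on time-major data.
from mxnet import nd

toy = nd.arange(24).reshape((3, 2, 4))          # (seq_len, batch, features)
lengths = nd.array([2, 3])
last = nd.SequenceLast(toy, sequence_length=lengths, use_sequence_length=True)
print(last.shape)                                # (2, 4): one vector per sequence
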
def forward(self, pred, label, valid_length):
    weights = nd.ones_like(label).expand_dims(axis=-1)
    weights = nd.SequenceMask(weights, valid_length, True, axis=1)
    return super(MaskedSoftmaxCELoss, self).forward(pred, label, weights)

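# Usage sketch (assumption: MaskedSoftmaxCELoss subclasses gluon's
# SoftmaxCrossEntropyLoss, as in the d2l-style seq2seq examples; toy values below).
# Changing a label in a padded slot should not change the reported loss.
from mxnet import nd

loss_fn = MaskedSoftmaxCELoss()
toy_pred = nd.random.uniform(shape=(1, 3, 6))       # (batch, steps, vocab)
label_a = nd.array([[2, 4, 0]])
label_b = nd.array([[2, 4, 5]])                     # differs only in the padded slot
valid = nd.array([2])
print(loss_fn(toy_pred, label_a, valid), loss_fn(toy_pred, label_b, valid))
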
def calulation(self, input_str, ko_dict, en_dict, en_rev_dict, ctx):
    """Inference code."""
    # add START/END tokens at the beginning and end
    input_str = [['START'] + mecab.morphs(input_str.strip()) + ['END']]
    X = encoding_and_padding(input_str, ko_dict, max_seq=self.max_seq_length)
    # string to embed
    inputs = F.array(X, ctx=ctx)
    inputs = F.cast(inputs, dtype='float32')
    in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                        F.ones_like(inputs),
                                        F.zeros_like(inputs)), axis=1)
    # encoder GRU
    embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
    next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
    for j in range(self.in_seq_len):
        p_outputs = F.slice_axis(embeddinged_in, axis=1, begin=j, end=j + 1)
        p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
        enout, (next_h,) = self.encoder(p_outputs, [next_h])
        if j == 0:
            enouts = enout
            next_hs = next_h
        else:
            enouts = F.concat(enouts, enout, dim=1)
            next_hs = F.concat(next_hs, next_h, dim=1)
    # masking with 0 using length
    enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
    enouts = F.transpose(enouts, (1, 0, 2))
    enouts = F.SequenceMask(enouts, sequence_length=in_sent_last_idx + 1,
                            use_sequence_length=True)
    enouts = F.transpose(enouts, (1, 0, 2))
    next_hs = F.reshape(next_hs, (-1, self.n_hidden))
    # because take() only supports dim 0:
    # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
    next_hs = next_hs.take(in_sent_last_idx)
    # embed 'START' to use as the decoder's initial input
    Y_init = F.array([[en_dict['START']]], ctx=ctx)
    Y_init = F.cast(self.embedding(Y_init), dtype='float32')
    deout = Y_init[:, 0, :]
    # iterate over the output sequence length
    for i in range(self.out_seq_len):
        if self.attention:
            # print(deout.shape)
            deout, att_weight = self.apply_attention(F=F, inputs=deout,
                                                     hidden=next_hs,
                                                     encoder_outputs=enouts)
            if i == 0:
                att_weights = att_weight
            else:
                att_weights = F.concat(att_weights, att_weight, dim=0)
        deout, (next_hs,) = self.decoder(deout, [next_hs])
        # expand dims to apply batchnorm, then restore
        deout = F.expand_dims(deout, axis=1)
        deout = self.batchnorm(deout)
        # reduce dim
        deout = deout[:, 0, :]
        # produce the output for the step following 'START'
        deout_sm = self.dense(deout)
        # print(deout_sm.shape)
        deout = F.one_hot(F.argmax(F.softmax(deout_sm, axis=1), axis=1),
                          depth=self.vocab_size)
        # print(deout.shape)
        # convert into a form the decoder can consume (apply embedding and match dims)
        deout = F.argmax(deout, axis=1)
        deout = F.expand_dims(deout, axis=0)
        deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')
        gen_char = en_rev_dict[F.argmax(deout_sm, axis=1).asnumpy()[0].astype('int')]
        if gen_char == '__PAD__' or gen_char == 'END':
            break
        else:
            if i == 0:
                ret_seq = [gen_char]
            else:
                ret_seq += [gen_char]
    # note: att_weights is only defined when self.attention is True
    return (" ".join(ret_seq), att_weights)

def hybrid_forward(self, F, inputs, outputs, initial_hidden_state, batch_size_seq):
    # the index of the END tag determines the sentence length
    inputs = F.cast(inputs, dtype='float32')
    in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                        F.ones_like(inputs),
                                        F.zeros_like(inputs)), axis=1)
    outputs = F.cast(outputs, dtype='float32')
    out_sent_last_idx = F.argmax(F.where(outputs == self.end_idx,
                                         F.ones_like(outputs),
                                         F.zeros_like(outputs)), axis=1)
    # encoder GRU
    embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
    next_h = initial_hidden_state
    for j in range(self.in_seq_len):
        p_outputs = F.slice_axis(embeddinged_in, axis=1, begin=j, end=j + 1)
        p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
        enout, (next_h,) = self.encoder(p_outputs, [next_h])
        if j == 0:
            enouts = enout
            next_hs = next_h
        else:
            enouts = F.concat(enouts, enout, dim=1)
            next_hs = F.concat(next_hs, next_h, dim=1)
    # masking with 0 using length
    enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
    enouts = F.transpose(enouts, (1, 0, 2))
    enouts = F.SequenceMask(enouts, sequence_length=in_sent_last_idx + 1,
                            use_sequence_length=True)
    enouts = F.transpose(enouts, (1, 0, 2))
    next_hs = F.reshape(next_hs, (-1, self.n_hidden))
    # because take() only supports dim 0:
    # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
    next_hs = next_hs.take(in_sent_last_idx + (batch_size_seq * self.max_seq_length))
    embeddinged_out = F.cast(self.embedding(outputs), dtype='float32')
    # decoder GRU with attention
    for i in range(self.out_seq_len):
        # unroll the GRUCell for out_seq_len steps and accumulate the outputs
        p_outputs = F.slice_axis(embeddinged_out, axis=1, begin=i, end=i + 1)
        p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
        # p_outputs = outputs[:,i,:]
        # done with slice_axis/reshape (instead of the line above) so the block can hybridize
        if self.attention:
            p_outputs, _ = self.apply_attention(F=F, inputs=p_outputs,
                                                hidden=next_hs,
                                                encoder_outputs=enouts)
        deout, (next_hs,) = self.decoder(p_outputs, [next_hs])
        if i == 0:
            deouts = deout
        else:
            deouts = F.concat(deouts, deout, dim=1)
    # reshape 2-dim -> 3-dim
    deouts = F.reshape(deouts, (-1, self.out_seq_len, self.n_hidden))
    # zero out the padded decoder outputs
    deouts = F.transpose(deouts, (1, 0, 2))
    deouts = F.SequenceMask(deouts, sequence_length=out_sent_last_idx + 1,
                            use_sequence_length=True)
    deouts = F.transpose(deouts, (1, 0, 2))
    deouts = self.batchnorm(deouts)
    deouts_fc = self.dense(deouts)
    return deouts_fc

def __init__(self, num_inputs, num_hiddens, batch_first, drop_prob):
    super(LSTM, self).__init__()
    self.drop_prob = drop_prob
    self.batch_first = batch_first
    if batch_first:
        self.layout = 'NTC'
    else:
        self.layout = 'TNC'
    self.rnn = rnn.LSTM(num_hiddens,
                        layout=self.layout,
                        dropout=drop_prob,
                        bidirectional=True,
                        input_size=num_inputs,
                        i2h_weight_initializer='Orthogonal',
                        h2h_weight_initializer='Orthogonal')

def forward(self, x, length, state=None):
    # state, when given, is the (hidden, cell) begin state for the LSTM
    if state is not None:
        outputs, state = self.rnn(x, state)
    else:
        outputs = self.rnn(x)
    # outputs: [batch, seq_length, 2*num_hiddens]
    outputs = nd.transpose(outputs, (1, 0, 2))
    outputs = nd.SequenceMask(outputs, sequence_length=length,
                              use_sequence_length=True, value=0)
    outputs = nd.transpose(outputs, (1, 0, 2))
    # take the output at the last valid time step of each sequence
    hidden = [output[i - 1] for (output, i) in zip(outputs, length)]
    hidden = nd.stack(*hidden).squeeze()
    return outputs, hidden