# requires: import numpy as np; from mxnet import nd
def my_loss(data, nc, ns, nq):
    data = data.astype('float64')
    # class prototypes: mean of the ns support embeddings for each of the nc classes
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    # squared Euclidean distance from every query embedding to every class prototype
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0), axis=2) ** 2

    weight = nd.zeros((nc * nq, nc), ctx=data.context, dtype='float64')
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
    weight2 = 1 - weight

    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.sum(temp1, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label

    # NOTE: unreachable because of the early return above (kept from an older formulation).
    loss1 = nd.sum(data_center_dis * weight)
    temp = nd.sum(nd.exp(-data_center_dis), axis=1)
    loss2 = nd.sum(nd.log(temp))
    if np.isnan(loss1.asscalar()) or np.isnan(loss2.asscalar()):
        # abort the iteration if the loss has gone NaN
        raise StopIteration
    return (loss1 + loss2) / (nc * nq), label
# requires: from mxnet import nd
def proto_loss(embedding, nc, ns, nq):
    embedding = embedding.astype('float64')
    # class prototypes: mean of the ns support embeddings per class
    cls_data = nd.reshape(embedding[0:nc * ns], (nc, ns, -1))
    cls_data.attach_grad()
    cls_center = nd.mean(cls_data, axis=1)
    # squared Euclidean distance from every query embedding to every prototype
    data_center_dis = nd.norm(embedding[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0), axis=2) ** 2
    # print(nd.max(data_center_dis).asscalar())

    weight = nd.zeros((nc * nq, nc), ctx=embedding.context, dtype='float64')
    pick_vec = nd.zeros((nc * nq), ctx=embedding.context)
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
        pick_vec[i * nq:i * nq + nq] = i

    """
    temp = nd.SoftmaxOutput(-data_center_dis, label)
    temp = nd.log(temp) * weight
    temp = nd.sum(-temp, axis=1)
    predict = nd.argmin(data_center_dis, axis=1)
    return -temp * nd.log(temp), predict
    """

    # cross-entropy over the negative distances: pick the log-probability of the true class
    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.pick(temp1, index=pick_vec, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label
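# Usage sketch (hypothetical, not from the original code): proto_loss expects the
# episode embeddings ordered as nc*ns support rows grouped by class, followed by
# nc*nq query rows in the same class order; the episode sizes below are made up.
from mxnet import nd

nc, ns, nq, feat_dim = 5, 5, 15, 64            # 5-way, 5-shot, 15 queries/class (illustrative)
embedding = nd.random.normal(shape=(nc * ns + nc * nq, feat_dim))
loss, predicted = proto_loss(embedding, nc, ns, nq)
print(loss.asscalar(), predicted.shape)        # scalar loss, (nc*nq,) predicted class ids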
def hybrid_forward(self, F, x, y, ignore_label):
    output = F.log_softmax(x)
    # build a one-hot label matrix, one column per class
    label_matrix = mx.nd.zeros(output.shape, ctx=output.context)
    for i in range(label_matrix.shape[1]):
        label_matrix[:, i] = (y == i)
    # entries equal to ignore_label do not count towards the average
    ignore_unit = (y == ignore_label)
    loss = -F.sum(output * label_matrix, axis=1)
    return F.sum(loss) / (output.shape[0] - F.sum(ignore_unit))
def hybrid_forward(self, F, pred, label, valid_length):
    # shift so that the prediction at position t is scored against token t + 1
    pred = pred[:, :-1, :]
    label = label[:, 1:]
    valid_length = valid_length - 1
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    loss = mx.nd.squeeze(
        -F.pick(pred, label, axis=self._axis, keepdims=True), axis=2)
    # mask out positions beyond each sequence's valid length
    loss = F.SequenceMask(loss.swapaxes(0, 1),
                          sequence_length=valid_length,
                          use_sequence_length=True).swapaxes(0, 1)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    """
    Returns
    -------
    configs : list of dict
        list of configurations
    """
    inputs = self.static_inputs[batch_size]
    hidden = self.static_init_hidden[batch_size]
    actions = []
    entropies = []
    log_probs = []

    for idx in range(len(self.num_tokens)):
        logits, hidden = self.forward(inputs, hidden, idx, is_embed=(idx == 0))

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        # sample one action per batch element and record its log-probability
        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

        # next input is the sampled action, offset into the shared embedding table;
        # detach so gradients do not flow back through the sampled index
        inputs = action[:, 0] + sum(self.num_tokens[:idx])
        inputs = inputs.detach()

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    # self-attention
    x = self.embedding(batch_size).reshape(-3, 0)  # .squeeze()  # b x action x h
    kshape = (batch_size, self.num_total_tokens, self.hidden_size)
    vshape = (batch_size, self.num_total_tokens, 1)
    querry = self.querry(x).reshape(*kshape)  # b x actions x h
    key = self.key(x).reshape(*kshape)        # b x actions x h
    value = self.value(x).reshape(*vshape)    # b x actions x 1
    atten = mx.nd.linalg_gemm2(querry, key, transpose_b=True).softmax(axis=1)
    alphas = mx.nd.linalg_gemm2(atten, value).squeeze(axis=-1)

    actions = []
    entropies = []
    log_probs = []

    for idx in range(len(self.num_tokens)):
        # slice out the logits belonging to the idx-th decision
        i0 = sum(self.num_tokens[:idx])
        i1 = sum(self.num_tokens[:idx + 1])
        logits = alphas[:, i0:i1]

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
# requires: import numpy as np; import mxnet as mx; from mxnet import nd
def evaluate(test_net, test_data, args):
    exe_num = len(test_net._context)
    curr_states = test_net.get_states(merge_multi_context=False)
    # Set the state to zero when a new epoch begins
    for state_id in range(len(curr_states)):
        for exe_id in range(exe_num):
            curr_states[state_id][exe_id][:] = 0
    test_net.set_states(curr_states)

    total_nll = 0.0
    for start in range(0, test_data.shape[0] - 1, args.bptt):
        data_batch_npy = np.take(test_data,
                                 np.arange(start, start + args.bptt),
                                 axis=0, mode="clip")
        target_batch_npy = np.take(test_data,
                                   np.arange(start + 1, start + 1 + args.bptt),
                                   axis=0, mode="clip")
        if start + args.bptt > test_data.shape[0]:
            valid_seq_len = test_data.shape[0] - start
        else:
            valid_seq_len = args.bptt

        test_net.forward(data_batch=mx.io.DataBatch(data=[mx.nd.array(data_batch_npy)]),
                         is_train=False)
        outputs = test_net.get_outputs(merge_multi_context=False)
        local_nll = 0.0
        for exe_id in range(exe_num):
            logits = outputs[0][exe_id]
            # negative log-likelihood of the target token at every position
            nll = -nd.pick(nd.log_softmax(logits),
                           nd.array(target_batch_npy, ctx=logits.context),
                           axis=-1).asnumpy()
            local_nll += nll[:valid_seq_len, :].mean() * valid_seq_len
        total_nll += local_nll / exe_num

        # carry the RNN states over to the next BPTT segment
        for out_id in range(1, len(outputs)):
            for exe_id in range(exe_num):
                curr_states[out_id - 1][exe_id] = outputs[out_id][exe_id]
        test_net.set_states(states=curr_states)

    avg_nll = total_nll / test_data.shape[0]
    return avg_nll
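# Note: evaluate() returns the average negative log-likelihood per token; if a
# perplexity number is wanted, it is the exponential of that value. The call below
# is a sketch that assumes test_net, test_data, and args are set up as in the
# surrounding script.
import math
avg_nll = evaluate(test_net, test_data, args)
print('test perplexity: %.2f' % math.exp(avg_nll))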
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    actions = []
    entropies = []
    log_probs = []

    for idx in range(len(self.num_tokens)):
        logits = self.decoders[idx](batch_size)

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        # sample one action per batch element and record its log-probability
        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, axis=self._axis)
    if self._sparse_label:
        if self._size_average:
            valid_label_map = (label != self._ignore_label).astype('float32')
            loss = -(F.pick(pred, label, axis=self._axis, keepdims=True) * valid_label_map)
        else:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        # zero out positions whose label equals ignore_label
        loss = F.where(
            label.expand_dims(axis=self._axis) == self._ignore_label,
            F.zeros_like(loss), loss)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average:
        # NOTE: valid_label_map is only defined on the sparse-label path, so size
        # averaging assumes self._sparse_label is True.
        return F.mean(loss, axis=self._batch_axis, exclude=True) * \
            valid_label_map.size / F.sum(valid_label_map)
    else:
        return F.mean(loss, axis=self._batch_axis, exclude=True)
# requires: from mxnet import nd
def log_pdf(self, y):
    # total log-likelihood of y under the categorical distribution obtained by
    # normalising unnormalized_mean with log_softmax
    return nd.sum(
        nd.nansum(y * nd.log_softmax(self.unnormalized_mean),
                  axis=0, exclude=True))
# requires: from mxnet import nd
def softmax_cross_entropy(yhat_linear, y):
    # per-example cross-entropy between the targets y and softmax(yhat_linear)
    return -nd.nansum(y * nd.log_softmax(yhat_linear), axis=0, exclude=True)
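# Minimal usage sketch (hypothetical): the shapes and the one-hot encoding of y
# are assumptions based on the elementwise product in softmax_cross_entropy above.
from mxnet import nd

logits = nd.random.normal(shape=(4, 10))                  # batch of 4, 10 classes
labels = nd.one_hot(nd.array([2, 0, 7, 7]), depth=10)     # one-hot targets
per_example_loss = softmax_cross_entropy(logits, labels)  # shape (4,)
print(per_example_loss)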