def viterbi_decode(self, state_feats):
    """Find the highest-scoring tag path for every sequence in the batch.

    :param state_feats: emission scores, shape=(batch_size, seq_len, tag_size)
    :return:
        path_score: score of the best path, shape=(batch_size,)
        best_path: tag indices of the best path, shape=(batch_size, seq_len)
    """
    # Iterate over time steps: (seq_len, batch_size, tag_size).
    steps = state_feats.transpose((1, 0, 2))
    running_best = steps[0]
    history = []
    if steps.shape[0] > 1:
        for emission in steps[1:]:
            # After the transpose the last axis enumerates the previous tag:
            # candidates[b, j, i] = running_best[b, i] + (emission[b, j] + T[i, j])
            step_score = (emission.expand_dims(1)
                          + self._transitions.data()).transpose((0, 2, 1))
            candidates = running_best.expand_dims(1) + step_score
            history.append(nd.argmax(candidates, axis=-1))
            running_best = nd.max(candidates, axis=-1)

    last_tag = nd.argmax(running_best, axis=-1)
    path_score = nd.pick(running_best, last_tag)

    # Follow the back-pointers from the final tag towards the first step.
    reversed_path = [last_tag]
    for pointers in reversed(history):
        reversed_path.append(nd.pick(pointers, reversed_path[-1]))

    rows = [tags.expand_dims(0) for tags in reversed(reversed_path)]
    best_path = nd.concat(*rows, dim=0).transpose()
    return path_score, best_path
def replay(self):
    """Run one Double-DQN training step on a random minibatch of experience.

    Does nothing until ``self.memory`` holds at least ``self.batch_size``
    transitions. Each transition is indexed as
    ``(state, action, reward, next_state, done)``.

    Side effects: accumulates the mean batch loss into ``self.train_loss``
    and performs one ``self.trainer`` step.
    """
    if len(self.memory) < self.batch_size:
        return

    batch = random.sample(self.memory, self.batch_size)
    state_batch = nd.array([b[0] for b in batch])
    action_batch = nd.array([b[1] for b in batch])
    reward_batch = nd.array([b[2] for b in batch])
    next_state_batch = nd.array([b[3] for b in batch])
    done_batch = nd.array([float(bool(b[4])) for b in batch])

    # Double-DQN: choose the next action with the online network and read
    # its value from the target network.
    target_action_batch = nd.argmax(self.train_model(next_state_batch), axis=1)
    next_q_batch = nd.pick(self.target_model(next_state_batch),
                           target_action_batch, 1)
    bootstrapped = reward_batch + self.gamma * next_q_batch
    # Terminal transitions get the bare reward. nd.where selects rather than
    # multiplies, so a non-finite bootstrap value cannot leak into them.
    target_batch = nd.where(done_batch, reward_batch, bootstrapped)

    with autograd.record():
        q_batch = self.train_model(state_batch)
        output_batch = nd.pick(q_batch, action_batch, 1)
        loss = self.loss(output_batch, target_batch)
    loss.backward()
    self.train_loss += loss.mean().asscalar()
    self.trainer.step(self.batch_size)
    return
def _viterbi_decode(self, feats):
    '''
    CRF prediction (Viterbi algorithm): find the best tag path given the features.

    feats: list whose length is the sentence length; each element is an
           nd.array with the per-word feature vectors of one batch,
           shape (batch_size, tagset_size)

    Returns (path_score, best_path_matrix): path_score has shape
    (batch_size,) and best_path_matrix has one column per time step.
    '''
    backpointers = []
    batch_size = feats[0].shape[0]
    # Loop-invariant: fetch the transition matrix once.
    transitions = self.transitions.data()

    # Only START_TAG is a plausible predecessor of the first word.
    vvars = nd.full((1, self.tagset_size), -10000., ctx=self.ctx)
    vvars[0, self.tag2idx[START_TAG]] = 0
    # vvars shape: (batch_size, tagset_size)
    vvars = nd.broadcast_axis(vvars, axis=0, size=batch_size)

    for feat in feats:
        bptrs_t = []
        viterbivars_t = []
        for next_tag in range(self.tagset_size):
            next_tag_var = vvars + nd.broadcast_axis(
                transitions[next_tag].reshape((1, -1)),
                axis=0, size=batch_size)
            # best_tag_id shape: (batch_size, 1)
            best_tag_id = nd.argmax(next_tag_var, axis=1, keepdims=True)
            bptrs_t.append(best_tag_id)
            # each element of viterbivars_t has shape (batch_size, 1)
            viterbivars_t.append(
                nd.pick(next_tag_var, best_tag_id, axis=1, keepdims=True))
        vvars = (nd.concat(*viterbivars_t, dim=1) + feat)
        # bptrs_t shape: (batch_size, tagset_size)
        bptrs_t = nd.concat(*bptrs_t, dim=1)
        backpointers.append(bptrs_t)

    # Transition to STOP_TAG. The previous code indexed the START_TAG row
    # here, which contradicts this step's purpose and the way
    # _score_sentence closes a path with transitions[STOP_TAG, last_tag].
    terminal_var = vvars + transitions[self.tag2idx[STOP_TAG]]
    best_tag_id = nd.argmax(terminal_var, axis=1)
    # path_score shape: (batch_size,)
    path_score = nd.pick(terminal_var, best_tag_id, axis=1)

    # Decode the best path by following the back-pointers.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = nd.pick(bptrs_t, best_tag_id, axis=1)
        best_path.append(best_tag_id)

    # Remove the start symbol; start has shape (batch_size,).
    start = best_path.pop()
    # Sanity check: every path must begin at START_TAG (single host sync).
    assert (start.asnumpy() == self.tag2idx[START_TAG]).all()
    best_path.reverse()

    # Build the best-path matrix, one column per time step.
    new_best_path = [tag_ids.reshape((-1, 1)) for tag_ids in best_path]
    best_path_matrix = nd.concat(*new_best_path, dim=1)
    return path_score, best_path_matrix
def forward(self, pred, target, mask):
    """Compute the masked loss and masked accuracy of a sequence prediction.

    ``target`` and ``mask`` are truncated to ``pred.shape[1]`` steps, then
    everything is flattened to one row per (sample, step).

    The loss is the negated picked entry of ``pred`` averaged over the mask,
    so ``pred`` presumably holds log-probabilities -- TODO confirm.

    Returns ``(loss, accuracy)``, both normalised by ``nd.sum(mask)``.
    """
    pred = pred.copy()
    num_steps = pred.shape[1]
    # Truncate to the same number of steps and flatten to column vectors.
    target = target.copy()[:, :num_steps].reshape(-1, 1)
    mask = mask.copy()[:, :num_steps].reshape(-1, 1)
    pred = pred.reshape(-1, pred.shape[2])

    # Loss: negated value at the target index, masked and averaged.
    picked = nd.pick(pred, target).expand_dims(axis=1)
    loss = nd.sum(-picked * mask) / nd.sum(mask)

    # Accuracy: arg-max prediction compared with the target under the mask.
    predicted = nd.argmax(pred, axis=1).astype('int64')
    hits = (predicted == nd.squeeze(target)).astype('float32')
    accuracy = nd.sum(hits * nd.squeeze(mask)) / nd.sum(mask)
    return loss, accuracy
def forward(self, output, label):
    """Focal loss: ``-alpha * (1 - pt) ** gamma * log(pt)``.

    ``pt`` is the softmax probability of the labelled class, taken along
    ``self._axis``. The result is averaged over every axis except
    ``self._batch_axis``.
    """
    # Normalise along the same axis that is picked from (the softmax used to
    # run on the default last axis regardless of self._axis). Working in log
    # space avoids log(0) when the true-class probability underflows.
    log_prob = nd.log_softmax(output, axis=self._axis)
    log_pt = nd.pick(log_prob, label, axis=self._axis, keepdims=True)
    pt = nd.exp(log_pt)
    loss = -self._alpha * ((1 - pt) ** self._gamma) * log_pt
    return nd.mean(loss, axis=self._batch_axis, exclude=True)
def train_policy_net(self, imgs, actions, rs, terminals):
    """
    Train the policy network on one batch.

    Arguments:
        imgs - b x (f + 1) x C x H x W numpy array, where b is batch size,
            f is num frames, H is height and W is width. The first f frames
            form the state, the last f frames form the next state.
        actions - b x 1 numpy array of integers
        rs - b x 1 numpy array of rewards
        terminals - b x 1 numpy boolean array; a true entry removes the
            bootstrapped next-state value from the target
    Returns:
        average loss
    """
    batch_size = actions.shape[0]

    frames = imgs[:, :-1, :, :, :]
    next_frames = imgs[:, 1:, :, :, :]
    # Fold frames and channels together: batch x (f x C) x H x W.
    folded = (frames.shape[0], -1, frames.shape[-2], frames.shape[-1])

    st = nd.array(frames.reshape(folded), ctx=self.ctx, dtype=np.float32) / 255.0
    st1 = nd.array(next_frames.reshape(folded), ctx=self.ctx, dtype=np.float32) / 255.0
    at = nd.array(actions[:, 0], ctx=self.ctx)
    rt = nd.array(rs[:, 0], ctx=self.ctx)
    tt = nd.array(terminals[:, 0], ctx=self.ctx)

    # The target is built outside autograd.record().
    next_q_out = nd.max(self.target_net(st1), axis=1)
    target = rt + next_q_out * (1.0 - tt) * DISCOUNT

    with autograd.record():
        current_q = nd.pick(self.policy_net(st), at, 1)
        # NOTE(review): arguments are passed as (target, prediction); this
        # matters only if loss_func is not symmetric -- confirm.
        loss = self.loss_func(target, current_q)
    loss.backward()

    # Gradient clipping
    if GRAD_CLIPPING_THETA is not None:
        params = [
            p.data() for p in self.policy_net.collect_params().values()
        ]
        g_utils.grad_clipping(params, GRAD_CLIPPING_THETA, self.ctx)

    self.trainer.step(batch_size)
    return loss.mean().asscalar()
def update(self):
    """Run one actor step and one critic step on the collected rollout.

    Reads ``self.states``, ``self.actions`` and ``self.total_reward``, then
    clears all rollout buffers.
    """
    ctx = self.ctx
    states = nd.array(self.states, ctx=ctx)
    actions = nd.array(self.actions, ctx=ctx)
    total_reward = nd.array(self.total_reward, ctx=ctx)
    num_samples = len(states)

    # ------------ optimize actor -----------
    with autograd.record():
        values = self.critic_network(states)
        probs = self.actor_network(states)
        # Detached so the actor loss does not back-propagate into the critic.
        # NOTE(review): total_reward and values must have matching shapes or
        # this subtraction broadcasts -- confirm the critic's output shape.
        advantages = (total_reward - values).detach()
        actor_loss = -nd.pick(probs, actions).log() * advantages
    self.actor_network.collect_params().zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step(batch_size=num_samples)

    # ----------- optimize critic ------------
    l2_loss = gloss.L2Loss()
    with autograd.record():
        critic_loss = l2_loss(self.critic_network(states), total_reward)
    self.critic_network.collect_params().zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step(batch_size=num_samples)

    # Reset the rollout buffers.
    self.states = []
    self.actions = []
    self.rewards = []
    self.dones = []
    self.next_states = []
    self.total_reward = []
def hybrid_forward(self, F, output, *args, **kwargs):
    '''
    Returns the Softmax Cross Entropy loss of a model with a graph vocab, in the style of a sentinel pointer network

    Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of output to be probabilities summing to 1

    Note: the body uses ``nd`` and ``asnumpy`` directly, so it operates on
    NDArrays rather than on symbols.
    '''
    (label, _), data_encoder = args
    joint_label = label.values
    label_lengths = label.value_lengths

    # pick (not sparse-label cross entropy) because there can be multiple
    # ways to point to the correct subtoken.
    picked_prob = nd.pick(output, joint_label, axis=2)

    # A predicted sequence ends at the first step whose arg-max is 0
    # (inclusive); without such a step it spans the full output length.
    predictions = nd.argmax(output, axis=2).asnumpy()
    full_length = output.shape[1]
    output_lengths = []
    for row in predictions:
        end_positions = np.where(row == 0)[0]
        if len(end_positions):
            output_lengths.append(int(min(end_positions)) + 1)
        else:
            output_lengths.append(full_length)
    output_lengths = nd.array(output_lengths, ctx=output.context)

    # Mask beyond max(predicted length, label length); masked entries become
    # probability 1.0 and so contribute -log(1) = 0.
    mask_lengths = nd.maximum(output_lengths, label_lengths)
    masked_prob = nd.SequenceMask(picked_prob,
                                  value=1.0,
                                  use_sequence_length=True,
                                  sequence_length=mask_lengths,
                                  axis=1)
    return nd.mean(-nd.log(masked_prob), axis=0, exclude=True)
def choose_action(self, state):
    """Sample one action from the actor's output distribution.

    ``state`` is wrapped into a batch of one before the forward pass.

    Returns ``(action, action_prob)``: the sampled action as a Python int
    and, as a numpy array, the probability the actor assigned to it.
    """
    state = nd.array([state], ctx=self.ctx)
    all_action_prob = self.actor_network(state)
    action = nd.sample_multinomial(all_action_prob)
    action_prob = nd.pick(all_action_prob, action, axis=1).asnumpy()
    # asscalar() extracts the single element explicitly; int() on a
    # one-element numpy array is deprecated in recent NumPy releases.
    action = int(action.asscalar())
    return action, action_prob
def step(self, s, num_steps_so_far):
    """Choose epsilon-greedy actions for a batch of observations.

    ``s`` is channel-last (NHWC) and is transposed to NCHW for the network.
    Epsilon decays linearly with ``num_steps_so_far`` until it reaches
    ``self.args.epsilon_min``.

    Returns ``(value, action, logpac)`` as flat numpy arrays. ``logpac`` is
    the network's second output picked at the chosen action.
    """
    s = np.transpose(s, (0, 3, 1, 2))
    s = nd.array(s, ctx=self.args.ctx)
    value, logits = self.net(s)

    # Epsilon greedy exploration
    eps = np.maximum(1. - num_steps_so_far / self.args.annealing_end,
                     self.args.epsilon_min)

    # Copy the logits to the host once. The previous code computed this copy
    # but then ran np.argmax on NDArray slices inside the loop.
    logits_np = logits.asnumpy()
    batch_size = s.shape[0]
    chosen = np.empty(batch_size, dtype=np.float32)
    for i in range(batch_size):
        if np.random.random() < eps:
            chosen[i] = random.randint(0, self.action_dim - 1)
        else:
            chosen[i] = int(np.argmax(logits_np[i]))
    action = nd.array(chosen, ctx=self.args.ctx)

    # Pick the network output of the chosen action
    logpac = nd.pick(logits, action, 1)

    # Reshaping the output
    logpac = logpac.asnumpy().reshape((-1))
    action = action.asnumpy().astype(np.int32).reshape((-1))
    value = value.asnumpy().reshape((-1))
    return value, action, logpac
def forward(self, x):
    """Soft split on a single feature of ``x``.

    Picks the feature indexed by ``self._dim`` from every row, subtracts
    ``self._split``, scales by ``relu(self._sharpness)`` and applies tanh.
    """
    pick_index = nd.broadcast_to(self._dim.data(), x.shape[0])
    x = nd.pick(x, pick_index, keepdims=True)
    # Out-of-place arithmetic: overwriting an array that autograd has
    # already recorded (the previous `-=` / `*=`) is unsafe while recording.
    x = x - self._split.data()
    x = x * nd.relu(self._sharpness.data())
    return nd.tanh(x)
def label_offset(anchors, bbox, match, sample, means=(0, 0, 0, 0),
                 stds=(0.1, 0.1, 0.2, 0.2), flatten=True):
    """Encode matched ground-truth boxes as normalised offsets from anchors.

    anchors: reshaped to (N, 4)
    bbox:    (B, M, 4) boxes
    match:   per-anchor index into the M boxes (picked along the last axis)
    sample:  per-anchor indicator; entries > 0.5 are kept, the rest zeroed

    Returns ``(anchor_mask, anchor_offset)``; both are reshaped to (B, -1)
    when ``flatten`` is true.
    """
    anchors = anchors.reshape((-1, 4))
    N, _ = anchors.shape
    B, M, _ = bbox.shape
    a_x, a_y, a_w, a_h = corner_to_center(anchors, split=True)

    # Give every anchor a view of every box, then keep the matched one.
    tiled = nd.broadcast_to(bbox.reshape((B, 1, M, 4)), (B, N, M, 4))
    matched = nd.stack(
        *[nd.pick(tiled[:, :, :, coord], match) for coord in range(4)],
        axis=-1)
    g_x, g_y, g_w, g_h = corner_to_center(matched, split=True)

    # Centre offsets relative to anchor size, log-ratio for width and height.
    offset_x = ((g_x - a_x) / a_w - means[0]) / stds[0]
    offset_y = ((g_y - a_y) / a_h - means[1]) / stds[1]
    offset_w = (nd.log(g_w / a_w) - means[2]) / stds[2]
    offset_h = (nd.log(g_h / a_h) - means[3]) / stds[3]
    offset = nd.concat(*(offset_x, offset_y, offset_w, offset_h), dim=-1)

    keep = nd.broadcast_to(sample.reshape((B, N, 1)), (B, N, 4)) > 0.5
    zeros = nd.zeros_like(offset)
    anchor_offset = nd.where(keep, offset, zeros)
    anchor_mask = nd.where(keep, nd.ones_like(offset), nd.zeros_like(offset))
    if flatten:
        anchor_offset = anchor_offset.reshape((B, -1))
        anchor_mask = anchor_mask.reshape((B, -1))
    return anchor_mask, anchor_offset
def cross_entropy(y_hat, y):
    """
    Cross-entropy loss.

    :param y_hat: predictions; the entry selected by ``y`` is read per row
    :param y: label indices
    :return: negative log of the picked prediction
    """
    picked = nd.pick(y_hat, y)
    return -picked.log()
def forward(self, x, crisp=False):
    """Soft split on a single feature of ``x``.

    Picks the feature indexed by ``self._dim`` from every row, subtracts
    ``self._split`` and applies a sigmoid.

    crisp: when false (the default) the shifted value is first scaled by
           ``relu(self._sharpness)``; when true that scaling is skipped.
    """
    pick_index = nd.broadcast_to(self._dim.data(), x.shape[0])
    x = nd.pick(x, pick_index, keepdims=True)
    x = x - self._split.data()
    if not crisp:
        x = x * nd.relu(self._sharpness.data())
    return nd.sigmoid(x)
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Each argument is an NDArray or a list of per-device NDArrays.

    Returns ``(sum_losses, cls_losses, box_losses)``, three lists with one
    entry per device. Both losses are normalised by the number of positive
    samples over all devices.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]

    # cross device reduction to obtain positive samples in entire batch
    num_pos = [(ct > 0).sum() for ct in cls_target]
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1:
        # No positive samples: return zero losses that are still derived
        # from the predictions. They stay attached to the autograd graph and
        # keep the per-device list structure of the normal return value
        # (bare nd.zeros arrays are neither).
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform
    # negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        # (pos - 1) is -1 for negatives and 0 for positives, so the double
        # argsort ranks negatives by descending loss.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where(
            (pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(
            nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        # Smooth-L1: quadratic below self._rho, linear above it.
        box_loss = nd.where(
            box_loss > self._rho, box_loss - 0.5 * self._rho,
            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(
            nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def _seq_score(self, state_feats, tag_seq):
    """Score a given tag sequence: emission scores plus transition scores.

    :param state_feats: shape=(batch_size, seq_len, tag_size)
    :param tag_seq: shape=(batch_size, seq_len)
    :return: shape=(batch_size,)
    """
    # emissions: shape=(seq_len, batch_size, tag_size)
    emissions = state_feats.transpose((1, 0, 2))
    # tags: shape=(seq_len, batch_size)
    tags = tag_seq.transpose()

    score = nd.pick(emissions[0], tags[0], axis=1)
    if emissions.shape[0] > 1:
        for prev_step, emission in enumerate(emissions[1:]):
            current_tags = tags[prev_step + 1]
            emit_score = nd.pick(emission, current_tags, axis=1)
            # One transition row per sample, selected by its previous tag,
            # then the entry of the current tag.
            trans_rows = self._transitions.data()[tags[prev_step]]
            trans_score = nd.pick(trans_rows, current_tags, axis=1)
            score = score + emit_score + trans_score
    return score
def hybrid_forward(self, F, pred, label, alpha=1.0, gamma=0, weight=1.0):
    """Focal loss ``-alpha * (1 - p) ** gamma * log(p)``.

    ``p`` is the softmax probability of the labelled class. ``weight`` is
    accepted for interface compatibility but is not used by the body.
    """
    # Softmax along the class axis; every op goes through F so the block
    # works with both NDArray and Symbol inputs (nd.pick did not).
    pred_prob = F.softmax(pred, axis=self.axis)
    # Select the probability of the labelled class.
    output = F.pick(pred_prob, label, axis=self.axis)
    # Compute the loss.
    loss = F.mean(-alpha * ((1 - output) ** gamma) * F.log(output),
                  axis=self.axis)
    return loss
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Each argument is an NDArray or a list of per-device NDArrays.

    Returns ``(sum_losses, cls_losses, box_losses)``, three lists with one
    entry per device. Losses are normalised by ``max(1, num positives)``.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = (
        _as_list(item)
        for item in (cls_pred, box_pred, cls_target, box_target))

    # cross device reduction to obtain positive samples in entire batch
    positive_masks = [labels > 0 for labels in cls_target]
    positive_counts = [mask.sum() for mask in positive_masks]
    num_pos_all = sum([count.asscalar() for count in positive_counts])

    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [
            nd.sum(cp * 0) + nd.sum(bp * 0)
            for cp, bp in zip(cls_pred, box_pred)
        ]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform
    # negative mining
    normalizer = max(1., num_pos_all)
    cls_losses, box_losses, sum_losses = [], [], []
    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        log_prob = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)
        # (pos - 1) is -1 for negatives and 0 for positives, so the double
        # argsort ranks negatives by descending loss.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        negative_budget = nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio)
        hard_negative = rank < negative_budget.expand_dims(-1)
        # mask out if not positive or negative
        selected = (pos + hard_negative) > 0
        cls_loss = nd.where(selected, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(
            nd.sum(cls_loss, axis=0, exclude=True) / normalizer)

        bp = _reshape_like(nd, bp, bt)
        abs_error = nd.abs(bp - bt)
        # Smooth-L1: quadratic below self._rho, linear above it.
        box_loss = nd.where(abs_error > self._rho,
                            abs_error - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(abs_error))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(
            nd.sum(box_loss, axis=0, exclude=True) / normalizer)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def cross_entropy(y_hat, y):
    """Return the negative log of the prediction picked by each label."""
    # Select, for every row, the predicted value at the true label's index.
    # A perfect prediction of 1 gives a loss of ln(1) = 0, and
    # ln(1 / p) = -ln(p); `log` here is the natural logarithm.
    return -nd.pick(y_hat, y).log()
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Each argument is an NDArray or a list of per-device NDArrays. When
    ``self._distributed`` is set, the positive-sample count is additionally
    reduced through ``self._kv_store`` before it is used as the normaliser.

    Returns ``(sum_losses, cls_losses, box_losses)``, three lists with one
    entry per device.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]

    # cross device reduction to obtain positive samples in entire batch
    num_pos = [(ct > 0).sum() for ct in cls_target]
    num_pos_all = sum([p.asscalar() for p in num_pos])

    # synchronize across different machines
    if self._distributed:
        num_pos_out = nd.zeros(1, mx.cpu())
        num_pos_in = nd.zeros(1, mx.cpu()) + num_pos_all
        # allreduce only supports pushpull
        if 'allreduce' in self._kv_store_type:
            self._kv_store.pushpull(self._num_pos_key, num_pos_in, num_pos_out)
        else:
            self._kv_store.push(self._num_pos_key, num_pos_in)
            self._kv_store.pull(self._num_pos_key, out=num_pos_out)
        num_pos_all = num_pos_out.asscalar()

    if num_pos_all < 1:
        # No positive samples: return zero losses that are still derived
        # from the predictions. They stay attached to the autograd graph and
        # keep the per-device list structure of the normal return value
        # (bare nd.zeros arrays are neither).
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform
    # negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        # (pos - 1) is -1 for negatives and 0 for positives, so the double
        # argsort ranks negatives by descending loss.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                            nd.zeros_like(cls_loss))
        cls_losses.append(
            nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        # Smooth-L1: quadratic below self._rho, linear above it.
        box_loss = nd.where(box_loss > self._rho,
                            box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(
            nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def _score_sentence(self, feats, tags):
    '''
    Compute the score of a given tag sequence.

    feats: list whose length is the sentence length; each element is an
           nd.array with the per-word feature vectors of one batch,
           shape (batch_size, tagset_size)
    tags:  list whose length is the sentence length; each element is an
           nd.array with the tag index of each word in the batch,
           shape (batch_size, 1)

    Returns an nd.array of shape (batch_size,): the sum of transition and
    emission scores along the path, including the final step to STOP_TAG.
    '''
    batch_size = feats[0].shape[0]
    transitions = self.transitions.data()
    # A path score is a pure sum of transition and emission terms, so the
    # accumulator starts at zero (it previously started at one, which added
    # a constant +1 to every score).
    score = nd.zeros((batch_size, ), ctx=self.ctx)

    # Column of START_TAG indices, shape (batch_size, 1)
    temp = nd.array([self.tag2idx[START_TAG]] * batch_size,
                    ctx=self.ctx).reshape((batch_size, 1))
    # After concatenation: (batch_size, max_seq_len + 1)
    tags = nd.concat(temp, *tags, dim=1)

    for i, feat in enumerate(feats):
        # transitions is indexed [next_tag, previous_tag].
        score = score + \
            nd.pick(transitions[tags[:, i + 1]], tags[:, i], axis=1) + \
            nd.pick(feat, tags[:, i + 1], axis=1)
    score = score + transitions[self.tag2idx[STOP_TAG],
                                tags[:, tags.shape[1] - 1]]
    return score
def update_score(data, states):
    """Add one time step's transition and emission score to the running score.

    Args:
        data (NDArray): features of the current step; used as a
            (batch_size, tagset_size) array -- presumably one slice of a
            (seq_len, batch_size, tagset_size) sequence, TODO confirm
        states (list of NDArray): [idx, tags, score]

    Returns:
        (feat, [idx, tags, score]): the unchanged features and the state
        list with ``idx`` advanced by one and ``score`` updated,
        shape (batch_size,)
    """
    feat = data
    idx, tags_iner, score = states
    i = int(idx.asscalar())
    transitions = self.transitions.data()
    next_tags = tags_iner[:, i + 1]
    score = score + nd.pick(transitions[next_tags], tags_iner[:, i], axis=1) \
        + nd.pick(feat, next_tags, axis=1)
    idx += 1
    # Return the tags taken from `states`; the previous code returned the
    # name `tags`, which is not defined in this function.
    return feat, [idx, tags_iner, score]
def update(self):
    """Run one Double-DQN optimisation step on a sampled minibatch.

    Samples ``self.batch_size`` transitions from ``self.replay_buffer``,
    builds the target ``r + (1 - done) * gamma * Q_target(s', a*)`` where
    ``a*`` is the main network's greedy action for the next state, and
    applies one optimiser step to the main network.
    """
    state_batch, action_batch, reward_batch, next_state_batch, done_batch = \
        self.replay_buffer.sample(self.batch_size)
    l2_loss = gloss.L2Loss()
    with autograd.record():
        # get the Q(s,a)
        all_current_q_value = self.main_network(state_batch)
        main_q_value = nd.pick(all_current_q_value, action_batch)

        # different from DQN: get the next action from the main network,
        # then get its Q value from the target network.
        all_next_q_value = self.target_network(next_state_batch).detach()
        # The greedy action must come from the main network evaluated on the
        # NEXT state (the previous code took the arg-max of the
        # current-state Q-values). Detached: only main_q_value carries
        # gradient.
        next_q_main = self.main_network(next_state_batch).detach()
        max_action = nd.argmax(next_q_main, axis=1)
        target_q_value = nd.pick(all_next_q_value, max_action).detach()

        target_q_value = reward_batch + \
            (1 - done_batch) * self.gamma * target_q_value

        # L2Loss is symmetric; arguments follow Gluon's (pred, label) order.
        value_loss = l2_loss(main_q_value, target_q_value)
    self.main_network.collect_params().zero_grad()
    value_loss.backward()
    self.optimizer.step(batch_size=self.batch_size)
def label_box_cls(match, sample, gt_cls, ignore_label=-1):
    """Build per-anchor class labels from the matched ground-truth classes.

    match:  (B, N) index of the matched ground-truth box per anchor
    sample: (B, N) indicator; > 0.5 keeps the matched class (shifted by +1),
            < -0.5 labels the anchor 0, anything else gets ``ignore_label``
    gt_cls: (B, M) ground-truth class ids

    Returns ``(label_cls, label_mask)``, where the mask is ``label_cls > -0.5``.
    """
    B, N = match.shape
    B, M = gt_cls.shape
    # (B, N, M): every anchor sees every ground-truth class id.
    tiled_cls = nd.broadcast_to(gt_cls.reshape((B, 1, M)), (B, N, M))
    # (B, N): class of the matched box, shifted by one.
    matched_cls = nd.pick(tiled_cls, match, axis=-1) + 1

    ignored = nd.ones_like(matched_cls) * ignore_label
    label_cls = nd.where(sample > 0.5, matched_cls, ignored)
    label_cls = nd.where(sample < -0.5, nd.zeros_like(label_cls), label_cls)
    # (B, N)
    label_mask = label_cls > -0.5
    return label_cls, label_mask
def update(self, obs, returns, masks, actions, values, logpacs, lrnow,
           cliprange_now):
    """Run one PPO-style clipped update on a batch of rollout data.

    obs is channel-last (NHWC) and is transposed to NCHW. ``masks`` and
    ``lrnow`` are accepted but not used by the body.

    Returns ``(actor_loss, vf_loss)`` as Python scalars.
    """
    advantages = returns - values
    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
    # Column vector, like every other per-sample tensor below, so that
    # element-wise products stay (N, 1) instead of broadcasting to (N, N).
    advantages = nd.array(advantages, ctx=self.args.ctx).reshape((-1, 1))
    obs = np.transpose(obs, (0, 3, 1, 2))
    obs = nd.array(obs, ctx=self.args.ctx)
    actions = nd.array(actions, ctx=self.args.ctx).reshape((-1, 1))
    values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
    returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
    oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape((-1, 1))

    # Auto grad
    with autograd.record():
        # Value loss (clipped around the old value estimate)
        vpred, logits = self.net(obs)
        vpred_clipped = values + nd.clip(vpred - values,
                                         -cliprange_now, cliprange_now)
        vf_loss1 = nd.square(vpred - returns)
        vf_loss2 = nd.square(vpred_clipped - returns)
        vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

        # Action loss. keepdims=True keeps the result (N, 1) so it lines up
        # with oldpi_log_prob; without it the (N,) result broadcast against
        # the (N, 1) column into an (N, N) ratio matrix.
        # NOTE(review): this picks from the raw network output, not from a
        # log-softmax -- confirm the network emits log-probabilities.
        pi_log_prob = nd.pick(logits, actions, 1, keepdims=True)
        ratio = nd.exp(pi_log_prob - oldpi_log_prob)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - cliprange_now,
                        1.0 + cliprange_now) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Total loss (no entropy term)
        loss = vf_loss + actor_loss

    # Compute gradients and updates
    loss.backward()
    self.trainer.step(obs.shape[0])
    return actor_loss.asscalar(), vf_loss.asscalar()
def learn(self):
    """Run one policy-gradient step on the stored episode, then clear it.

    The loss is ``-log(prob of the taken action) * reward``, with rewards
    taken from ``self.discount_and_normalized_rewards()``.
    """
    ctx = self.ctx
    rewards = nd.array(self.discount_and_normalized_rewards(), ctx=ctx)
    states = nd.array(self.states, ctx=ctx)
    actions = nd.array(self.actions, ctx=ctx)

    with autograd.record():
        taken_prob = nd.pick(self.network(states), actions)
        loss = -taken_prob.log() * rewards
    loss.backward()
    self.optimizer.step(batch_size=len(self.states))

    # reset the episode buffers
    self.states = []
    self.actions = []
    self.rewards = []
def choose_batch_action(self, phi_list):
    """Return the greedy action and its Q-value for every state in the batch.

    ``phi_list`` is converted to an NDArray and all axes between the first
    and the last two are folded into one before ``self.play_net`` is called.

    Returns ``(actions, max_q_list)`` as Python lists.
    """
    batch_input = nd.array(phi_list, ctx=self.ctx)
    shape0 = batch_input.shape
    # Reshape the freshly created array directly; wrapping it in a second
    # nd.array call only made a redundant copy.
    state = batch_input.reshape((shape0[0], -1, shape0[-2], shape0[-1]))
    out = self.play_net(state)
    actions = nd.argmax(out, axis=1).astype('int')
    max_q_list = nd.pick(out, actions, 1).asnumpy().tolist()
    return actions.asnumpy().tolist(), max_q_list
def forward(self, program, parameters, index):
    """Encode a program and its parameters and decode a shape.

    program and parameters are transposed so that axis 0 is the step and
    axis 1 the batch. The LSTM output at step ``index`` (one entry per
    sample) is projected and passed to ``self.decoder``.
    """
    program = program.transpose((1, 0, 2))
    parameters = parameters.transpose((1, 0, 2))
    bsz = program.shape[1]
    state = self.init_hidden(bsz, self.ctx)

    # program linear transform
    dim1 = program.shape
    program = program.reshape(-1, self.vocab_size + 1)
    x1 = nd.relu(self.pgm_embed(program))
    x1 = x1.reshape(dim1[0], dim1[1], -1)

    # parameter linear transform
    dim2 = parameters.shape
    parameters = parameters.reshape(-1, self.max_param)
    x2 = nd.relu(self.param_embed(parameters))
    x2 = x2.reshape(dim2[0], dim2[1], -1)

    # LSTM to aggregate programs and parameters
    x = nd.concat(x1, x2, dim=2)
    out, _ = self.lstm(x, state)

    # select desired step aggregated features: index becomes
    # (1, bsz, feature) so pick reads out[index[b], b, :] for every sample
    index = index.expand_dims(axis=1).broadcast_to(
        (bsz, out.shape[2])).expand_dims(axis=0)
    # Reshape to (bsz, feature) explicitly. A bare squeeze() also removed
    # the batch axis when bsz == 1.
    pgm_param_feat = nd.pick(out, index, 0).reshape((bsz, -1))
    pgm_param_feat = nd.relu(self.pgm_param_feat(pgm_param_feat))
    pgm_param_feat = pgm_param_feat.reshape(bsz, self.program_vector_size,
                                            1, 1, 1)
    shape = self.decoder(pgm_param_feat)
    return shape
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Each argument is an NDArray or a list of per-device NDArrays.

    Returns ``(sum_losses, cls_losses, box_losses)``, three lists with one
    entry per device. Losses are normalised by ``max(1, num positives)``.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = (
        _as_list(item)
        for item in (cls_pred, box_pred, cls_target, box_target))

    # cross device reduction to obtain positive samples in entire batch
    positive_counts = []
    for _, _, labels, _ in zip(cls_pred, box_pred, cls_target, box_target):
        positive_counts.append((labels > 0).sum())
    num_pos_all = sum([count.asscalar() for count in positive_counts])

    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform
    # negative mining
    normalizer = max(1., num_pos_all)
    cls_losses, box_losses, sum_losses = [], [], []
    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        log_prob = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)
        # (pos - 1) is -1 for negatives and 0 for positives, so the double
        # argsort ranks negatives by descending loss.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        negative_budget = nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio)
        hard_negative = rank < negative_budget.expand_dims(-1)
        # mask out if not positive or negative
        selected = (pos + hard_negative) > 0
        cls_loss = nd.where(selected, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(
            nd.sum(cls_loss, axis=0, exclude=True) / normalizer)

        bp = _reshape_like(nd, bp, bt)
        abs_error = nd.abs(bp - bt)
        # Smooth-L1: quadratic below self._rho, linear above it.
        box_loss = nd.where(abs_error > self._rho,
                            abs_error - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(abs_error))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(
            nd.sum(box_loss, axis=0, exclude=True) / normalizer)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def decode(self, x):
    """Greedily decode ``self.seq_length`` program tokens and their parameters.

    The first step is fed ``x``; every later step is fed the embedding of
    the previous step's arg-max token.

    Returns ``[pgms, params]``: per-step log-softmax token scores and the
    parameter scores of the arg-max token, each concatenated along axis 1.
    """
    batch_size = x.shape[0]
    state = self.init_hidden(batch_size, self.ctx)
    pgm_steps = []
    param_steps = []

    for step in range(self.seq_length):
        if step == 0:
            xt = x
        else:
            previous_prob = nd.exp(pgm_steps[-1])
            previous_token = nd.argmax(previous_prob, axis=1)
            xt = self.pgm_embed(previous_token)

        output, state = self.core(xt.expand_dims(axis=0), state)

        # Token head.
        pgm_hidden = nd.relu(self.logit1(output.squeeze(0)))
        pgm_logits = self.logit2(pgm_hidden)
        pgm_score = nd.log_softmax(pgm_logits, axis=1)
        # Detached: the parameter head sees the token distribution as input
        # but does not back-propagate into it.
        trans_prob = nd.softmax(pgm_logits, axis=1).detach()

        # Parameter head: one parameter vector per vocabulary entry.
        param_hidden = nd.relu(self.regress1(output.squeeze(0)))
        param_input = nd.concat(trans_prob, param_hidden, dim=1)
        param_score = self.regress2(param_input)
        param_score = param_score.reshape(batch_size, self.vocab_size + 1,
                                          self.max_param)

        # Keep only the parameters of the most likely token.
        index = nd.argmax(trans_prob, axis=1)
        index = index.expand_dims(axis=1).expand_dims(axis=2).broadcast_to(
            shape=(batch_size, 1, self.max_param)).detach()
        param_score = nd.pick(param_score, index, 1)

        pgm_steps.append(pgm_score)
        param_steps.append(param_score)

    # Insert a step axis and join the steps along it.
    pgm_steps = [item.expand_dims(axis=1) for item in pgm_steps]
    param_steps = [item.expand_dims(axis=1) for item in param_steps]
    pgms, params = pgm_steps[0], param_steps[0]
    for next_pgm, next_param in zip(pgm_steps[1:], param_steps[1:]):
        pgms = nd.concat(pgms, next_pgm, dim=1)
        params = nd.concat(params, next_param, dim=1)
    return [pgms, params]
def get_smoothed_loss(pred, label, num_classes, trg_pad, smooth_alpha=0.1):
    """Label-smoothed cross entropy, averaged over non-padding positions.

    The target distribution puts ``1 - smooth_alpha`` on the true class and
    ``smooth_alpha / (num_classes - 1)`` on every other class. Positions
    whose label equals ``trg_pad`` are excluded from the average.
    """
    # NOTE(review): this clamps the inputs of log_softmax from below at
    # 1e-10, i.e. every negative entry of `pred` becomes 1e-10. That looks
    # intended for probabilities rather than logits -- confirm with callers.
    clamped = nd.maximum(pred, 1e-10)
    log_prob = nd.log_softmax(clamped)

    # cross entropy
    nll = -nd.pick(log_prob, label)
    off_target_weight = smooth_alpha / (num_classes - 1)
    # Summing over all classes also counts the true class once, hence the
    # extra `- off_target_weight` in the coefficient of nll.
    smoothing_term = nd.sum(off_target_weight * log_prob,
                            axis=-1, keepdims=False)
    per_position = (1 - smooth_alpha - off_target_weight) * nll - smoothing_term

    not_padding = label != trg_pad
    return nd.sum(per_position * not_padding) / not_padding.sum()
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    """Run decoding

    Note: the body applies NumPy functions (np.greater, np.where) directly
    to ``word_inputs`` and compares ``arc_targets`` with NumPy, so the
    inputs are presumably array-likes NumPy can consume -- TODO confirm
    against callers.

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size

    Returns
    -------
    tuple
        (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training,
        else if given gold target
        then return arc_accuracy, rel_accuracy, overall_accuracy, outputs,
        otherwise return outputs, where outputs is a list of (arcs, rels).
    """
    # Training mode is taken from autograd, not from an argument.
    is_train = autograd.is_training()

    def flatten_numpy(ndarray):
        """Flatten nd-array to 1-d column vector

        Parameters
        ----------
        ndarray : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            A column vector (flattened in Fortran order)
        """
        return np.reshape(ndarray, (-1,), 'F')

    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    # 1.0 wherever the word index is greater than the ROOT index.
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    if is_train or arc_targets is not None:
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    # Words with an index outside the training vocabulary are mapped to UNK
    # for the trainable embedding; the pretrained embedding (if any) still
    # receives the original indices.
    unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Dropout
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                       dropout_x=self.dropout_lstm_input if is_train else 0)
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(
        nd.dot(top_recur, W_head.T) + b_head)
    dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0],
                                                                                p=self.dropout_mlp)
    # Move the feature axis first so the arc / rel parts can be sliced off.
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                          bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size

    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head ) x (#dep x batch_size)

    arc_preds = arc_logits.argmax(0)
    # seq_len x batch_size

    if is_train or arc_targets is not None:
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        arc_probs = np.transpose(
            np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                          num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size

    flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    # Relation logits are read at the gold head when training and at the
    # predicted head otherwise.
    _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
        seq_len * batch_size, 1)
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        # targets_1D is re-bound here from arc targets to relation targets.
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                            (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss
        # A token counts as fully correct only if both arc and relation match.
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:
        return arc_accuracy, rel_accuracy, overall_accuracy, loss

    outputs = []

    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
        # parse sentences one by one
        # Position 0 is switched on in the mask so it counts towards sent_len.
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_argmax(arc_prob, sent_len, msk)
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = rel_argmax(rel_prob, sent_len)
        # Position 0 is dropped from the returned predictions.
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs