def dev(ch_bert, model, ch_vocab, dev_dataiter, logger, ctx):
    TP_s = 0
    FP_s = 0
    FN_s = 0
    example_ids = []
    for content, token_types, valid_len, label, example_id in tqdm(dev_dataiter):
        example_ids.extend(example_id)
        content = content.as_in_context(ctx)
        token_types = token_types.as_in_context(ctx)
        valid_len = valid_len.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = model(content, token_types, valid_len)
        predict = nd.argmax(nd.softmax(output, axis=-1), axis=-1)
        tp_s = int(nd.sum(nd.equal(predict, label)).asscalar())
        fp_s = int(nd.sum(nd.not_equal(predict, label) * nd.equal(label, 0)).asscalar())
        fn_s = int(nd.sum(nd.not_equal(predict, label) * nd.equal(label, 1)).asscalar())
        TP_s += tp_s
        FP_s += fp_s
        FN_s += fn_s
    P_s = TP_s / (TP_s + FP_s)
    R_s = TP_s / (TP_s + FN_s)
    F = (2 * P_s * R_s) / (P_s + R_s)
    logger.info("F:{}".format(F))
    return F

def distanceAA2(regions, i, binnum, dibins, dibins4):
    # Initiate empty arrays for storing the histogram of directions, the sum of
    # distances, and the number of counted pairs in each distance range bin
    co0 = nd.zeros(binnum - 1, gpu(0), dtype="float32")
    codi0 = nd.zeros((5, binnum - 1), gpu(0), dtype="float32")
    count0 = nd.zeros(binnum - 1, gpu(0), dtype="float32")
    count4 = nd.zeros((5, binnum - 1), gpu(0), dtype="float32")
    co4 = nd.zeros((5, binnum - 1), gpu(0), dtype="float32")
    seed = nd.zeros((1, 2), gpu(0))
    # Calculate index coordinates and directions by chunks
    a = regions[i[0] * broadcdp:min((i[0] + 1) * broadcdp, regions.shape[0]), :]
    b = regions[i[1] * broadcdp:min((i[1] + 1) * broadcdp, regions.shape[0]), :]
    a1 = nd.array(a, gpu(0))
    b1 = nd.array(b, gpu(0))
    # print("a1", a1, "b1", b1)
    for ii in range(a1.shape[0] - 1):
        a1_b1 = (nd.expand_dims(a1[ii].reshape((1, 2)), axis=1) - b1[ii + 1:, :]).reshape((a1[ii + 1:, :].shape[0], 2))
        seed = nd.concat(seed, a1_b1, dim=0)
    if seed.shape[0] > 1:
        x1_x2 = seed[1:, 0]
        y1_y2 = seed[1:, 1]
        labels = nd.zeros(x1_x2.shape[0], gpu(0), dtype="float32")
        sdi0 = (nd.degrees(nd.arctan((y1_y2) / (x1_x2))) + 90).reshape((-1,))
        ldis = nd.broadcast_hypot(x1_x2, y1_y2).reshape((-1,))
        # Change 0 to 180 so the boolean-mask sum can be applied without losing values
        sdi0 = nd.where(condition=(sdi0 == 0), x=labels + 180, y=sdi0)
        # Store the sum of distances co0 and the histogram of directions in each range bin
        for p in range(0, binnum - 1):
            booleanmask = nd.equal((ldis >= bins[p]), (ldis < bins[p + 1]))
            count0[p] += nd.nansum(booleanmask)
            co0[p] += nd.nansum(ldis * booleanmask)
            # Exclude values not in the distance range bin
            sdi1 = nd.where(condition=(booleanmask == 0), x=labels - 1, y=sdi0)
            for q in range(0, 5):
                booleanmaskdi = nd.equal((sdi1 >= dibins[q]), (sdi1 < dibins[q + 1]))
                codi0[q, p] += nd.nansum(booleanmaskdi)
        for k in range(0, 5):
            booleanmaskdi = nd.equal((sdi0 >= dibins4[k]), (sdi0 < dibins4[k + 1]))
            ldis0 = ldis * booleanmaskdi
            for l in range(0, binnum - 1):
                booleanmask = nd.equal((ldis0 >= bins[l]), (ldis0 < bins[l + 1]))
                count4[k, l] += nd.nansum(booleanmask)
                co4[k, l] += nd.nansum(ldis0 * booleanmask)
    codi0[0, :] += codi0[4, :]
    codi0 = codi0[0:4, :]
    count4[0, :] += count4[4, :]
    count4 = count4[0:4, :]
    co4[0, :] += co4[4, :]
    co4 = co4[0:4, :]
    return (co0, codi0, count0, co4, count4)

def distanceAATOPO(regions, i, binnum, dibins, dibins4, x, y, ctx):
    # Initiate empty arrays for storing the histogram of directions, the sum of
    # distances, and the number of counted pairs in each distance range bin
    co0 = nd.zeros(binnum - 1, ctx[0], dtype="float32")
    codi0 = nd.zeros((5, binnum - 1), ctx[0], dtype="float32")
    count0 = nd.zeros(binnum - 1, ctx[0], dtype="float32")
    count4 = nd.zeros((5, binnum - 1), ctx[0], dtype="float32")
    co4 = nd.zeros((5, binnum - 1), ctx[0], dtype="float32")
    # Calculate index coordinates and directions by chunks
    a = regions[i * broadcdp:min((i + 1) * broadcdp, regions.shape[0]), :]
    a1 = nd.array(a, ctx[0])
    b1 = nd.array([x, y], ctx[0])
    a1_b1 = (nd.expand_dims(a1, axis=1) - b1).reshape((-1, 2))
    x1_x2 = a1_b1[:, 0]
    y1_y2 = a1_b1[:, 1]
    # Find the rows where both coordinates equal zero
    boolmask = (x1_x2 == 0) * (y1_y2 == 0)
    labels = nd.zeros(boolmask.shape[0], ctx[0], dtype="float32")
    sdi0 = (nd.degrees(nd.arctan((y1_y2) / (x1_x2))) + 90).reshape((-1,))
    ldis = nd.broadcast_hypot(x1_x2, y1_y2).reshape((-1,))
    # Change the zeros into -1
    sdi0 = nd.where(condition=boolmask, x=labels - 1, y=sdi0)
    ldis = nd.where(condition=boolmask, x=labels - 1, y=ldis)
    # Change 0 to 180 so the boolean-mask sum can be applied without losing values
    sdi0 = nd.where(condition=(sdi0 == 0), x=labels + 180, y=sdi0)
    # Store the sum of distances co0 and the histogram of directions in each range bin
    for p in range(0, binnum - 1):
        booleanmask = nd.equal((ldis >= bins[p]), (ldis < bins[p + 1]))
        count0[p] += nd.sum(booleanmask)
        co0[p] += nd.sum(ldis * booleanmask)
        # Exclude values not in the distance range bin
        sdi1 = nd.where(condition=(booleanmask == 0), x=labels - 1, y=sdi0)
        for q in range(0, 5):
            booleanmaskdi = nd.equal((sdi1 >= dibins[q]), (sdi1 < dibins[q + 1]))
            codi0[q, p] += nd.nansum(booleanmaskdi)
    for k in range(0, 5):
        booleanmaskdi = nd.equal((sdi0 >= dibins4[k]), (sdi0 < dibins4[k + 1]))
        ldis0 = ldis * booleanmaskdi
        for l in range(0, binnum - 1):
            booleanmask = nd.equal((ldis0 >= bins[l]), (ldis0 < bins[l + 1]))
            count4[k, l] += nd.sum(booleanmask)
            co4[k, l] += nd.sum(ldis0 * booleanmask)
    codi0[0, :] += codi0[4, :]
    codi0 = codi0[0:4, :]
    count4[0, :] += count4[4, :]
    count4 = count4[0:4, :]
    co4[0, :] += co4[4, :]
    co4 = co4[0:4, :]
    return (co0.asnumpy(), codi0.asnumpy(), count0.asnumpy(), co4.asnumpy(), count4.asnumpy())

def _get_new_alive_state(self, new_seq, new_log_probs, new_cache):
    """Gather the top k sequences that are still alive.

    Args:
      new_seq: New sequences generated by growing the current alive sequences
        int32 tensor with shape [batch_size, 2 * beam_size, cur_index + 1]
      new_log_probs: Log probabilities of new sequences
        float32 tensor with shape [batch_size, beam_size]
      new_cache: Dict of cached values for each sequence.

    Returns:
      Dictionary with alive keys from _StateKeys:
        {Top beam_size sequences that are still alive (don't end with eos_id),
         Log probabilities of top alive sequences,
         Dict cache storing decoder states for top alive sequences}
    """
    new_finished_flags = nd.equal(new_seq[:, :, -1], self.eos_id)
    new_log_probs = new_log_probs + new_finished_flags * -INF

    top_alive_seq, top_alive_log_probs = _gather_topk_beams(
        [new_seq, new_log_probs], new_log_probs, self.batch_size, self.beam_size)
    top_alive_cache = _gather_topk_beams([], new_log_probs, self.batch_size,
                                         self.beam_size, cache=new_cache)

    return {
        _StateKeys.ALIVE_SEQ: top_alive_seq,
        _StateKeys.ALIVE_LOG_PROBS: top_alive_log_probs,
        _StateKeys.ALIVE_CACHE: top_alive_cache
    }

def train(self, epochs):
    for i in range(epochs):
        efficiency = 0
        cumuLoss = 0
        for j in range(self.nbIter):
            z = nd.round(nd.random.uniform(0, 1, (self.batchSize, self.code.k), ctx=self.ctx))
            x = nd.dot(z, self.code.G) % 2
            noiseBSC = nd.random.uniform(0.01, 0.99, (self.batchSize, self.code.n), ctx=self.ctx)
            noiseBSC = nd.floor(noiseBSC / nd.max(noiseBSC, axis=(1,)).reshape((self.batchSize, 1)))
            y = (x + noiseBSC) % 2
            with autograd.record():
                zHat = self.net(y)
                loss = self.SE(zHat, z)
            loss.backward()
            self.adam(self.params, self.vs, self.sqrs, self.lr, self.batchSize, self.t)
            self.t += 1
            cumuLoss += loss.asscalar()
            zHat = nd.round(zHat)
            efficiency += nd.sum(nd.equal(zHat, z)).asscalar()
        Pc = efficiency / (self.batchSize * self.nbIter * self.code.k)
        Pe = 1 - Pc
        normCumuLoss = cumuLoss / (self.batchSize * self.nbIter * self.code.k)
        print("Epochs %d: Pe = %lf , loss = %lf" % (i, Pe, normCumuLoss))

def distance2(regions, i, binnum, bins, ctx):
    # Initiate an empty array for storing the number of counted pairs in each distance range bin
    count0 = nd.zeros(binnum - 1, ctx[0], dtype="float32")
    seed = nd.zeros((1, 2), ctx[0])
    # Calculate index coordinates and directions by chunks
    a = regions[i[0] * broadcdp:min((i[0] + 1) * broadcdp, regions.shape[0]), :]
    b = regions[i[1] * broadcdp:min((i[1] + 1) * broadcdp, regions.shape[0]), :]
    a1 = nd.array(a, ctx[0])
    b1 = nd.array(b, ctx[0])
    for i in range(a1.shape[0]):
        if i < a1.shape[0] - 1:
            a1_b1 = (nd.expand_dims(a1[i].reshape((1, 2)), axis=1) - b1[i + 1:, :]).reshape((a1[i + 1:, :].shape[0], 2))
            seed = nd.concat(seed, a1_b1, dim=0)
    if seed.shape[0] > 1:
        x1_x2 = seed[:, 0]
        y1_y2 = seed[:, 1]
        # Find the rows where both coordinates equal zero and assign label -1
        boolmask = (x1_x2 == 0) * (y1_y2 == 0)
        labels = nd.zeros(boolmask.shape[0], ctx[0], dtype="float32") - 1
        ldis = nd.broadcast_hypot(x1_x2, y1_y2).reshape((-1,))
        # Change the zeros into -1
        ldis = nd.where(condition=boolmask, x=labels, y=ldis)
        for p in range(0, binnum - 1):
            booleanmask = nd.equal((ldis >= bins[p]), (ldis < bins[p + 1]))
            count0[p] += nd.sum(booleanmask)
    return (count0.asnumpy())

def get_accuracy(pre_l, true_l):
    one_zero_pre = nd.where(pre_l > 0.5, nd.ones_like(pre_l), nd.zeros_like(pre_l))
    compare = nd.equal(one_zero_pre, true_l).sum(axis=1)
    samples_right = nd.where(compare == 3, nd.ones_like(compare), nd.zeros_like(compare)).sum()
    all_num = pre_l.shape[0]
    return samples_right / all_num

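# Hypothetical usage sketch (not part of the original source): assuming `nd` is
# mxnet.ndarray and the labels are 3-way multi-hot vectors, a sample only counts
# as correct when all three per-label decisions match (hence `compare == 3`).
from mxnet import nd
pre_l_demo = nd.array([[0.9, 0.2, 0.7], [0.4, 0.8, 0.6]])
true_l_demo = nd.array([[1, 0, 1], [1, 1, 1]])
print(get_accuracy(pre_l_demo, true_l_demo))  # 0.5: only the first sample matches on all three labels
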
def biaffine(self, dep_arc, dep_rel, head_arc, head_rel, mask, arc_targets, rel_targets, blend):
    is_train = autograd.is_training()
    batch_size = mask.shape[1]
    seq_len = mask.shape[0]

    W_arc = self.arc_W.data()
    arc_logits: nd.NDArray = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size,
                                      num_outputs=1, bias_x=True, bias_y=False)
    if blend is not None:
        arc_logits = arc_logits + blend
    # (#head x #dep) x batch_size
    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head) x (#dep x batch_size)
    arc_preds = nd.greater(arc_logits, 0)  # sigmoid y > 0.5 when x > 0

    if is_train or arc_targets is not None:
        arc_correct = arc_preds.asnumpy() * arc_targets
        arc_accuracy = np.sum(arc_correct) / np.sum(arc_targets * mask)
        # targets_1D = flatten_numpy(arc_targets)
        # losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        flat_arc_targets = reshape_fortran(arc_targets, (seq_len, seq_len * batch_size))
        losses = self.binary_ce_loss(flat_arc_logits, nd.array(flat_arc_targets))

    if is_train or arc_targets is not None:
        mask_1D_tensor = nd.array(flatten_numpy(mask))
        arc_loss = nd.sum(losses * mask_1D_tensor) / mask_1D_tensor.sum()
        # return arc_accuracy, 0, 0, arc_loss

    W_rel = self.rel_W.data()
    rel_logits: nd.NDArray = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                                      num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
    # #head x rel_size x #dep x batch_size
    flat_rel_logits = reshape_fortran(rel_logits.transpose([1, 0, 2, 3]),
                                      (self._vocab.rel_size, seq_len * seq_len * batch_size))
    # rel_size x (#head x #dep x batch_size)

    if is_train or arc_targets is not None:
        mask_rel: nd.NDArray = reshape_fortran(nd.array(mask * arc_targets),
                                               (1, seq_len * seq_len * batch_size))
        flat_rel_preds = flat_rel_logits.argmax(0)
        flat_rel_target = nd.array(reshape_fortran(rel_targets,
                                                   (1, seq_len * seq_len * batch_size))).squeeze(axis=0)
        rel_correct = nd.equal(flat_rel_preds, flat_rel_target).asnumpy()
        rel_correct = rel_correct * flatten_numpy(arc_targets * mask)
        rel_accuracy = np.sum(rel_correct) / np.sum(arc_targets * mask)
        losses = self.softmax_loss(flat_rel_logits, flat_rel_target)
        rel_loss = nd.sum(losses * mask_rel) / mask_rel.sum()

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss

    if is_train:
        return arc_accuracy, rel_accuracy, loss

    outputs = []
    rel_preds = rel_logits.transpose([1, 0, 2, 3]).argmax(0)
    arc_preds = arc_preds.transpose([2, 0, 1])
    rel_preds = rel_preds.transpose([2, 0, 1])

    for msk, arc_pred, rel_pred in zip(np.transpose(mask), arc_preds, rel_preds):
        # parse sentences one by one
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_pred[:sent_len, :sent_len]
        outputs.append((arc_pred[:sent_len, :sent_len], arc_pred * rel_pred[:sent_len, :sent_len]))
    return outputs

def accuracy_metric(gallery_features, gallery_label, query_features, query_label):
    B1 = nd.sum(nd.square(gallery_features), axis=1, keepdims=True)
    B2 = nd.sum(nd.square(query_features), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(B2, B1.T) - 2 * nd.dot(query_features, gallery_features.T)
    label_mask = nd.broadcast_equal(dist_mat, nd.min(dist_mat, axis=1, keepdims=True)).astype('float32')
    pre_label_mat = nd.broadcast_mul(label_mask, gallery_label.reshape(1, -1).astype('float32'))
    pre_label_list = nd.max(pre_label_mat, axis=1)
    cor_num = nd.sum(nd.equal(pre_label_list, query_label.astype('float32')))
    return cor_num.asnumpy()[0] / len(query_label)

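# Hypothetical usage sketch (not part of the original source), assuming `nd` is
# mxnet.ndarray: each query is assigned the label of its nearest gallery entry
# (squared Euclidean distance), and the fraction of correct assignments is returned.
from mxnet import nd
gallery_feats_demo = nd.array([[1.0, 0.0], [0.0, 1.0]])
gallery_label_demo = nd.array([3, 5])
query_feats_demo = nd.array([[0.9, 0.1], [0.2, 1.1]])
query_label_demo = nd.array([3, 5])
print(accuracy_metric(gallery_feats_demo, gallery_label_demo,
                      query_feats_demo, query_label_demo))  # 1.0: both nearest neighbours match
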
def lifted_loss(net, data, label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)
    p_row = nd.sum(nd.exp(1.0 - dist_mat) * (1 - label_mat), 1, True)
    loss = 1000 * (nd.log(p_row + p_row.T + 1e-5) + dist_mat) * label_mat / (2 * label_mat.sum())
    return loss

def train_and_valid(ch_bert, model, ch_vocab, train_dataiter, dev_dataiter, trainer, finetune_trainer,
                    epochs, loss_func, ctx, lr, batch_size, params_save_step, params_save_path_root,
                    eval_step, log_step, check_step, logger, num_train_examples, warmup_ratio):
    batches = len(train_dataiter)
    num_train_steps = int(num_train_examples / batch_size * epochs)
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    global_step = 0
    dev_bleu_score = 0
    for epoch in range(epochs):
        for content, token_types, valid_len, label, example_id in train_dataiter:
            # learning rate schedule
            if global_step < num_warmup_steps:
                new_lr = lr * global_step / num_warmup_steps
            else:
                non_warmup_steps = global_step - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps - num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            content = content.as_in_context(ctx)
            token_types = token_types.as_in_context(ctx)
            valid_len = valid_len.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record():
                output = model(content, token_types, valid_len)
                loss_mean = loss_func(output, label)
                loss_mean = nd.sum(loss_mean) / batch_size
            loss_mean.backward()
            loss_scalar = loss_mean.asscalar()
            trainer.step(1)
            finetune_trainer.step(1)

            if global_step and global_step % log_step == 0:
                acc = nd.sum(nd.equal(nd.argmax(nd.softmax(output, axis=-1), axis=-1), label)) / batch_size
                acc = acc.asscalar()
                logger.info("epoch:{}, batch:{}/{}, acc:{}, loss:{}, (lr:{}s)".format(
                    epoch, global_step % batches, batches, acc, loss_scalar, trainer.learning_rate))
            global_step += 1

        F1 = dev(ch_bert, model, ch_vocab, dev_dataiter, logger, ctx)
        if not os.path.exists(params_save_path_root):
            os.makedirs(params_save_path_root)
        model_params_file = params_save_path_root + \
            "model_step_{}_{}.params".format(global_step, F1)
        model.save_parameters(model_params_file)
        logger.info("{} Save Completed.".format(model_params_file))

def Treplit_hard_loss(net, data, label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)
    p_min = nd.log(nd.sum(label_mat * nd.exp(dist_mat), axis=1))
    p_max = nd.log(nd.sum((1 - label_mat) * nd.exp(-dist_mat), axis=1))
    loss = nd.relu(p_min + p_max + 1)
    return loss

def hard_example_mining(dist_mat, labels, return_inds=False):
    """For each anchor, find the hardest positive and negative sample.

    Args:
      dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
      labels: pytorch LongTensor, with shape [N]
      return_inds: whether to return the indices. Save time if `False`(?)

    Returns:
      dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
      dist_an: pytorch Variable, distance(anchor, negative); shape [N]
      p_inds: pytorch LongTensor, with shape [N];
        indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
      n_inds: pytorch LongTensor, with shape [N];
        indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1

    NOTE: Only consider the case in which all labels have same num of samples,
      thus we can cope with all anchors in parallel.
    """
    assert len(dist_mat.shape) == 2
    assert dist_mat.shape[0] == dist_mat.shape[1]
    N = dist_mat.shape[0]

    # shape [N, N]
    is_pos = nd.equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')
    is_neg = nd.not_equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')

    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    dist_pos = dist_mat * is_pos
    dist_ap = nd.max(dist_pos, axis=1)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_neg = dist_mat * is_neg + nd.max(dist_mat, axis=1, keepdims=True) * is_pos
    dist_an = nd.min(dist_neg, axis=1)
    # shape [N]

    # if return_inds:
    #     # shape [N, N]
    #     ind = (labels.new().resize_as_(labels)
    #            .copy_(torch.arange(0, N).long())
    #            .unsqueeze(0).expand(N, N))
    #     # shape [N, 1]
    #     p_inds = torch.gather(
    #         ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
    #     n_inds = torch.gather(
    #         ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
    #     # shape [N]
    #     p_inds = p_inds.squeeze(1)
    #     n_inds = n_inds.squeeze(1)
    #     return dist_ap, dist_an, p_inds, n_inds

    return dist_ap, dist_an

def main():
    net = resnet.features
    # net.load_parameters('./new_metric_200.params')
    # net.initialize(init=mx.init.Xavier())
    net.collect_params().reset_ctx(ctx)
    transforms = gluon.data.vision.transforms.Compose([
        gluon.data.vision.transforms.RandomSaturation(0.2),
        gluon.data.vision.transforms.RandomContrast(0.2),
        gluon.data.vision.transforms.RandomBrightness(0.1),
        gluon.data.vision.transforms.RandomFlipTopBottom(),
        gluon.data.vision.transforms.ToTensor()
    ])
    test_dataset = Reader('./data/imgs', 'metric', data_argument=False)
    test_data = gluon.data.DataLoader(test_dataset.transform_first(transforms),
                                      batch_size, True, num_workers=16)
    acc = evaluate(net, test_data, ctx=ctx)
    print('Accuracy: %s' % (acc))

    for epoch in range(201):
        train_data = Reader('./data/imgs', 'metric', data_argument=True)
        train_data = DataLoader(train_data.transform_first(transforms),
                                batch_size, False, num_workers=16)
        total_loss = 0
        start_time = time.time()
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                label = label.reshape(-1, 1)
                label_mat = nd.equal(label, label.T).astype('float32')
                vec = net(data)
                vec = nd.Flatten(vec)
                dist_mat = -nd.dot(vec, vec.T) / 50
                p_row = nd.sum(nd.exp(1.0 - dist_mat) * (1 - label_mat), 1, True)
                loss = (nd.log(p_row + p_row.T + 1e-5) + dist_mat) * label_mat
                loss = nd.relu(loss)
            loss.backward()
            now_lr = 0.01 * (0.99 ** epoch)
            trainer_triplet = gluon.Trainer(net.collect_params(), 'sgd',
                                            {'learning_rate': now_lr, 'momentum': 0.9, 'wd': 0.00005})
            trainer_triplet.step(batch_size)
            total_loss += mx.nd.mean(loss).asscalar()
            # if i % 20 == 0:
            #     print('Batch: %s, Loss: %s' % (i, total_loss))
        print('Epoch: %s, Loss: %s, Time: %s' % (epoch, total_loss / len(train_data), time.time() - start_time))
        start_time = time.time()
        if epoch > 0 and epoch % 5 == 0:
            acc = evaluate(net, test_data, ctx=ctx)
            print('Accuracy: %s, Time: %s' % (acc, time.time() - start_time))
        if epoch > 10 and epoch % 2 == 0:
            net.save_parameters('8.15_mobilenet_metric_' + str(epoch) + '.params')

def balance_sampler(samples):
    """ignore extra negative samples to keep batch balance"""
    num_pos = nd.sum(samples == 1, axis=0)
    num_neg = nd.sum(samples == 0, axis=0)
    drop_prob = (num_neg - num_pos) / num_neg
    drop_prob = nd.where(nd.lesser(drop_prob, 0), nd.zeros_like(drop_prob), drop_prob)
    mask = nd.where(
        nd.greater(nd.random.uniform(0, 1, shape=samples.shape, ctx=samples.context), drop_prob),
        nd.ones_like(samples), nd.zeros_like(samples))
    mask = nd.where(nd.equal(samples, 1), samples, mask)
    return mask

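# Hypothetical usage sketch (not part of the original source), assuming `nd` is
# mxnet.ndarray and `samples` is a 0/1 label vector: positives are always kept,
# while each negative is dropped with probability (num_neg - num_pos) / num_neg.
from mxnet import nd
samples_demo = nd.array([1, 0, 0, 0, 1, 0])
print(balance_sampler(samples_demo))  # random mask, e.g. [1. 0. 1. 0. 1. 1.]
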
def distance11(regions_high, regions_low, i, binnum, bins):
    # Initiate an empty array for storing the number of counted pairs in each distance range bin
    count0 = nd.zeros(binnum - 1, gpu(0), dtype="float32")
    # Calculate index coordinates and directions by chunks
    a = regions_high[i[0] * broadcdp:min((i[0] + 1) * broadcdp, regions_high.shape[0]), :]
    b = regions_low[i[1] * broadcdp:min((i[1] + 1) * broadcdp, regions_low.shape[0]), :]
    a1 = nd.array(a, gpu(0))
    b1 = nd.array(b, gpu(0))
    a1_b1 = (nd.expand_dims(a1, axis=1) - b1).reshape((-1, 2))
    x1_x2 = a1_b1[:, 0]
    y1_y2 = a1_b1[:, 1]
    ldis = nd.broadcast_hypot(x1_x2, y1_y2).reshape((-1,))
    for p in range(0, binnum - 1):
        booleanmask = nd.equal((ldis >= bins[p]), (ldis < bins[p + 1]))
        count0[p] += nd.nansum(booleanmask)
    return (count0)

def hard_example_mining(dist_mat, labels):
    assert len(dist_mat.shape) == 2
    assert dist_mat.shape[0] == dist_mat.shape[1]
    N = dist_mat.shape[0]
    # shape [N, N]
    is_pos = nd.equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')
    is_neg = nd.not_equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')
    dist_pos = dist_mat * is_pos
    dist_ap = nd.max(dist_pos, axis=1)
    dist_neg = dist_mat * is_neg + nd.max(dist_mat, axis=1, keepdims=True) * is_pos
    dist_an = nd.min(dist_neg, axis=1)
    return dist_ap, dist_an

def forward(self, x, target):
    assert x.shape[1] == self.size  # sequence length
    with autograd.pause():
        true_dist = nd.zeros_like(x) + self.smoothing / (self.size - 2)
        target_mask = nd.zeros_like(true_dist)
        for r, c in enumerate(target):
            target_mask[r, c] = 1
        true_dist = nd.where(target_mask, nd.zeros_like(true_dist) + self.confidence, true_dist)
        true_dist[:, self.padding_idx] = 0
        mask = nd.equal(target, self.padding_idx)
        if len(mask.shape) > 0:
            true_dist = nd.where(nd.squeeze(mask), nd.zeros_like(true_dist), true_dist)
    self.true_dist = true_dist
    return self.criterion(x, true_dist.as_in_context(cfg.ctx))

def _get_new_finished_state(self, state, new_seq, new_log_probs):
    """Combine new and old finished sequences, and gather the top k sequences.

    Args:
      state: A dictionary with the current loop state.
      new_seq: New sequences generated by growing the current alive sequences
        int32 tensor with shape [batch_size, beam_size, i + 1]
      new_log_probs: Log probabilities of new sequences
        float32 tensor with shape [batch_size, beam_size]

    Returns:
      Dictionary with finished keys from _StateKeys:
        {Top beam_size finished sequences based on score,
         Scores of finished sequences,
         Finished flags of finished sequences}
    """
    i = state[_StateKeys.CUR_INDEX]
    finished_seq = state[_StateKeys.FINISHED_SEQ]
    finished_scores = state[_StateKeys.FINISHED_SCORES]
    finished_flags = state[_StateKeys.FINISHED_FLAGS]

    finished_seq = nd.concat(finished_seq,
                             nd.zeros(shape=(self.batch_size, self.beam_size, 1), ctx=ctx),
                             dim=2)

    length_norm = _length_normalization(self.alpha, i + 1)
    new_scores = new_log_probs / length_norm

    new_finished_flags = nd.equal(new_seq[:, :, -1], self.eos_id)
    new_scores = new_scores + (1. - new_finished_flags) * -INF

    # combine sequences, scores, and flags
    finished_seq = nd.concat(finished_seq, new_seq, dim=1)
    finished_scores = nd.concat(finished_scores, new_scores, dim=1)
    finished_flags = nd.concat(finished_flags, new_finished_flags, dim=1)

    top_finished_seq, top_finished_scores, top_finished_flags = _gather_topk_beams(
        [finished_seq, finished_scores, finished_flags],
        finished_scores, self.batch_size, self.beam_size)

    return {
        _StateKeys.FINISHED_SEQ: top_finished_seq,
        _StateKeys.FINISHED_SCORES: top_finished_scores,
        _StateKeys.FINISHED_FLAGS: top_finished_flags
    }

def get_rmse(class_pre_l, class_true_l, con_pre_l, con_true_l, data_utils):
    # find right predictions
    one_zero_pre = nd.where(class_pre_l > 0.5, nd.ones_like(class_pre_l), nd.zeros_like(class_pre_l))
    compare = nd.equal(one_zero_pre, class_true_l).sum(axis=1)
    weight_right = nd.repeat(nd.expand_dims(nd.where(compare == 3, nd.ones_like(compare), nd.zeros_like(compare)),
                                            axis=0), repeats=2, axis=0).transpose()
    # calculate rmse based on right prediction
    eth_co_me_limit = nd.array([[data_utils.scale_CO[1], data_utils.scale_CO[0], data_utils.scale_Me[0]]])
    concentration_mat = nd.where(class_pre_l > 0.5,
                                 nd.repeat(eth_co_me_limit, repeats=class_pre_l.shape[0], axis=0),
                                 nd.zeros_like(class_pre_l))
    eth_pre_con, eth_pre_con_true = concentration_mat[:, 0] * con_pre_l[:, 1], concentration_mat[:, 0] * con_true_l[:, 1]
    co_pre_con, co_pre_con_true = concentration_mat[:, 1] * con_pre_l[:, 0], concentration_mat[:, 1] * con_true_l[:, 0]
    me_pre_con, me_pre_con_true = concentration_mat[:, 2] * con_pre_l[:, 0], concentration_mat[:, 2] * con_true_l[:, 0]
    co_or_me_con, co_or_me_con_true = co_pre_con + me_pre_con, co_pre_con_true + me_pre_con_true
    co_or_me_eth_con = nd.concat(nd.expand_dims(co_or_me_con, axis=0),
                                 nd.expand_dims(eth_pre_con, axis=0), dim=0).transpose()
    co_or_me_eth_con_true = nd.concat(nd.expand_dims(co_or_me_con_true, axis=0),
                                      nd.expand_dims(eth_pre_con_true, axis=0), dim=0).transpose()
    # rmse = (((co_or_me_eth_con - co_or_me_eth_con_true)**2 * weight_right).sum() / (weight_right[:, 0].sum()))**(0.5)
    rmse = (((co_or_me_eth_con - co_or_me_eth_con_true)**2).mean(axis=0))
    return rmse

def forward(self, pred, target):
    batch_size = target.shape[0]
    label_size = target.shape[1]
    # rank weights used to sample negatives and weight the loss
    rank_weights = self.rank_weights
    max_num_trials = target.shape[1] - 1
    pos_mask = nd.greater(target, 0).asnumpy()
    neg_mask = nd.equal(target, 0).asnumpy()
    L = nd.zeros_like(pred)

    for i in range(batch_size):
        for j in range(label_size):
            if target[i, j] == 1:
                # initialization
                sample_score_margin = -1
                num_trials = 0
                while (sample_score_margin < 0) and (num_trials < max_num_trials):
                    neg_labels_idx = np.array([idx for idx, v in enumerate(target[i, :]) if v == 0])
                    if len(neg_labels_idx) > 0:
                        neg_idx = np.random.choice(neg_labels_idx, replace=False)
                        sample_score_margin = pred[i, neg_idx] - pred[i, j]
                        num_trials += 1
                    else:
                        num_trials = 1
                # the number of trials determines the weight
                r_j = int(np.floor(max_num_trials / num_trials))
                L[i, j] = rank_weights[r_j]
    # print("L weight", L)
    loss = nd.sum(L * (nd.sum(1 - nd.array(pos_mask).as_in_context(pred.context) * pred +
                              nd.array(neg_mask).as_in_context(pred.context) * pred,
                              axis=1, keepdims=True)))
    self.save_for_backward(L, pos_mask, neg_mask)
    return loss

def distance2(regions, i, binnum, bins):
    # Initiate an empty array for storing the number of counted pairs in each distance range bin
    count0 = nd.zeros(binnum - 1, gpu(0), dtype="float32")
    seed = nd.zeros((1, 2), gpu(0))
    # Calculate index coordinates and directions by chunks
    a = regions[i[0] * broadcdp:min((i[0] + 1) * broadcdp, regions.shape[0]), :]
    b = regions[i[1] * broadcdp:min((i[1] + 1) * broadcdp, regions.shape[0]), :]
    a1 = nd.array(a, gpu(0))
    b1 = nd.array(b, gpu(0))
    for ii in range(a1.shape[0] - 1):
        a1_b1 = (nd.expand_dims(a1[ii].reshape((1, 2)), axis=1) - b1[ii + 1:, :]).reshape((a1[ii + 1:, :].shape[0], 2))
        seed = nd.concat(seed, a1_b1, dim=0)
    if seed.shape[0] > 1:
        x1_x2 = seed[1:, 0]
        y1_y2 = seed[1:, 1]
        ldis = nd.broadcast_hypot(x1_x2, y1_y2).reshape((-1,))
        for p in range(0, binnum - 1):
            booleanmask = nd.equal((ldis >= bins[p]), (ldis < bins[p + 1]))
            count0[p] += nd.nansum(booleanmask)
    return (count0)

def batch_loss(transformer_model, en_sentences, x_en_emb, x_en_idx, y_zh_idx, loss):
    batch_size = x_en_emb.shape[0]
    ch2idx, idx2ch = load_ch_vocab()
    y_zh_idx_nd = nd.array(y_zh_idx, ctx=ghp.ctx)
    dec_input_zh_idx = nd.concat(nd.ones(shape=y_zh_idx_nd[:, :1].shape, ctx=ghp.ctx) * 2,
                                 y_zh_idx_nd[:, :-1], dim=1)

    output = transformer_model(x_en_emb, x_en_idx, dec_input_zh_idx, True)
    predict = nd.argmax(nd.softmax(output, axis=-1), axis=-1)

    # print("input_idx:", dec_input_zh_idx[0])
    # print("predict_idx:", predict[0])
    print("source:", en_sentences[0])

    label_token = []
    for n in range(len(y_zh_idx[0])):
        label_token.append(idx2ch[int(y_zh_idx[0][n])])
    print("target:", "".join(label_token))

    predict_token = []
    for n in range(len(predict[0])):
        predict_token.append(idx2ch[int(predict[0][n].asscalar())])
    print("predict:", "".join(predict_token))

    is_target = nd.not_equal(y_zh_idx_nd, 0)
    # print(is_target)
    current = nd.equal(y_zh_idx_nd, predict) * is_target
    acc = nd.sum(current) / nd.sum(is_target)

    l = loss(output, y_zh_idx_nd)
    l_mean = nd.sum(l) / batch_size
    return l_mean, acc

def getMask(q_seq, k_seq):
    # q_seq shape : (batch_size, q_seq_len)
    # k_seq shape : (batch_size, k_seq_len)
    q_len = q_seq.shape[1]
    pad_mask = nd.not_equal(k_seq, 0)
    pad_mask = nd.expand_dims(pad_mask, axis=1)
    pad_mask = nd.broadcast_axes(pad_mask, axis=1, size=q_len)
    return pad_mask


def getSelfMask(q_seq):
    batch_size, seq_len = q_seq.shape
    mask_matrix = np.ones(shape=(seq_len, seq_len), dtype=np.float)
    mask = np.tril(mask_matrix, k=0)
    mask = nd.expand_dims(nd.array(mask, ctx=ghp.ctx), axis=0)
    mask = nd.broadcast_axes(mask, axis=0, size=batch_size)
    return mask


if __name__ == '__main__':
    mask = getMask(nd.array([[1, 2, 0], [1, 0, 0]]), nd.array([[1, 2, 3], [5, 0, 0]]))
    print(mask)
    mask = getSelfMask(nd.array([[1, 2, 0], [2, 0, 0], [0, 0, 0]]))
    print(mask)
    score = nd.array([[5, 6, 0], [5, 10, 0]])
    pading = nd.ones_like(score) * 0.1
    score = nd.where(nd.equal(mask[0], 0), pading, score)
    print(score)

def train(self, inputs, outputs, epochs=10, batch_size=32, lr=0.001, transform=None, verbose=True):
    """Train the neural network to fit the outputs with the inputs.

    Args:
        inputs: an ndarray of inputs.
        outputs: an ndarray of outputs.
        epochs, batch_size, lr: the parameters of the learning algorithm.
        transform: if None, take the outputs as given, else compute
            transformed outputs = transform(outputs) and fit with them.
        verbose: if True, the results are displayed throughout the training.

    Returns:
        The history of the training (tuple of arrays)."""
    if transform:
        outputs = transform(outputs)

    n = (inputs.shape[1] - 1) // batch_size + 1  # inputs-1/batch - 1 < n <= inputs-1/batch
    if len(outputs.shape) == 1:
        outputs = outputs.reshape((1, outputs.shape[0]))
    assert inputs.shape[1] == outputs.shape[1], "Shapes do not match."
    data = nd.concat(inputs.T, outputs.T)

    efficiencies = []
    cumuLosses = []
    epochs = list(range(epochs))

    for i in epochs:
        efficiency = 0
        cumuLoss = 0
        data = nd.shuffle(data)
        batchs = [data[k * batch_size:min(inputs.shape[1], (k + 1) * batch_size), :] for k in range(n)]
        for batch in batchs:
            with autograd.record():
                output = self.compute(batch[:, :inputs.shape[0]].T)
                loss = SymNet.squared_error(output, batch[:, inputs.shape[0]:].T)
            loss.backward()
            self.adam_descent(batch_size, lr)
            output = nd.round(output)
            cumuLoss += loss.asscalar()
            efficiency += nd.sum(nd.equal(output, batch[:, inputs.shape[0]:].T)).asscalar()
        efficiency /= outputs.shape[1] * outputs.shape[0]
        efficiencies.append(efficiency)
        cumuLoss /= outputs.shape[1] * outputs.shape[0]
        cumuLosses.append(cumuLoss)
        if verbose:
            print("Epochs %d: Pe = %lf , loss = %lf" % (i, 1 - efficiency, cumuLoss))

    return (epochs, cumuLosses, efficiencies)

def train_and_valid(src_bert, mt_model, src_vocab, tgt_vocab, train_dataiter, dev_dataiter, trainer,
                    finetune_trainer, epochs, loss_func, ctx, lr, batch_size, params_save_path_root,
                    eval_step, log_step, check_step, label_smooth, logger, num_train_examples, warmup_ratio):
    batches = len(train_dataiter)
    num_train_steps = int(num_train_examples / batch_size * epochs)
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    global_step = 0
    dev_bleu_score = 0
    for epoch in range(epochs):
        for src, tgt, label, src_valid_len, tgt_valid_len in train_dataiter:
            # learning rate strategy
            if global_step < num_warmup_steps:
                new_lr = lr * global_step / num_warmup_steps
            else:
                non_warmup_steps = global_step - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps - num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            src = src.as_in_context(ctx)
            tgt = tgt.as_in_context(ctx)
            label = label.as_in_context(ctx)
            src_valid_len = src_valid_len.as_in_context(ctx)
            src_token_type = nd.zeros_like(src, ctx=ctx)

            tgt_mask = nd.not_equal(tgt, tgt_vocab(tgt_vocab.padding_token))

            if label_smooth:
                eps = 0.1
                num_class = len(tgt_vocab.idx_to_token)
                one_hot = nd.one_hot(label, num_class)
                one_hot_label = one_hot * (1 - eps) + (1 - one_hot) * eps / num_class

            with autograd.record():
                src_bert_outputs = src_bert(src, src_token_type, src_valid_len)
                mt_outputs = mt_model(src_bert_outputs, src, tgt)
                loss_mean = loss_func(mt_outputs, one_hot_label, tgt_mask)

            loss_mean.backward()
            loss_scalar = loss_mean.asscalar()
            trainer.step(1)
            finetune_trainer.step(1)

            if global_step and global_step % log_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                correct = nd.equal(label, predicts)
                accuracy = (nd.sum(correct * tgt_mask) / nd.sum(tgt_mask)).asscalar()
                logger.info("epoch:{}, batch:{}/{}, bleu:{}, acc:{}, loss:{}, (lr:{}s)".format(
                    epoch, global_step % batches, batches, dev_bleu_score, accuracy,
                    loss_scalar, trainer.learning_rate))

            if global_step and global_step % check_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                refer_sample = src.asnumpy().tolist()
                label_sample = label.asnumpy().tolist()
                pred_sample = predicts.asnumpy().tolist()
                logger.info("train sample:")
                logger.info("refer :{}".format(
                    " ".join([src_vocab.idx_to_token[int(idx)] for idx in refer_sample[0]])
                ).replace(src_vocab.padding_token, ""))
                logger.info("target :{}".format(
                    " ".join([tgt_vocab.idx_to_token[int(idx)] for idx in label_sample[0]])
                ).replace(EOS, "[EOS]").replace(tgt_vocab.padding_token, ""))
                logger.info("predict:{}".format(
                    " ".join([tgt_vocab.idx_to_token[int(idx)] for idx in pred_sample[0]])
                ).replace(EOS, "[EOS]"))

            if global_step and global_step % eval_step == 0:
                dev_bleu_score = eval(src_bert, mt_model, src_vocab, tgt_vocab, dev_dataiter, logger, ctx=ctx)
                if not os.path.exists(params_save_path_root):
                    os.makedirs(params_save_path_root)
                model_params_file = params_save_path_root + \
                    "src_bert_step_{}.params".format(global_step)
                src_bert.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))
                model_params_file = params_save_path_root + \
                    "mt_step_{}.params".format(global_step)
                mt_model.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))

            writer.add_scalar("loss", loss_scalar, global_step)
            global_step += 1

def accuracy(output, label, batch_size):
    out = nd.argmax(output, axis=1)
    res = nd.sum(nd.equal(out.reshape((-1, 1)), label)) / batch_size
    return res

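# Hypothetical usage sketch (not part of the original source), assuming `nd` is
# mxnet.ndarray, `output` holds per-class scores and `label` is a column vector
# of class ids.
from mxnet import nd
output_demo = nd.array([[0.2, 0.8], [0.9, 0.1]])
label_demo = nd.array([[1], [1]])
print(accuracy(output_demo, label_demo, batch_size=2))  # 0.5: one of two predictions is correct
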
def get_padding(x, padding_value=0):
    return nd.equal(x, padding_value)

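# Hypothetical usage sketch (not part of the original source), assuming `nd` is
# mxnet.ndarray: the returned mask is 1.0 wherever the token id equals the padding value.
from mxnet import nd
tokens_demo = nd.array([[4, 7, 0, 0], [9, 0, 0, 0]])
print(get_padding(tokens_demo))  # [[0. 0. 1. 1.] [0. 1. 1. 1.]]
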
def accuracy(predictions, targets):
    predictions = nd.argmax(predictions, 1)
    return nd.mean(nd.equal(predictions, targets)).asscalar() * 100
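
# Hypothetical usage sketch (not part of the original source), assuming `nd` is
# mxnet.ndarray, `predictions` holds raw class scores and `targets` holds
# integer class ids; the result is a percentage.
from mxnet import nd
preds_demo = nd.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
targets_demo = nd.array([1, 0, 0])
print(accuracy(preds_demo, targets_demo))  # ~66.67: two of three predictions are correct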