def train(model, features, X, X_train, y_train, epochs):
    """Train `model` on the node indices in `X_train` with sigmoid BCE.

    Parameters
    ----------
    model : Block whose output is indexed per training node; assumed to emit
        probabilities already (hence ``from_sigmoid=True`` below).
    features : Block producing a feature representation of `X`.
    X : full input passed to both `model` and `features`.
    X_train : iterable of node indices to train on.
    y_train : labels aligned with `X_train`.
    epochs : number of training epochs.

    Returns
    -------
    list of numpy arrays: the representation of `X` before training and
    after every epoch.
    """
    cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    # NOTE(review): momentum=1 never decays the velocity term — confirm this
    # is intentional and not a typo for e.g. 0.9.
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': 0.001, 'momentum': 1})
    feature_representations = [features(X).asnumpy()]
    for e in range(1, epochs + 1):
        cum_loss = 0
        cum_preds = []
        for i, x in enumerate(X_train):
            y = nd.array(y_train)[i]
            with autograd.record():
                preds = model(X)[x]
                loss = cross_entropy(preds, y)
            loss.backward()
            trainer.step(1)
            cum_loss += loss.asscalar()
            # Bug fix: original appended the bound method `preds.asscalar`
            # (no call), filling cum_preds with method objects.
            cum_preds += [preds.asscalar()]
        feature_representations.append(features(X).asnumpy())
        # Bug fix: `epochs // 10` is 0 for epochs < 10 (ZeroDivisionError);
        # in that case just report every epoch. Also fixes the "Eopch" typo.
        if epochs < 10 or (e % (epochs // 10)) == 0:
            print(f"Epoch {e}/{epochs} -- Loss: {cum_loss: .4f}")
            print(cum_preds)
    return feature_representations
def train(embed_size, idx_to_token, lr, num_epochs, ctx, data_iter, batch_size):
    """Train a skip-gram word2vec model with negative sampling.

    Builds two embedding tables (center / context), optimizes them with
    Adam and sigmoid BCE, prints the mean per-element loss each epoch and
    checkpoints the network after every epoch.
    """
    vocab_size = len(idx_to_token)
    net = nn.Sequential()
    # net[0] holds center-word vectors, net[1] holds context-word vectors.
    net.add(nn.Embedding(input_dim=vocab_size, output_dim=embed_size),
            nn.Embedding(input_dim=vocab_size, output_dim=embed_size))
    net.initialize(ctx=ctx, force_reinit=True)

    loss_fun = gloss.SigmoidBinaryCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': lr})

    for epoch in range(num_epochs):
        start_time = time.time()
        loss_sum, n = 0.0, 0
        for batch in data_iter:
            center, context_negative, mask, label = (
                data.as_in_context(ctx) for data in batch)
            with autograd.record():
                pred = skip_gram_model(center, context_negative,
                                       net[0], net[1])
                # Mask out padding positions, then renormalize each row so
                # every example contributes equally regardless of padding.
                l = (loss_fun(pred.reshape(label.shape), label, mask)
                     * mask.shape[1] / mask.sum(axis=1))
            l.backward()
            # Note: the loss is not divided by batch size, so step() with
            # batch_size performs that normalization.
            trainer.step(batch_size)
            loss_sum += l.sum().asscalar()
            n += l.size
        print('epoch %d, loss %.2f, time %.2fs'
              % (epoch + 1, loss_sum / n, time.time() - start_time))
        net.save_parameters(f'./data/params_{epoch+1}')
def train(encoder, sent_rnn, dataset, lr, batch_size, num_epochs, vocab, ctx):
    """Train the sentence encoder and sentence-level RNN jointly.

    Uses Adam for both sub-networks, checkpoints whenever the mean epoch
    loss improves, and appends a one-line summary per epoch to a log file.
    """
    print('start training')
    for block in (encoder, sent_rnn):
        block.initialize(init.Xavier(), force_reinit=True, ctx=ctx)
    enc_trainer = gluon.Trainer(encoder.collect_params(), 'adam',
                                {'learning_rate': lr})
    sent_rnn_trainer = gluon.Trainer(sent_rnn.collect_params(), 'adam',
                                     {'learning_rate': lr})
    # Model outputs are already probabilities (from_sigmoid=True).
    loss = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

    best_batch, best_loss = 0, float('Inf')
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, (X, Y) in enumerate(data_iter):
            X, Y = X.as_in_context(ctx), Y.as_in_context(ctx)
            with autograd.record():
                l = batch_loss(encoder, sent_rnn, X, Y, vocab, loss, ctx)
            l.backward()
            # Normalize the update by the sequence dimension of X.
            sent_rnn_trainer.step(X.shape[1])
            enc_trainer.step(X.shape[1])
            running_loss += l.asscalar()
            if batch_idx % 20 == 0:
                print("epoch %d, batch %d, batch_loss %.3f"
                      % (epoch, batch_idx, l.asscalar()))
        cur_loss = running_loss / len(data_iter)
        if cur_loss < best_loss:
            # New best epoch: remember it and save a checkpoint.
            best_loss, best_batch = cur_loss, epoch + 1
            if not os.path.exists('../model'):
                os.mkdir('../model')
            encoder.save_parameters('../model/encoder' + str(epoch + 1) + '.params')
            sent_rnn.save_parameters('../model/sent_rnn' + str(epoch + 1) + '.params')
        info = ("epoch %d, loss %.3f, best_loss %.3f, best_batch %d"
                % (epoch, cur_loss, best_loss, best_batch))
        print(info)
        # Append the epoch summary to the log file.
        if not os.path.exists('../log'):
            os.mkdir('../log')
        with open('../log/log.log', 'a', encoding='utf-8') as fa:
            fa.write(time.ctime() + "\t" + info + '\n')
def __init__(self, batch_axis=0, weight=None, box_loss_type='mse',
             num_class=20, ignore_iou_thresh=0.5, **kwargs):
    """YOLOv3 loss.

    Parameters
    ----------
    batch_axis : int
        Axis that represents the mini-batch (forwarded to the base Loss).
    weight : float or None
        Global scalar weight for the loss (forwarded to the base Loss).
    box_loss_type : str
        'mse' selects L1 box regression; any other value selects an
        IoU-based box loss of that type.
    num_class : int
        Number of object classes. Generalized from the previously
        hard-coded 20 (Pascal VOC); default keeps old behavior.
    ignore_iou_thresh : float
        Anchors whose IoU with ground truth exceeds this are ignored as
        negatives. Generalized from the previously hard-coded 0.5.
    """
    super(YOLOv3Loss, self).__init__(weight, batch_axis, **kwargs)
    # Objectness/class terms are computed on raw logits.
    self._sigmoid_ce = mloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
    self.target = YOLOV3TargetMerger(num_class,
                                     ignore_iou_thresh=ignore_iou_thresh)
    self._loss_type = box_loss_type
    if box_loss_type == 'mse':
        # Despite the 'mse' name, box regression historically uses L1 here.
        self._l1_loss = mloss.L1Loss()
    else:
        self._iou_loss = IoULoss(x1y1x2y2=True, loss_type=box_loss_type)
def test_net(cfg, epoch_idx=-1, output_dir=None, test_data_loader=None, test_writer=None, encoder=None, decoder=None, refiner=None, merger=None): bce_loss = gloss.SigmoidBinaryCrossEntropyLoss() # Testing loop n_samples = len(test_data_loader) test_iou = dict() encoder_losses = utils.network_utils.AverageMeter() refiner_losses = utils.network_utils.AverageMeter() ctx = d2l.try_gpu() for sample_idx, (ids, rendering_images, ground_truth_volume) in enumerate(test_data_loader): #id_to_name= {'02691156': 'aeroplane', '02828884': 'bench', '02933112': 'cabinet', '02958343': 'car', '03001627': 'chair', '03211117': 'display', '03636649': 'lamp', '03691459': 'speaker', '04090263': 'rifle', '04256520': 'sofa', '04379243': 'table', '04401088': 'telephone', '04530566': 'watercraft'} rendering_images = rendering_images.as_in_context(ctx) ground_truth_volume = ground_truth_volume.as_in_context(ctx) # Test the encoder, decoder, refiner and merger gen_volumes2, gen_volumes = forward(encoder, decoder, merger, refiner, rendering_images) gen_volumes = nd.mean(gen_volumes, axis=1) encoder_loss = bce_loss(gen_volumes, ground_truth_volume) * 10 refiner_loss = bce_loss(gen_volumes2, ground_truth_volume) * 10 # Append loss and accuracy to average metrics encoder_losses.update(encoder_loss.mean().asscalar()) refiner_losses.update(refiner_loss.mean().asscalar()) # IoU per sample sample_iou = [] for th in cfg.TEST.VOXEL_THRESH: _volume = (gen_volumes2 > th) intersection = nd.sum((_volume * ground_truth_volume)) union = nd.sum(((_volume + ground_truth_volume) >= 1)) sample_iou.append((intersection / union).asscalar()) print('[INFO] %s Test EDLoss = %.4f RLoss = %.4f IoU = %s' % (dt.now(), encoder_loss.mean().asscalar(), refiner_loss.mean().asscalar(), ['%.4f' % si for si in sample_iou])) return nd.array(sample_iou).mean().asscalar()
def train(net_G, net_D, dataloader, opt):
    """Entry point of the GAN training process.

    Seeds the RNGs, initializes both networks, builds one Adam trainer per
    network, runs `opt.num_epochs` epochs via `train_step` and checkpoints
    the generator and discriminator after every epoch.
    """
    sw = SummaryWriter(logdir='./logs', flush_secs=5)

    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    mx.random.seed(opt.manualSeed)

    ctx = try_gpu()
    print("ctx: ", ctx)

    # Initialize generator and discriminator on the chosen device.
    net_G, net_D = init_networks(net_G, net_D, opt, ctx)

    def _make_trainer(net, learning_rate):
        # Both networks share the same Adam settings apart from the LR.
        return Trainer(net.collect_params(),
                       optimizer='adam',
                       optimizer_params={
                           'learning_rate': learning_rate,
                           'beta1': opt.beta1,
                           'beta2': 0.999
                       })

    trainer_G = _make_trainer(net_G, opt.lrG)
    trainer_D = _make_trainer(net_D, opt.lrD)
    loss_f = loss.SigmoidBinaryCrossEntropyLoss()

    print("Start training ...")
    for epoch in range(opt.num_epochs):
        train_step(dataloader, net_G, net_D, trainer_G, trainer_D,
                   loss_f, opt, ctx, sw, epoch)
        # Checkpoint both networks after each epoch.
        net_G.save_parameters('{0}/netG_epoch_{1}.param'.format(
            opt.experiment, epoch))
        net_D.save_parameters('{0}/netD_epoch_{1}.param'.format(
            opt.experiment, epoch))
def __init__(self, xlnet_base, start_top_n=None, end_top_n=None,
             is_eval=False, units=768, prefix=None, params=None):
    """XLNet model for extractive question answering.

    Wraps a pretrained XLNet backbone with start/end span pooler heads,
    an answer-class head, softmax CE for span prediction and sigmoid BCE
    for answerability classification.
    """
    super(XLNetForQA, self).__init__(prefix=prefix, params=params)
    with self.name_scope():
        # Backbone and beam-search widths for start/end span candidates.
        self.xlnet = xlnet_base
        self.start_top_n = start_top_n
        self.end_top_n = end_top_n
        # Span loss (softmax CE) over token positions.
        self.loss = loss.SoftmaxCELoss()
        # Pooler heads for the span start and end logits.
        self.start_logits = PoolerStartLogits()
        self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval)
        self.eval = is_eval
        # Answerability head and its binary classification loss.
        self.answer_class = XLNetPoolerAnswerClass(units=units)
        self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss()
def loss_FLD(pre, label, attr, keypoint_weight):
    """Multi-task facial-landmark loss.

    Combines landmark regression (normalized MSE) with smile / glasses /
    gender (sigmoid BCE on one-hot targets) and pose (softmax CE), each
    term scaled by its entry in `keypoint_weight` and gated by the
    corresponding `Iface.idx` indicator.
    """
    # Order: "landmarks", "smile", "glasses", "gender", "pose"
    w_landmark, w_smile, w_glasses, w_gender, w_pose = keypoint_weight

    bce = loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
    ce = loss.SoftmaxCrossEntropyLoss(from_logits=False,
                                      sparse_label=False)  # sparse_label=True

    n_rows, n_attrs = attr.shape
    device = attr.context
    widths = [2, 2, 2, 5]  # one-hot widths per attribute column

    def _one_hot(col):
        # Scatter attribute column `col` into a (n_rows, widths[col]) one-hot.
        m = nd.zeros((n_rows, widths[col])).as_in_context(device)
        m[nd.arange(n_rows).as_in_context(device), attr[:, col]] = 1
        return m

    smile, glasses, gender, pose = [_one_hot(c) for c in range(n_attrs)]
    p_landmarks, p_smile, p_glasses, p_gender, p_pose = pre

    landmark_term = NormlizedMSE(p_landmarks, label) * w_landmark
    smile_term = Iface.idx["smile"] * bce(p_smile, smile) * w_smile
    glasses_term = Iface.idx["glasses"] * bce(p_glasses, glasses) * w_glasses
    gender_term = Iface.idx["gender"] * bce(p_gender, gender) * w_gender
    pose_term = Iface.idx["pose"] * ce(p_pose, pose) * w_pose

    return (landmark_term, nd.sum(smile_term), nd.sum(glasses_term),
            nd.sum(gender_term), nd.sum(pose_term))
def train(self, lr, num_epochs):
    """Train the skip-gram embeddings with Adam and sigmoid BCE.

    Bug fix: the loss accumulator previously executed `l_sum += l_sum`
    (adding the running float to itself and never reading the batch
    loss, so the reported loss was always 0.0); the adjacent commented
    line shows the intent was `l.sum().asscalar()`.
    """
    ctx = d2l.try_gpu()
    embed_size = self.embed_size
    # Two embedding tables: net[0] for center words, net[1] for contexts.
    self.net = nn.Sequential()
    self.net.add(
        nn.Embedding(input_dim=len(self.idx_to_token),
                     output_dim=embed_size),
        nn.Embedding(input_dim=len(self.idx_to_token),
                     output_dim=embed_size))
    self.net.initialize(ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(self.net.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = gloss.SigmoidBinaryCrossEntropyLoss()
    for epoch in range(num_epochs):
        start, l_sum, n = time.time(), 0.0, 0
        for batch in self.data_iter:
            center, context_negative, mask, label = [
                data.as_in_context(ctx) for data in batch
            ]
            with autograd.record():
                pred = self.skip_gram(center, context_negative,
                                      self.net[0], self.net[1])
                # Mask out padding, then renormalize per row so padded
                # positions do not dilute the loss.
                l = (loss(pred.reshape(label.shape), label, mask) *
                     mask.shape[1] / mask.sum(axis=1))
            l.backward()
            trainer.step(self.batch_size)
            l_sum += l.sum().asscalar()  # fixed: was `l_sum += l_sum`
            n += l.size
        print('epoch %d, loss %.2f, time %.2fs'
              % (epoch + 1, l_sum / n, time.time() - start))
def __init__(self, feature_dict, args, ctx, task, **kwargs):
    """Build the xDeepFM network.

    Parameters
    ----------
    feature_dict : dict of the form {"sparse": [SingleFeat], "dense": [SingleFeat]}
        Sparse features get an Embedding each; dense features get a Dense
        projection to the embedding size.
    args : configuration namespace; which *_EMBEDDING_SIZE / *_BATCH_SIZE /
        *_LAYER / *_DROPOUT_PROB fields are read depends on args.TASK.
    ctx : device context used later by the model.
    task : 'finish' or other — selects the finish/like hyper-parameter set.
    """
    super(xDeepFM, self).__init__(**kwargs)
    # Make sure the checkpoint directory exists before training starts.
    util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
    # self.feature_sizes = args.FEATURE_SIZE
    self.field_size = args.FIELD_NUM
    self.feature_dict = feature_dict
    print('field_size:')
    print(self.field_size)
    # Task-specific sizing: 'finish' vs 'like' use separate hyper-parameters.
    if args.TASK == 'finish':
        self.embedding_size = args.FINISH_EMBEDDING_SIZE
        self.batch_size = args.FINISH_BATCH_SIZE
    else:
        self.embedding_size = args.LIKE_EMBEDDING_SIZE
        self.batch_size = args.LIKE_BATCH_SIZE
    self.config_name = args.CONFIG_NAME
    # self.dropout_prob = args.DROPOUT_PROB
    self.task = task
    # self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    # Loss is selectable via config; default is sigmoid BCE for CTR-style
    # binary targets.
    if args.LOSS == 'l2loss':
        self.loss = gloss.L2Loss()
    else:
        self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    self.ctx = ctx
    # OrderedDicts keep child registration order deterministic.
    self.embedding_dict = OrderedDict()
    self.dense_dict = OrderedDict()
    with self.name_scope():
        if self.task == 'finish':
            # NOTE(review): np.int is deprecated in modern NumPy — int(x)
            # would be equivalent here.
            self.layer_list = [np.int(x) for x in args.FINISH_LAYER]
            self.dropout = args.FINISH_DROPOUT_PROB
        else:
            self.layer_list = [np.int(x) for x in args.LIKE_LAYER]
            self.dropout = args.LIKE_DROPOUT_PROB
        # self.params.get('v',shape=(self.field_size,self.embedding_size))
        # Final linear head of the plain DNN branch.
        self.dnn_out = nn.Dense(1, use_bias=False)
        self.register_child(self.dnn_out)
        # One embedding per sparse feature, one dense projection per dense
        # feature; all must be registered so their parameters are collected.
        for feat in feature_dict['sparse']:
            self.embedding_dict[feat.feat_name] = nn.Embedding(
                feat.feat_num, self.embedding_size)
        for feat in feature_dict['dense']:
            self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)
        for emb_k, emb_v in self.embedding_dict.items():
            self.register_child(emb_v)
        for den_k, den_v in self.dense_dict.items():
            self.register_child(den_v)
        # Linear (first-order) logit branch.
        self.linear_logit_dense = nn.Dense(1, use_bias=False)
        self.register_child(self.linear_logit_dense)
        self.linear_logit_embedding_bn = nn.BatchNorm()
        self.register_child(self.linear_logit_embedding_bn)
        # DNN branch: Dense -> Dropout -> BatchNorm -> ReLU per layer.
        self.dense_list = []
        self.dropout_list = []
        self.bn_list = []
        self.activation_list = []
        for i in range(len(self.layer_list)):
            self.dense_list.append(nn.Dense(self.layer_list[i]))
            self.dropout_list.append(nn.Dropout(self.dropout))
            self.bn_list.append(nn.BatchNorm())
            self.activation_list.append(nn.Activation('relu'))
            self.register_child(self.dense_list[i])
            self.register_child(self.dropout_list[i])
            self.register_child(self.bn_list[i])
            self.register_child(self.activation_list[i])
        # if
        if True:
            print('true')
            # CIN (Compressed Interaction Network) branch built from 1x1
            # Conv1D layers over pairwise field interactions.
            self.layer_size = [np.int(x) for x in args.CONV1D_LAYER]
            # self.cin_net = CIN(self.embedding_size,self.field_size, (128, 64), self.ctx)
            # print('oo')
            # self.cin_net.initialize()
            # print('uu')
            # self.register_child(self.cin_net)
            self.cin_dense = nn.Dense(1)
            self.register_child(self.cin_dense)
            self.cin_bn = nn.BatchNorm()
            self.register_child(self.cin_bn)
            self.field_nums = [self.field_size]
            self.conv_list = []
            for idx, size in enumerate(self.layer_size):
                # in_channels is (#original fields) x (#fields of previous
                # CIN layer) — the flattened outer-product feature map.
                self.conv_list.append(
                    nn.Conv1D(channels=size,
                              kernel_size=1,
                              strides=1,
                              padding=0,
                              activation='relu',
                              in_channels=self.field_nums[0] *
                              self.field_nums[-1],
                              weight_initializer=init.Uniform()))
                self.field_nums.append(size)
                self.register_child(self.conv_list[idx])
# saved result_dir = './results/images/DenseVAE_DenseLogReg_on_anime/256_5_1024_1_1024_200_10/' # Open a file to write to for training reports readme = open(result_dir + 'README.md', 'w') readme.write('VAE number of latent variables \t' + str(n_latent) + '\n\n') readme.write('VAE number of hidden layers \t' + str(n_hlayers) + '\n\n') readme.write('VAE number of hidden nodes per layer \t' + str(n_hnodes) + '\n\n') readme.write('LogReg number of hidden layers \t' + str(logreg_n_hlayers) + '\n\n') readme.write('LogReg number of hidden nodes per layer \t' + str(logreg_n_hnodes) + '\n\n') # Define the loss function for training the discriminator (the logreg) disc_loss_func = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False) # Define the number of epochs to train n_epochs = 200 readme.write('Number of epochs trained \t' + str(n_epochs) + '\n\n') print("[STATE]: Training started") for epoch in range(n_epochs): # Start recording epoch training time start_time = time.time() # Initialize a list that records the average loss within each batch dense_vae_batch_losses = [] logreg_batch_losses = []
# Subsample frequent tokens, then build the skip-gram training pipeline
# (centers/contexts, negative sampling, DataLoader, embedding net).
subsampled_dataset = [[tk for tk in st if not discard(tk)]
                      for st in dataset]
print('# tokens: %d' % sum([len(st) for st in subsampled_dataset]))
print(compare_counts('the'))
print(compare_counts('join'))

# Extract center words and their context words (window size 5)
all_centers, all_contexts = get_centers_and_contexts(subsampled_dataset, 5)

# Negative sampling: weights proportional to count^0.75, 5 negatives each
sampling_weights = [counter[w]**0.75 for w in idx_to_token]
all_negatives = get_negatives(all_contexts, sampling_weights, 5)

# batch size
batch_size = 512
# Windows lacks fork-based workers, so disable multiprocessing there.
num_workers = 0 if sys.platform.startswith('win32') else 4
loss = gloss.SigmoidBinaryCrossEntropyLoss()
dataset = gdata.ArrayDataset(all_centers, all_contexts, all_negatives)
data_iter = gdata.DataLoader(dataset,
                             batch_size,
                             shuffle=True,
                             batchify_fn=batchify,
                             num_workers=num_workers)
embed_size = 100
epoch = 5
net = nn.Sequential()
net.add(
    nn.Embedding(input_dim=len(idx_to_token),
                 output_dim=embed_size),  # center word vectors
    nn.Embedding(input_dim=len(idx_to_token),
                 output_dim=embed_size))  # context word vectors
train(net, 0.005, epoch)
def train(encoder, decoder, data_utils, param_dict, batch_size, num_epochs, cl_w, score_w):
    """Jointly train an encoder/decoder for gas classification + concentration.

    Per epoch: trains on batches from `data_utils` (classification via
    sigmoid BCE `loss2`, concentration regression via L2 `loss1`, weighted
    by cl_w/score_w inside `batch_loss`), then evaluates on the test split,
    writes predictions to record.csv and saves checkpoints.
    """
    encoder.initialize(init.Xavier(), force_reinit=True)
    decoder.initialize(init.Xavier(), force_reinit=True)
    params_enc = encoder.collect_params()
    params_dec = decoder.collect_params()
    # Separate trainers so encoder/decoder can decay their LRs differently.
    enc_trainer = gluon.Trainer(
        params_enc, 'sgd', {
            'learning_rate': param_dict["lr"],
            "momentum": param_dict["momentum"],
            'wd': param_dict["wd"],
            'clip_gradient': param_dict["clip_gradient"]
        })
    dec_trainer = gluon.Trainer(
        params_dec, 'sgd', {
            'learning_rate': param_dict["lr"],
            "momentum": param_dict["momentum"],
            'wd': param_dict["wd"],
            'clip_gradient': param_dict["clip_gradient"]
        })
    loss1 = gloss.L2Loss()  # concentration regression loss
    loss2 = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)  # class loss
    # NOTE(review): pre_rmse is never updated — the best-RMSE gating below
    # is commented out, so checkpoints are saved every epoch.
    pre_rmse = 100000
    print("train begin......")
    for epoch in range(num_epochs):
        print("*" * 70)
        l_sum, l_class_sum, l_score_sum, n, acc_sum = 0.0, 0.0, 0.0, 0, 0.0
        rmse_sum = nd.array([0, 0, 0])
        data_iter = data_utils.get_batch_train(batch_size)
        # After 10 epochs, decay encoder LR by 0.99 and decoder LR by 0.98
        # per epoch until the encoder LR reaches 0.001.
        if (epoch + 1) > 10 and enc_trainer.learning_rate > 0.001:
            lr_enc_trainer = enc_trainer.learning_rate * 0.99
            lr_dec_trainer = dec_trainer.learning_rate * 0.98
            enc_trainer.set_learning_rate(lr_enc_trainer)
            dec_trainer.set_learning_rate(lr_dec_trainer)
        for i, (X, Y) in enumerate(data_iter):
            # X becomes (seq_len, batch, features) — time-major for the RNN.
            X, Y = nd.array(X).transpose([1, 0, 2]), nd.array(Y)
            # print(X.shape,Y.shape)
            with autograd.record():
                # print("here:",X.shape,Y.shape)
                l_class, l_score, b, cl_res_mini, score_res_mini = batch_loss(
                    encoder, decoder, X, Y, loss1, loss2, cl_w, score_w)
                # Normalize both terms by the effective batch size b.
                l = (l_class / b) + (l_score / b)
            l.backward()
            enc_trainer.step(1)
            dec_trainer.step(1)
            l_sum += l.asscalar()
            l_class_sum += (l_class / b).asscalar()
            l_score_sum += (l_score / b).asscalar()
            # Y[:, :3] are the class targets; Y[:, 3:] the concentrations.
            tmp_mini_acc = get_accuracy(cl_res_mini, Y[:, :3]).asscalar()
            # tmp_mini_rmse = get_rmse(cl_res_mini,Y[:, :3],score_res_mini,Y[:,3:],data_utils)
            pre_and_true_concentration = concentration_transfer(
                cl_res_mini, Y[:, :3], score_res_mini, Y[:, 3:], data_utils)
            tmp_mini_rmse = get_rmse_v2(pre_and_true_concentration[0],
                                        pre_and_true_concentration[1])
            acc_sum += tmp_mini_acc
            rmse_sum = rmse_sum + tmp_mini_rmse
            n += 1
            # Periodic progress report (and once at the very first batch).
            if (i + 1) % 1000 == 0 or (i == 0 and epoch == 0):
                c1, c2, c3 = ((rmse_sum / n)**0.5)[0].asscalar(), (
                    (rmse_sum / n)**0.5)[1].asscalar(), ((rmse_sum / n)**
                                                         0.5)[2].asscalar()
                print("total loss: {:.6}".format(l_sum/n),",loss class:{:.6}".format(l_class_sum/n),\
                    ",loss concentration:{:.6}".format(l_score_sum/n),",[train acc:{:.6}]".format(acc_sum/n),\
                    "[Eth Co Me rmse:[{:.6},{:.6},{:.6}]]".format(c1,c2,c3),
                      "lr is:{:.6}".format(enc_trainer.learning_rate))
        #for test loss
        # End-of-epoch evaluation on the test split (batch of 100).
        n_test = 0
        l_test = nd.array([0])
        test_data = data_utils.get_batch_test(batch_size=100)
        cl_pres, cl_trues = [], []
        con_pres, con_trues = [], []
        for x, y in test_data:
            x, y = nd.array(x).transpose([1, 0, 2]), nd.array(y)
            batchsize_t = x.shape[1]
            enc_state = encoder.begin_state(batch_size=batchsize_t)
            enc_outputs, enc_state = encoder(x, enc_state)
            dec_state = decoder.begin_state(enc_state)
            dec_output = decoder(dec_state, enc_outputs)
            cl_res, score_res = class_and_score_forward(dec_output)
            cl_pres.append(cl_res)
            cl_trues.append(y[:, :3])
            con_pres.append(score_res)
            con_trues.append(y[:, 3:])
            l_ce = loss2(cl_res, y[:, :3]).sum()
            l_l2 = loss1(score_res, y[:, 3:]).sum()
            l_total = l_ce + l_l2
            l_test += l_total
            n_test += y.shape[0]
        all_class_pres = nd.concat(*cl_pres, dim=0)
        all_class_trues = nd.concat(*cl_trues, dim=0)
        acc = get_accuracy(all_class_pres, all_class_trues).asscalar()
        all_con_pres = nd.concat(*con_pres, dim=0)
        all_con_trues = nd.concat(*con_trues, dim=0)
        # test_rmse = get_rmse(all_class_pres, all_class_trues, all_con_pres, all_con_trues, data_utils)
        test_pre_and_true_concentration = concentration_transfer(
            all_class_pres, all_class_trues, all_con_pres, all_con_trues,
            data_utils)
        test_rmse = get_rmse_v2(test_pre_and_true_concentration[0],
                                test_pre_and_true_concentration[1])
        # Dump per-sample predictions; overwritten every epoch.
        f = open("record.csv", "w", newline='')
        results_writer(f,all_class_pres,all_class_trues,\
            test_pre_and_true_concentration[0],test_pre_and_true_concentration[1])
        if (epoch + 1) % 1 == 0:
            c1, c2, c3 = (test_rmse**0.5)[0].asscalar(), (
                test_rmse**0.5)[1].asscalar(), (test_rmse**0.5)[2].asscalar()
            print("epoch %d, train_loss %.3f,test_loss: %.3f, " % (epoch + 1, l_sum/n,l_test.asscalar()/n_test)+\
                "[test Eth Co Me rmse:[{:.6},{:.6},{:.6}]]".format(c1,c2,c3),"[train acc:{:.6}]".format(acc_sum/n),\
                "[test acc:{:.6}]".format(acc))
            # if test_rmse < pre_rmse:
            # NOTE(review): param_enc / param_dec are globals not defined in
            # this function — confirm they name the checkpoint paths.
            encoder.save_parameters(param_enc)
            decoder.save_parameters(param_dec)
            # pre_rmse = test_rmse
            print("params updated!")
        print("*" * 70)
        f.close()
def train_eval(opt):
    """Train and evaluate an R(2+1)D video model on UCF-101 or Meitu.

    Builds the dataset-specific network/loss/loaders from `opt`, trains
    with multi-GPU data parallelism, and after each epoch checkpoints the
    net and runs validation (accuracy for UCF, top-k IoU for Meitu).
    """
    mx.random.seed(123)
    np.random.seed(123)
    os.environ['CUDA_VISIBLE_DEVICES']='0,1,2,3'
    # NOTE(review): `opt.gpus is ''` is an identity test on a literal —
    # works only by interning accident; `== ''` is what is meant.
    gpus = [] if opt.gpus is None or opt.gpus is '' else [
        int(gpu) for gpu in opt.gpus.split(',')]
    num_gpus = len(gpus)
    # Effective batch is per-device batch times number of devices.
    batch_size = opt.batch_per_device*max(1,num_gpus)
    context = [mx.gpu(i) for i in gpus] if num_gpus>0 else [mx.cpu()]
    steps = [int(step) for step in opt.lr_scheduler_steps.split(',')]
    vis_env = opt.dataset + opt.output
    vis = Visulizer(env=vis_env)
    vis.log(opt)
    #optional ucf101 or meitu,get net structure,loss criterion,train val loader
    if opt.dataset=='ucf101' or opt.dataset=='ucf':
        net = R2Plus2D(num_class=101,model_depth=opt.model_depth)
        loss_criterion = gloss.SoftmaxCrossEntropyLoss()  # loss function
        train_loader, val_loader = get_ucf101trainval(datadir='/data/jh/notebooks/hudengjun/DeepVideo/UCF-101',
                                                      batch_size=batch_size,
                                                      n_frame=opt.n_frame,
                                                      crop_size=opt.crop_size,
                                                      scale_h=opt.scale_h,
                                                      scale_w=opt.scale_w,
                                                      num_workers=opt.num_workers)  # the train and evaluation data loader
    elif opt.dataset =='meitu':
        # Multi-label setting: 63 tags.
        net = R2Plus2D(num_class=63,model_depth=opt.model_depth,final_temporal_kernel=opt.n_frame//8)  # labels set 63
        # use multi gpus to load data
        train_loader, val_loader = get_meitu_dataloader(data_dir=opt.meitu_dir,
                                                        device_id=opt.decoder_gpu,
                                                        batch_size=batch_size,
                                                        num_workers=opt.num_workers,
                                                        n_frame=opt.n_frame,
                                                        crop_size=opt.crop_size,
                                                        scale_h=opt.scale_h,
                                                        scale_w=opt.scale_w,
                                                        cache_size=opt.cache_size)
    # Candidate multi-label losses, keyed by opt.loss_type.
    loss_dict = {'bce':gloss.SigmoidBinaryCrossEntropyLoss,
                 'warp_nn':WarpLoss,
                 'warp_fn':WARP_funcLoss,
                 'lsep_nn':LsepLoss,
                 'lsep_fn':LSEP_funcLoss}
    if opt.loss_type == 'lsep_nnh':
        loss_criterion = LsepLossHy(batch_size=batch_size//num_gpus,num_class=opt.num_class)
        loss_criterion.hybridize()
    elif opt.loss_type =='bce':
        loss_criterion = gloss.SigmoidBinaryCrossEntropyLoss()
        loss_criterion.hybridize()
    else:
        loss_criterion = loss_dict[opt.loss_type]()
    # net parameter initialize in several cards
    net.initialize(mx.init.Xavier(),ctx=context)
    if not opt.pretrained is None:
        if opt.pretrained.endswith('.pkl'):
            net.load_from_caffe2_pickle(opt.pretrained)
        elif opt.pretrained.endswith('.params'):
            try:
                print("load pretrained params ",opt.pretrained)
                net.load_from_sym_params(opt.pretrained,ctx = context)
            except Exception as e:
                # Fall back to plain Gluon parameter loading.
                print("load as sym params failed,reload as gluon params")
                net.load_params(opt.pretrained,ctx=context)  #load params to net context
    net.hybridize()
    trainer = gluon.Trainer(net.collect_params(),'sgd',
                            {'learning_rate':opt.lr,'momentum':0.9,'wd':opt.wd},
                            kvstore=opt.kvstore)  # the trainer
    lr_steps = lr_schedualer.MultiFactorScheduler(steps,opt.lr_schedualer_factor)
    lr_steps.base_lr = opt.lr
    best_eval = 0.0
    for epoch in range(opt.num_epoch):
        tic = time()
        pre_loss,cumulative_loss = 0.0,0.0
        trainer.set_learning_rate(lr_steps(epoch))
        vis.log('Epoch %d learning rate %f'%(epoch,trainer.learning_rate))
        for i,(data,label) in enumerate(train_loader):
            # Some decoded batches can be malformed; skip rather than crash.
            try:
                data_list = gluon.utils.split_and_load(data,ctx_list=context,batch_axis=0)
                label_list = gluon.utils.split_and_load(label,ctx_list=context,batch_axis=0)
            except Exception as e:
                logging.info(e)
                continue
            Ls =[]
            with autograd.record():
                for x,y in zip(data_list,label_list):
                    y_hat = net(x)
                    loss = loss_criterion(y_hat,y)
                    Ls.append(loss)
                    cumulative_loss +=nd.mean(loss).asscalar()
            for L in Ls:
                L.backward()
            trainer.step(data.shape[0])
            if (i+1)%opt.log_interval ==0:
                # Report the loss accumulated since the previous report.
                vis.log('[Epoch %d,Iter %d ] training loss= %f'%(
                    epoch,i+1,cumulative_loss-pre_loss))
                vis.plot('loss',cumulative_loss-pre_loss)
                pre_loss =cumulative_loss
            if opt.debug:
                # Debug mode: stop after ~3 log intervals worth of batches.
                if (i+1)//(opt.log_interval)==3:
                    break
        vis.log('[Epoch %d] training loss=%f'%(epoch,cumulative_loss))
        vis.log('[Epoch %d] time used: %f'%(epoch,time()-tic))
        # NOTE(review): format string below has %d but no arguments applied.
        vis.log('[Epoch %d] saving net')
        save_path = './{0}/{1}_test-val{2}.params'.format(opt.output,
                                                          str(opt.dataset + opt.loss_type),
                                                          str(epoch))
        vis.log("save path %s" % (save_path))
        net.save_parameters(save_path)
        best_iou=0.0
        if opt.dataset=='ucf101' or opt.dataset =='ucf':
            # Single-label validation: running top-1 accuracy.
            acc = nd.array([0],ctx=mx.cpu())
            test_iter = 0
            for i,(data,label) in enumerate(val_loader):
                try:
                    data_list = gluon.utils.split_and_load(data,ctx_list=context,batch_axis=0)
                    label_list = gluon.utils.split_and_load(label,ctx_list=context,batch_axis=0)
                except Exception as e:
                    logging.info(e)
                    continue
                for x,y in zip(data_list,label_list):
                    y_hat = net(x)
                    test_iter +=1  # single iter
                    y_pred = y_hat.argmax(axis=1)
                    acc += (y_pred == y.astype('float32')).mean().asscalar()  # acc in cpu
                val_acc = acc.asscalar() / test_iter
                if (i+1) %(opt.log_interval)==0:
                    logging.info("[Epoch %d,Iter %d],acc=%f" % (epoch,i,val_acc))
                if opt.debug:
                    if (i+1)//opt.log_interval ==3:
                        break
            vis.plot('acc',val_acc)
        elif opt.dataset=='meitu':
            # Multi-label validation: IoU between top-k predictions and the
            # ground-truth tag set, for k = 1..4.
            k=4
            topk_inter = np.array([1e-4]*k)  # a epsilon for divide not by zero
            topk_union = np.array([1e-4]*k)
            for i,(data,label) in enumerate(val_loader):
                try:
                    data_list = gluon.utils.split_and_load(data,ctx_list=context,batch_axis=0)
                    label_list = gluon.utils.split_and_load(label,ctx_list=context,batch_axis=0)
                except Exception as e:
                    logging.info(e)
                    continue
                for x,y in zip(data_list,label_list):
                    y_hat = net(x)
                    pred_order = y_hat.argsort()[:,::-1]  # sort and desend order
                    #just compute top1 label
                    pred_order_np = pred_order.asnumpy()
                    y_np = y.asnumpy()
                    if opt.debug:
                        print("pred shape and target shape",pred_order_np.shape,y_np.shape)
                    for pred_vec,y_vec in zip(pred_order_np,y_np):
                        # Tags with score > 0.1 count as ground-truth labels.
                        label_set =set([index for index,value in enumerate(y_vec) if value>0.1])
                        # NOTE(review): the comprehension variable k shadows
                        # the outer k=4 within this expression.
                        pred_topk = [set(pred_vec[0:k]) for k in range(1,k+1)]
                        topk_inter +=np.array([len(p_k.intersection(label_set)) for p_k in pred_topk])
                        topk_union +=np.array([len(p_k.union(label_set)) for p_k in pred_topk])
                if (i+1) %(opt.log_interval)==0:
                    logging.info("[Epoch %d,Iter %d],time %s,Iou %s" % (epoch, i, \
                        tmm.strftime("%Y-%D:%H-%S"), \
                        str(topk_inter / topk_union)))
                    # NOTE(review): loop variable i here shadows the batch
                    # index of the enclosing loop.
                    for i in range(k):
                        vis.plot('val_iou_{0}'.format(i+1),topk_inter[i]/topk_union[i])
                if opt.debug:
                    if (i + 1) // (opt.log_interval) == 2:
                        break
    vis.log("""----------------------------------------
----XXXX------finished------------------
----------------------------------------""")
def train_VAE_GAN(vae_net, disc_net, train_features, test_features, test_results_dir, vae_parameters_path=None, batch_size=64, init_lr=0.001, pbp_weight=1, disc_loss_mul=10, n_epochs=200, n_solo_epochs=0, max_disc_loss=999, variable_pbp_weight='constant', pbp_weight_decay=1, CTX=d2l.try_gpu()): # VAE_net is a VAE network (most likely a ConvVAE with 512 latent variables # 32 base channels, 3 * 64 * 64 output shape # disc_net is a discriminator network (most likely a ResNet) # whose output of (batch_size, 1) # test_results_dir is the directory (must end with a slash /) that contains # the validation images after all epochs were run # vae_parameters_dir is the path (so directory + filename) that the trained # VAE's model parameters will be saved to. # n_solo_epochs indicate the number of epochs that the VAE will train using # no discriminator; n_solo_epochs must be smaller than n_epochs, and # the number of epochs trained with discriminator is # n_epochs - n_solo_epochs # max_disc_loss is the maximum loss beyond which the discriminator's loss # will not be used in updating VAE in the generator cycle # If variable_pbp_weight is False/None, then the pbp weight will remain # constant for all epochs (except when training solo, in which case # the pbp weight is adjusted to 1, but will revert back to the specified value # after solo epochs are done. # # If variable_pbp_weight is 'decay', then for every 25 combo epochs the # pbp_weight will decrease by constant factor. 
############################################################################# ## MODEL INITIALIZATION AND TRAINER ############################################################################# # # Initialize the VAE network and get its trainer print( '[STATE]: Initializing model parameters and constructing Gluon trainers' ) # Set the pbp weight to the desired value vae_net.pbp_weight = pbp_weight vae_net.collect_params().initialize(mx.init.Xavier(), force_reinit=True, ctx=CTX) vae_trainer = gluon.Trainer(vae_net.collect_params(), 'adam', {'learning_rate': init_lr}) # Initialize the Disc network nd get its trainer disc_net.collect_params().initialize(mx.init.Xavier(), force_reinit=True, ctx=CTX) disc_trainer = gluon.Trainer(disc_net.collect_params(), 'adam', {'learning_rate': init_lr}) ############################################################################# ## Output file writer initialization ############################################################################# # # Open a file to write to for training statistics; the training statistics csv # will be written to the results directory csv_writer = None try: csv_writer = open(test_results_dir + 'training_statistics.csv', 'w') print('[STATE]: Writing training statistics to ' + test_results_dir + 'training_statistics.csv') except: print( '[ERROR]: test results directory not valid, writing training statistics to main directory' ) csv_writer = open('./training_statistics.csv', 'w') # CSV file needs to open with a header that is the column names csv_writer.write('epoch,vae_loss,disc_loss,time_consumed\n') # Open a file to write README.md for displaying validation images; the README # file will be written to the results directory readme_writer = None try: readme_writer = open(test_results_dir + 'README.md', 'w') print('[STATE]: Writing README report to ' + test_results_dir + 'README.md') except: print( '[ERROR]: test results directory not valid, writing readme to main directory' ) csv_writer = 
# ---------------------------------------------------------------------------
# VAE-GAN training script (top-level).
# NOTE(review): this chunk relies on names defined earlier in the file and not
# visible here: readme_writer, csv_writer, vae_net, disc_net, vae_trainer,
# disc_trainer, train_features, test_features, CTX, batch_size, n_epochs,
# n_solo_epochs, max_disc_loss, disc_loss_mul, variable_pbp_weight,
# pbp_weight_decay, vae_parameters_path, test_results_dir -- confirm upstream.
# ---------------------------------------------------------------------------

# NOTE(review): the file handle returned by open() is discarded here --
# presumably this line was meant to be `readme_writer = open(...)`; confirm
# against the full file (readme_writer is written to immediately below).
open('./README.md', 'w')

# Record the run's hyper-parameters at the top of the README.
readme_writer.write('n_latent:{} \n\n'.format(vae_net.n_latent))
readme_writer.write('n_base_channels:{} \n\n'.format(
    vae_net.n_base_channels))
if variable_pbp_weight == 'constant':
    readme_writer.write('pixel-by-pixel loss weight:{} \n\n'.format(
        vae_net.pbp_weight))
elif variable_pbp_weight == 'decay':
    readme_writer.write(
        'pixel-by-pixel loss weight initially {} and decay by {} every 25 combo epochs \n\n'
        .format(vae_net.pbp_weight, pbp_weight_decay))
readme_writer.write('n_solo_epochs:{} \n\n'.format(n_solo_epochs))
readme_writer.write('n_combo_epochs:{} \n\n'.format(n_epochs - n_solo_epochs))
readme_writer.write('max_disc_loss :{} \n\n'.format(max_disc_loss))

# --- Data iterator -------------------------------------------------------
# 'keep' retains the final (possibly smaller) batch instead of dropping it.
train_iter = gdata.DataLoader(train_features,
                              batch_size,
                              shuffle=True,
                              last_batch='keep')
sample_size = train_features.shape[0]
print('[STATE]: {} training samples loaded into iterator'.format(
    sample_size))

# --- Training parameters -------------------------------------------------
# Combo epochs = total epochs minus the VAE-only ("solo") epochs.
n_combo_epochs = n_epochs - n_solo_epochs
print(
    '[STATE]: {} solo epochs and {} combo epochs are to be trained'.format(
        n_solo_epochs, n_combo_epochs))

# --- Training ------------------------------------------------------------
print('[STATE]: Training started')

# Solo phase: train the VAE alone with pixel-by-pixel (PBP) loss weight
# forced to 1; the configured weight is saved and restored afterwards.
specified_pbp_weight = vae_net.pbp_weight
vae_net.pbp_weight = 1
for epoch in range(n_solo_epochs):
    # Per-batch mean VAE losses for this epoch.
    batch_losses = []
    epoch_start_time = time.time()
    for batch_features in train_iter:
        batch_features = batch_features.as_in_context(CTX)
        # Forward pass under autograd, then backprop and parameter update.
        with autograd.record():
            loss = vae_net(batch_features)
        loss.backward()
        # Normalize the gradient step by the actual batch size.
        vae_trainer.step(batch_features.shape[0])
        batch_losses.append(nd.mean(loss).asscalar())
    # Epoch loss = mean over batch means.
    epoch_train_loss = np.mean(batch_losses)
    epoch_stop_time = time.time()
    time_consumed = epoch_stop_time - epoch_start_time
    # Report this epoch to README and stdout.
    epoch_report_str = 'Epoch{}, Training loss {:.10f}, Time used {:.2f}'.format(
        epoch, epoch_train_loss, time_consumed)
    readme_writer.write(epoch_report_str + '\n\n')
    print('[STATE]: ' + epoch_report_str)

# Restore the configured PBP weight before adversarial training.
vae_net.pbp_weight = specified_pbp_weight

# Combo phase: adversarial training. from_sigmoid=False because disc_net is
# assumed to output raw logits here -- confirm against the discriminator.
disc_loss_func = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)

# Gate flag: 1 -> include discriminator loss in the VAE update; 0 -> the
# discriminator did worse than max_disc_loss last epoch, so ignore its signal.
use_disc_loss = 1
for epoch in range(n_solo_epochs, n_epochs):
    start_time = time.time()
    # Per-batch mean losses for the VAE and the discriminator.
    vae_batch_losses = []
    disc_batch_losses = []
    for batch_features in train_iter:
        batch_features = batch_features.as_in_context(CTX)
        # Last batch may be smaller than batch_size (last_batch='keep').
        act_batch_size = batch_features.shape[0]
        # Labels: 1 = genuine image, 0 = generated image.
        genuine_labels = nd.ones((act_batch_size, ), ctx=CTX)
        generated_labels = nd.zeros((act_batch_size, ), ctx=CTX)

        # -- Discriminator update -----------------------------------------
        with autograd.record():
            # Loss on genuine images.
            genuine_logit_preds = disc_net(batch_features)
            genuine_loss = disc_loss_func(genuine_logit_preds,
                                          genuine_labels)
            # Loss on VAE reconstructions.
            generated_features = vae_net.generate(batch_features)
            generated_logit_preds = disc_net(generated_features)
            generated_loss = disc_loss_func(generated_logit_preds,
                                            generated_labels)
            disc_loss = genuine_loss + generated_loss
        disc_loss.backward()
        disc_batch_losses.append(nd.mean(disc_loss).asscalar())
        disc_trainer.step(act_batch_size)

        # -- VAE (generator) update ---------------------------------------
        with autograd.record():
            # Adversarial term: how well reconstructions fool the
            # discriminator (compared against "genuine" labels).
            generated_features = vae_net.generate(batch_features)
            generated_logit_preds = disc_net(generated_features)
            batch_disc_loss = disc_loss_func(generated_logit_preds,
                                             genuine_labels)
            # Total generator loss: VAE loss + gated, scaled adversarial
            # loss (use_disc_loss zeroes the term when the discriminator
            # is deemed unreliable).
            gen_loss = vae_net(
                batch_features
            ) + batch_disc_loss * disc_loss_mul * use_disc_loss
        gen_loss.backward()
        vae_batch_losses.append(nd.mean(gen_loss).asscalar())
        vae_trainer.step(act_batch_size)

    # -- End of epoch bookkeeping -----------------------------------------
    stop_time = time.time()
    time_consumed = stop_time - start_time
    epoch_disc_train_loss = np.mean(disc_batch_losses)
    epoch_vae_train_loss = np.mean(vae_batch_losses)

    # Optional PBP weight decay every 25 combo epochs.
    if variable_pbp_weight == 'decay':
        if (1 + epoch) % 25 == 0:
            vae_net.pbp_weight = vae_net.pbp_weight * pbp_weight_decay
            print('VAE PBP weight adjusted to {:.10f}'.format(
                vae_net.pbp_weight))

    # Gate the adversarial term for the NEXT epoch: only trust the
    # discriminator when its epoch loss is at most max_disc_loss. Note the
    # discriminator itself keeps training either way.
    if epoch_disc_train_loss <= max_disc_loss:
        use_disc_loss = 1
    else:
        use_disc_loss = 0

    # Write the epoch summary to README (markdown) and CSV, and print it.
    epoch_README_report = 'Epoch{}, VAE Training loss {:.5f}, ResNet Training loss {:.10f}, Time used {:.2f}'
    epoch_README_report = epoch_README_report.format(
        epoch, epoch_vae_train_loss, epoch_disc_train_loss, time_consumed)
    epoch_CSV_report = '{},{:.10f},{:.10f},{:.2f}'.format(
        epoch, epoch_vae_train_loss, epoch_disc_train_loss, time_consumed)
    readme_writer.write(epoch_README_report + '\n\n')
    csv_writer.write(epoch_CSV_report + '\n')
    print('[STATE]: ' + epoch_README_report)

# --- End of training: validation ----------------------------------------
csv_writer.close()

# Save model parameters; fall back to the working directory if the
# configured path is unusable.
# NOTE(review): bare except hides the real error type; consider OSError.
try:
    vae_net.save_parameters(vae_parameters_path)
except:
    print(
        '[ERROR]: VAE parameters path is not valid; parameters will be saved to main directory'
    )
    vae_net.save_parameters('./recent_model.params')

# Generate n_validations reconstructions from held-out test features.
n_validations = 10
img_arrays = vae_net.generate(
    test_features[0:n_validations].as_in_context(CTX)).asnumpy()
for i in range(n_validations):
    # Embed the image in the README via a markdown image link.
    readme_writer.write('![' + str(i) + '](./' + str(i) + '.png)')
    # Reshape (n_channels, width, height) -> (width, height, n_channels)
    # using the dimensions recorded on the VAE instance.
    img_array = img_arrays[i].reshape(
        (vae_net.out_width, vae_net.out_height, vae_net.n_channels))
    # Render and save; fall back to the working directory on failure.
    plt.imshow(img_array)
    try:
        plt.savefig(test_results_dir + str(i) + '.png')
        print('[STATE]: ' + test_results_dir + str(i) + '.png' + ' saved')
    except:
        print(
            '[ERROR]: test results directory not valid, saving images to main directory'
        )
        plt.savefig('./' + str(i) + '.png')
    plt.close()

readme_writer.close()
def train_cnn(args):
    """Train an R(2+1)D video CNN with per-tag sigmoid cross-entropy loss.

    Loads the Meitu video dataloader, optionally restores pretrained weights
    (Caffe2 pickle, symbol params, or Gluon params), then runs SGD with a
    multi-factor LR schedule, logging progress through a Visulizer.

    Fixes over the previous revision:
    - ``split_and_load`` was called with the invalid kwarg ``batch=0``; the
      resulting TypeError was swallowed by the except clause, so every batch
      was silently skipped. The correct kwarg is ``batch_axis=0``.
    - ``'[Epoch %d] saving net'`` was logged without a format argument.
    - ``'save path %d' % save_path`` used ``%d`` on a string, which raises
      TypeError at runtime; changed to ``%s``.

    Parameters
    ----------
    args : options object with fields env, meitu_dir, num_workers, n_frame,
        crop_size, scale_h, scale_w, pretrained, lr, wd, kvstore,
        lr_scheduler_factor, num_epoch, log_interval, debug.
    """
    opt = args
    vis = Visulizer(env=opt.env)
    vis.log(opt)
    batch, context, steps = parse_basic(opt)
    train_loader, val_loader = get_meitu_dataloader(data_dir=opt.meitu_dir,
                                                    device_id=decoder_gpu,
                                                    batch_size=batch,
                                                    num_workers=opt.num_workers,
                                                    n_frame=opt.n_frame,
                                                    crop_size=opt.crop_size,
                                                    scale_h=opt.scale_h,
                                                    scale_w=opt.scale_w)
    net = R2Plus2D(num_class=63, model_depth=34,
                   final_spatial_kernel=opt.crop_size // 8,
                   final_temporal_kernel=opt.n_frame // 16)
    net.initialize(mx.init.Xavier(), ctx=context)
    if opt.pretrained is not None:
        if opt.pretrained.endswith('.pkl'):
            # Weights exported from Caffe2.
            net.load_from_caffe2_pickle(opt.pretrained)
        elif opt.pretrained.endswith('.params'):
            # Try symbol-style params first, then fall back to Gluon params.
            try:
                print("load pretrained params ", opt.pretrained)
                net.load_from_sym_params(opt.pretrained, ctx=context)
            except Exception as e:
                print("load as sym params failed,reload as gluon params")
                net.load_params(opt.pretrained, ctx=context)
    lr_opts = {'learning_rate': opt.lr, 'momentum': 0.9, 'wd': opt.wd}
    trainer = Trainer(net.collect_params(), 'sgd', lr_opts, kvstore=opt.kvstore)
    lr_steps = MultiFactorScheduler(steps, opt.lr_scheduler_factor)
    # Multi-label classification: one sigmoid per tag.
    loss_criterion = gloss.SigmoidBinaryCrossEntropyLoss()
    for epoch in range(opt.num_epoch):
        tic = time()
        pre_loss, cumulative_loss = 0.0, 0.0
        trainer.set_learning_rate(lr_steps(epoch))
        for i, (data, label) in enumerate(train_loader):
            # Shard the batch across devices; skip batches that fail to load.
            try:
                data_list = gluon.utils.split_and_load(data, ctx_list=context,
                                                       batch_axis=0)
                label_list = gluon.utils.split_and_load(label, ctx_list=context,
                                                        batch_axis=0)
            except Exception as e:
                vis.log(e)
                continue
            Ls = []
            with autograd.record():
                for x, y in zip(data_list, label_list):
                    y_hat = net(x)
                    loss = loss_criterion(y_hat, y)
                    Ls.append(loss)
                    cumulative_loss += nd.mean(loss).asscalar()
            for L in Ls:
                L.backward()
            # Normalize the update by the full (pre-shard) batch size.
            trainer.step(data.shape[0])
            if (i + 1) % opt.log_interval == 0:
                vis.log('Epoch %d,Iter %d,Training loss=%f' % (
                    epoch, i + 1, cumulative_loss - pre_loss))
                pre_loss = cumulative_loss
            if opt.debug:
                break
        vis.log('[Epoch %d],trainning loss %f' % (epoch, cumulative_loss))
        vis.log('[Epoch %d],time used:%f' % (epoch, time() - tic))
        vis.log('[Epoch %d] saving net' % epoch)
        save_path = './output/encoder_cnn-{0}.params'.format(str(epoch))
        vis.log('save path %s' % save_path)
        net.save_parameters(save_path)
def Train(directory, epochs, aggregator, embedding_size, layers, dropout,
          slope, lr, wd, random_seed, ctx):
    """Run 5-fold cross-validated training of a GNNMDA model on a
    miRNA-disease association graph.

    For each fold: marks train/test edges on the DGL graph, trains a
    GraphEncoder + BilinearDecoder with sigmoid BCE, evaluates on the held-out
    edges, and accumulates AUC/accuracy/precision/recall/F1 plus ROC curves.

    Fix over the previous revision: fold membership was written with chained
    assignment (``samples_df['train'].iloc[idx] = 1``), which triggers
    SettingWithCopy behavior and silently fails to modify the frame under
    pandas copy-on-write; replaced with single-step ``.loc`` assignment
    (valid because samples_df has a default RangeIndex).

    Returns
    -------
    tuple of lists: (auc, acc, precision, recall, f1, fprs, tprs), one entry
    per fold.
    """
    dgl.load_backend('mxnet')
    # Seed every RNG in play for reproducible folds and initialization.
    random.seed(random_seed)
    np.random.seed(random_seed)
    mx.random.seed(random_seed)
    g, disease_ids_invmap, mirna_ids_invmap = build_graph(
        directory, random_seed=random_seed, ctx=ctx)
    samples = sample(directory, random_seed=random_seed)
    ID, IM = load_data(directory)

    print('## vertices:', g.number_of_nodes())
    print('## edges:', g.number_of_edges())
    print('## disease nodes:', nd.sum(g.ndata['type'] == 1).asnumpy())
    print('## mirna nodes:', nd.sum(g.ndata['type'] == 0).asnumpy())

    samples_df = pd.DataFrame(samples, columns=['miRNA', 'disease', 'label'])
    # Map sample IDs to graph vertex IDs; miRNA vertices are offset by the
    # number of disease vertices (ID.shape[0]).
    sample_disease_vertices = [
        disease_ids_invmap[id_] for id_ in samples[:, 1]
    ]
    sample_mirna_vertices = [
        mirna_ids_invmap[id_] + ID.shape[0] for id_ in samples[:, 0]
    ]

    kf = KFold(n_splits=5, shuffle=True, random_state=random_seed)
    train_index = []
    test_index = []
    for train_idx, test_idx in kf.split(samples[:, 2]):
        train_index.append(train_idx)
        test_index.append(test_idx)

    auc_result = []
    acc_result = []
    pre_result = []
    recall_result = []
    f1_result = []
    fprs = []
    tprs = []

    for i in range(len(train_index)):
        print(
            '------------------------------------------------------------------------------------------------------'
        )
        print('Training for Fold ', i + 1)
        # Mark fold membership. KFold yields positional indices and
        # samples_df has a default RangeIndex, so .loc works here.
        samples_df['train'] = 0
        samples_df['test'] = 0
        samples_df.loc[train_index[i], 'train'] = 1
        samples_df.loc[test_index[i], 'test'] = 1

        train_tensor = nd.from_numpy(
            samples_df['train'].values.astype('int32')).copyto(ctx)
        test_tensor = nd.from_numpy(
            samples_df['test'].values.astype('int32')).copyto(ctx)
        edge_data = {'train': train_tensor, 'test': test_tensor}
        # The association graph is undirected: mark both edge directions.
        g.edges[sample_disease_vertices,
                sample_mirna_vertices].data.update(edge_data)
        g.edges[sample_mirna_vertices,
                sample_disease_vertices].data.update(edge_data)

        # Training edge subgraph (keeps all nodes so vertex IDs line up).
        train_eid = g.filter_edges(lambda edges: edges.data['train']).astype(
            'int64')
        g_train = g.edge_subgraph(train_eid, preserve_nodes=True)
        g_train.copy_from_parent()
        rating_train = g_train.edata['rating']
        src_train, dst_train = g_train.all_edges()

        # Held-out test edges stay on the full graph.
        test_eid = g.filter_edges(lambda edges: edges.data['test']).astype(
            'int64')
        src_test, dst_test = g.find_edges(test_eid)
        rating_test = g.edges[test_eid].data['rating']
        src_train = src_train.copyto(ctx)
        src_test = src_test.copyto(ctx)
        dst_train = dst_train.copyto(ctx)
        dst_test = dst_test.copyto(ctx)
        print('## Training edges:', len(train_eid))
        print('## Testing edges:', len(test_eid))

        # Fresh model per fold.
        model = GNNMDA(
            GraphEncoder(embedding_size=embedding_size, n_layers=layers,
                         G=g_train, aggregator=aggregator, dropout=dropout,
                         slope=slope, ctx=ctx),
            BilinearDecoder(feature_size=embedding_size))
        model.collect_params().initialize(
            init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ctx=ctx)
        # from_sigmoid=True: the decoder already outputs probabilities.
        cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
        trainer = gluon.Trainer(model.collect_params(), 'adam', {
            'learning_rate': lr,
            'wd': wd
        })

        for epoch in range(epochs):
            start = time.time()
            # 10 optimizer steps per reported "epoch".
            for _ in range(10):
                with mx.autograd.record():
                    score_train = model(g_train, src_train, dst_train)
                    loss_train = cross_entropy(score_train,
                                               rating_train).mean()
                loss_train.backward()
                trainer.step(1)

            # Validation on the held-out edges (full-graph encoding).
            h_val = model.encoder(g)
            score_val = model.decoder(h_val[src_test], h_val[dst_test])
            loss_val = cross_entropy(score_val, rating_test).mean()

            train_auc = metrics.roc_auc_score(
                np.squeeze(rating_train.asnumpy()),
                np.squeeze(score_train.asnumpy()))
            val_auc = metrics.roc_auc_score(np.squeeze(rating_test.asnumpy()),
                                            np.squeeze(score_val.asnumpy()))
            # Threshold probabilities at 0.5 for the hard-label metrics.
            results_val = [
                0 if j < 0.5 else 1 for j in np.squeeze(score_val.asnumpy())
            ]
            accuracy_val = metrics.accuracy_score(rating_test.asnumpy(),
                                                  results_val)
            precision_val = metrics.precision_score(rating_test.asnumpy(),
                                                    results_val)
            recall_val = metrics.recall_score(rating_test.asnumpy(),
                                              results_val)
            f1_val = metrics.f1_score(rating_test.asnumpy(), results_val)
            end = time.time()
            print('Epoch:', epoch + 1,
                  'Train Loss: %.4f' % loss_train.asscalar(),
                  'Val Loss: %.4f' % loss_val.asscalar(),
                  'Acc: %.4f' % accuracy_val, 'Pre: %.4f' % precision_val,
                  'Recall: %.4f' % recall_val, 'F1: %.4f' % f1_val,
                  'Train AUC: %.4f' % train_auc, 'Val AUC: %.4f' % val_auc,
                  'Time: %.2f' % (end - start))

        # Final test-set evaluation for this fold.
        h_test = model.encoder(g)
        score_test = model.decoder(h_test[src_test], h_test[dst_test])
        fpr, tpr, thresholds = metrics.roc_curve(
            np.squeeze(rating_test.asnumpy()),
            np.squeeze(score_test.asnumpy()))
        test_auc = metrics.auc(fpr, tpr)
        results_test = [
            0 if j < 0.5 else 1 for j in np.squeeze(score_test.asnumpy())
        ]
        accuracy_test = metrics.accuracy_score(rating_test.asnumpy(),
                                               results_test)
        precision_test = metrics.precision_score(rating_test.asnumpy(),
                                                 results_test)
        recall_test = metrics.recall_score(rating_test.asnumpy(),
                                           results_test)
        f1_test = metrics.f1_score(rating_test.asnumpy(), results_test)
        print('Fold:', i + 1, 'Test Acc: %.4f' % accuracy_test,
              'Test Pre: %.4f' % precision_test,
              'Test Recall: %.4f' % recall_test, 'Test F1: %.4f' % f1_test,
              'Test AUC: %.4f' % test_auc)

        auc_result.append(test_auc)
        acc_result.append(accuracy_test)
        pre_result.append(precision_test)
        recall_result.append(recall_test)
        f1_result.append(f1_test)
        fprs.append(fpr)
        tprs.append(tpr)

    print('## Training Finished !')
    print(
        '----------------------------------------------------------------------------------------------------------'
    )
    return auc_result, acc_result, pre_result, recall_result, f1_result, fprs, tprs
def __init__(self, feature_dict, args, ctx, task, **kwargs):
    """Build a DeepFM block.

    Parameters
    ----------
    feature_dict : dict with keys ``'sparse'`` and ``'dense'``, each a list
        of SingleFeat descriptors ({"sparse":[SingleFeat],"dense":[SingleFeat]}).
    args : config object supplying FIELD_NUM, *_EMBEDDING_SIZE, *_BATCH_SIZE,
        CONFIG_NAME, LOSS, *_LAYER, *_DROPOUT_PROB, SAVE_PARAMS_PATH_PREFIX.
    ctx : mxnet context the block will run on.
    task : 'finish' or 'like'; selects the layer sizes and dropout rate.

    Fixes over the previous revision:
    - ``np.int`` is removed in NumPy >= 1.24; use the builtin ``int``.
    - dropped the redundant second registration loops over embedding_dict /
      dense_dict: each child is already registered right where it is created.
    """
    super(DeepFM, self).__init__(**kwargs)
    util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
    self.field_size = args.FIELD_NUM
    self.feature_dict = feature_dict
    print('field_size:')
    print(self.field_size)
    # NOTE(review): embedding/batch sizes key off args.TASK while the layer
    # sizes below key off the ``task`` argument -- confirm these always agree.
    if args.TASK == 'finish':
        self.embedding_size = args.FINISH_EMBEDDING_SIZE
        self.batch_size = args.FINISH_BATCH_SIZE
    else:
        self.embedding_size = args.LIKE_EMBEDDING_SIZE
        self.batch_size = args.LIKE_BATCH_SIZE
    self.config_name = args.CONFIG_NAME
    self.task = task
    if args.LOSS == 'l2loss':
        self.loss = gloss.L2Loss()
    else:
        self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    self.ctx = ctx
    self.embedding_dict = OrderedDict()
    self.dense_dict = OrderedDict()
    with self.name_scope():
        if self.task == 'finish':
            self.layer_list = [int(x) for x in args.FINISH_LAYER]
            self.dropout = args.FINISH_DROPOUT_PROB
        else:
            self.layer_list = [int(x) for x in args.LIKE_LAYER]
            self.dropout = args.LIKE_DROPOUT_PROB
        # Registers the FM pairwise-interaction factor matrix 'v' with this
        # block's ParameterDict (side effect of .get).
        self.params.get('v', shape=(self.field_size, self.embedding_size))
        self.dnn_out = nn.Dense(1, use_bias=False)
        self.register_child(self.dnn_out)
        # One embedding per sparse feature; one projection per dense feature.
        # Children stored in plain dicts are invisible to Gluon, so each is
        # registered explicitly (once) as it is created.
        for feat in feature_dict['sparse']:
            self.embedding_dict[feat.feat_name] = nn.Embedding(
                feat.feat_num, self.embedding_size)
            self.register_child(self.embedding_dict[feat.feat_name])
        for feat in feature_dict['dense']:
            self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)
            self.register_child(self.dense_dict[feat.feat_name])
        # Linear (first-order) part of DeepFM.
        self.linear_logit_dense = nn.Dense(1, use_bias=False)
        self.linear_logit_bn = nn.BatchNorm()
        self.linear_logit_embedding_bn = nn.BatchNorm()
        self.register_child(self.linear_logit_dense)
        self.register_child(self.linear_logit_bn)
        self.register_child(self.linear_logit_embedding_bn)
        self.bn_embedding = nn.BatchNorm()
        self.register_child(self.bn_embedding)
        # Deep part: Dense -> Dropout -> BatchNorm -> ReLU per layer.
        self.dense_list = []
        self.dropout_list = []
        self.bn_list = []
        self.activation_list = []
        for i in range(len(self.layer_list)):
            self.dense_list.append(nn.Dense(self.layer_list[i]))
            self.dropout_list.append(nn.Dropout(self.dropout))
            self.bn_list.append(nn.BatchNorm())
            self.activation_list.append(nn.Activation('relu'))
            self.register_child(self.dense_list[i])
            self.register_child(self.dropout_list[i])
            self.register_child(self.bn_list[i])
            self.register_child(self.activation_list[i])
def train_GAS_ch9(model, data_utils, batch_size, lr, num_epochs, ctx):
    """Train `model` with a joint classification + regression objective.

    Per batch the decoder output is split by ``class_and_score_forward`` into
    class logits (scored against ``Y[:, :3]`` with sigmoid BCE) and
    concentration scores (scored against ``Y[:, 3:]`` with L2 loss); both
    terms are averaged over the batch and summed.

    Cleanup over the previous revision: removed large blocks of commented-out
    experimental code and the unused locals (``loss`` MaskedSoftmaxCELoss
    instance, ``l_sum``/``l_class_sum``/``l_score_sum``/``acc_sum``,
    ``rmse_sum``). Training behavior is unchanged.

    Parameters
    ----------
    model : network taking (X, valid_len) and returning decoder output.
    data_utils : provides ``get_batch_train(batch_size)`` yielding (X, Y).
    batch_size, lr, num_epochs : usual training hyper-parameters.
    ctx : device context (used only for the final report string).
    """
    model.initialize(init.Xavier(), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': lr})
    # loss1: multi-label classification head; loss2: regression head.
    loss1 = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    loss2 = gloss.L2Loss()
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[1, num_epochs], ylim=[0, 0.25])
    for epoch in range(1, num_epochs + 1):
        timer = d2l.Timer()
        metric = d2l.Accumulator(2)  # loss_sum, num_tokens
        data_iter = data_utils.get_batch_train(batch_size)
        for i, (X, Y) in enumerate(data_iter):
            # Convert to mxnet.np ndarrays; X is (batch, seq, feat) and Y is
            # (batch, 5) -- assumed from the :3 / 3: split below; confirm.
            X, Y = nd.array(X).as_np_ndarray(), nd.array(Y).as_np_ndarray()
            # Every sequence is treated as fully valid.
            valid_len = np.repeat(np.array([X.shape[1]]), X.shape[0])
            with autograd.record():
                dec_output = model(X, valid_len)
                output = dec_output.as_nd_ndarray()
                cl_res, score_res = class_and_score_forward(output)
                # Uniform sample weights for both heads.
                cl_weight, conc_weight = nd.ones_like(cl_res), nd.ones_like(
                    score_res)
                l_class = loss1(cl_res.as_np_ndarray(), Y[:, :3],
                                cl_weight.as_np_ndarray()).sum()
                l_conc = loss2(score_res.as_np_ndarray(), Y[:, 3:],
                               conc_weight.as_np_ndarray()).sum()
                n = Y.shape[0]
                # Mean-per-sample classification + regression loss.
                l = (l_class / n) + (l_conc / n)
            l.backward()
            d2l.grad_clipping(model, 1)
            num_tokens = n
            trainer.step(num_tokens)
            metric.add(l.sum(), num_tokens)
        if epoch % 10 == 0:
            animator.add(epoch, (metric[0] / metric[1], ))
    print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(ctx)}')
def _define_loss(self):
    """Create the model's loss function: sigmoid binary cross-entropy
    (expects raw logits as input) stored on ``self.loss_f``."""
    self.loss_f = loss.SigmoidBinaryCrossEntropyLoss()
def FMloss(self, y_hat, y, l1=None, lossf='sigmodbinary'):
    """Sigmoid binary cross-entropy loss with an optional L1 penalty.

    Parameters
    ----------
    y_hat : predicted logits.
    y : binary targets.
    l1 : L1 regularization strength; falsy (None/0) disables the penalty,
        which is computed over ``self.w``, ``self.b`` and ``self.latent_vec``.
    lossf : loss selector; currently unused (kept for interface
        compatibility).

    Returns
    -------
    NDArray: per-sample BCE loss plus the scalar L1 term.
    """
    criterion = gloss.SigmoidBinaryCrossEntropyLoss()
    base = criterion(y_hat, y)
    if not l1:
        return base + 0.0
    # L1 norm of all trainable FM parameters.
    penalty = nd.sum(nd.abs(self.w.data()))
    penalty = penalty + nd.sum(nd.abs(self.b.data()))
    penalty = penalty + nd.sum(nd.abs(self.latent_vec.data()))
    return base + l1 * penalty
nn.Conv2D(ndf * 2, 4, 2, 1, use_bias=False), nn.BatchNorm(), nn.LeakyReLU(0.2), nn.Conv2D(ndf * 4, 4, 2, 1, use_bias=False), nn.BatchNorm(), nn.LeakyReLU(0.2), nn.Conv2D(ndf * 8, 4, 2, 1, use_bias=False), nn.BatchNorm(), nn.LeakyReLU(0.2), nn.Conv2D(1, 4, 1, 0, use_bias=False, activation='sigmoid')) def forward(self, input): return self.main(input) netD = Discriminator() netD.initialize(init=initializer.Normal(0.002), ctx=ctx) # print(netD) criteion = gloss.SigmoidBinaryCrossEntropyLoss() fixed_noise = nd.random_normal(0, 1, (64, nz, 1, 1), ctx=ctx) # print(fixed_noise.shape) tranerG = gluon.Trainer(netG.collect_params(), 'adam', { 'learning_rate': lr, 'beta1': beta1 }) tranerD = gluon.Trainer(netD.collect_params(), 'adam', { 'learning_rate': lr, 'beta1': beta1 }) img_list = [] G_losses = []