def main(**kwargs):
    """Run inference with a previously trained model and save the class
    probabilities to a .npy file under result/.

    kwargs override fields of DefaultConfig (see args.parse).
    """
    args = DefaultConfig()
    args.parse(kwargs)
    train_iter, val_iter, test_iter, args.vocab_size, vectors = util.load_data(
        args, args.text_type)
    args.print_config()

    # model
    if args.model_path:
        # Load the trained checkpoint: state dict plus the config it was
        # trained with (device is overridden by the current run).
        saved_model = torch.load(args.model_path)
        config = saved_model['config']
        config.device = args.device
        model = getattr(models, args.model)(args, vectors)
        model.load_state_dict(saved_model['state_dict'])
        best_score = saved_model['best_score']
        print('Load model from {}!'.format(args.model_path))
    else:
        # BUG FIX: the original fell through with `model` and `config`
        # undefined, raising NameError below. Without a trained model
        # there is nothing to run inference with, so bail out early.
        print("No trained model!")
        return

    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None

    if args.cuda:
        torch.cuda.set_device(args.device)
        model.cuda()

    probs = infer(model, test_iter, config)
    # NOTE(review): the checkpoint's `best_score` is loaded into a local
    # but the filename uses `args.best_score` — assumed to be set on the
    # config elsewhere; confirm they agree.
    result_path = 'result/' + '{}_{}_{}'.format(args.model, args.id,
                                                args.best_score)
    np.save('{}.npy'.format(result_path), probs)
    print('Prob result {}.npy saved!'.format(result_path))
def train(**kwargs):
    """Train the configured model on VideoSet and save it once at the end.

    kwargs override fields of DefaultConfig (see opt.parse).
    """
    # Update configuration from command-line keyword arguments.
    opt = DefaultConfig()
    opt.parse(kwargs)
    print("参数配置完成")

    # step1: model
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu_num
    model = getattr(models, opt.model)(opt, 512)  # TODO: 512 should probably come from a config variable
    if opt.model == "StatisticModel" and model.PCA_state is False:
        # StatisticModel needs its PCA pre-trained before use.
        model.pretrainpca()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()
    print("模型加载完成")

    # step2: data
    train_data = VideoSet(opt, state='train')
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    print("数据集准备就绪")

    # step3: loss function and optimizer
    criterion = torch.nn.BCELoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: statistics
    print("开始训练")

    # Training loop.
    for epoch in range(opt.max_epoch):
        for ii, (data, label) in enumerate(train_dataloader):
            # Train model parameters on one batch.
            # NOTE(review): Variable is a no-op in PyTorch >= 0.4.
            input_data = Variable(data)
            target = Variable(label)
            # ipdb.set_trace()
            if opt.use_gpu:
                input_data = input_data.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input_data)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

    # Save the model parameters once after all epochs.
    model.save()
def train(**kwargs): """ 训练 """ #根据命令行参数更新配置 opt.parse(kwargs) #可视化 #vis = Visualizer(opt.env) #step1 加载模型: model = getattr(models,opt.model)() if opt.load_model_path: model.load(opt.load_model_path)#加载训练好的参数 if opt.use_gpu:model.cuda() #step2 数据: train_data = DogCat(opt.train_data_root,train=True) val_data = DogCat(opt.train_data_root,train=False) train_dataloader = DataLoader(train_data,opt.batch_size,shuffle=True,num_workers=opt.num_workers) val_dataloader = DataLoader(val_data,opt.batch_size,shuffle=True,num_workers=opt.num_workers) #step3:目标函数和优化器 criterion = torch.nn.CrossEntropyLoss() lr = opt.lr optimizer = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=opt.weight_decay) #统计指标:平滑处理之后的损失,还有混淆矩阵 #######################待补充######################### #训练 for epoch in range(opt.max_epoch): for ii,(data,label) in enumerate(train_dataloader): #训练模型 if opt.use_gpu: input = input.cuda() target = input.cuda() optimizer.zero_grad() score = model(input) loss = criterion(score,target) loss.backward() optimizer.step() #更新统计指标及可视化 if ii%opt.print_freq == opt.print_freq - 1: print('ii:{},loss:{}'.format(ii,loss)) model.save()
def inference(**kwargs):
    """Restore a trained TF-1.x graph from disk and measure tx-prediction
    error on the test set; per-batch error ratios are saved to result.npy.

    kwargs override fields of DefaultConfig (see opt.parse).
    """
    # Update configuration from command-line keyword arguments.
    opt = DefaultConfig()
    opt.parse(kwargs)
    print("参数配置完成")

    # Map the numeric model_type to the on-disk directory name.
    if opt.model_type == 1:
        model_type = "model_1"
    elif opt.model_type == 2:
        model_type = "model_2_" + opt.model_2_layers
    elif opt.model_type == 3:
        model_type = "model_3"

    # Load the static graph (Windows-style "\\" path separators).
    model_files = opt.model_path + model_type + "\\data_SNR_" + str(opt.SNR)
    saver = tf.train.import_meta_graph(
        model_files + "\\data_SNR_" + str(opt.SNR) + ".meta")

    # Start testing.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = opt.per_process_gpu_memory_fraction
    with tf.Session(config=config) as sess:
        # Restore the parameter values from the latest checkpoint.
        saver.restore(sess, tf.train.latest_checkpoint(model_files))
        # Test-set dataset.
        test_dataset = CSISet(opt.test_data_path, opt.batch_size, False, "test")
        # Per-batch error between the true tx signal and the predicted one.
        data_loss = []
        print("开始预测过程!")
        start_time = time.time()
        for ii, (batch_x, batch_tx, batch_rx) in enumerate(test_dataset.get_data()):
            inputs = tf.get_collection("input_batch")[0]
            predictions = tf.get_collection("predictions")[0]
            # pred_H is the predicted full channel response, shape
            # [batch, 72, 14, 2] (last axis = real/imag parts).
            # With H = rx / tx, combining (pred_H, batch_rx) recovers tx.
            pred_H = np.squeeze(np.array(
                sess.run([predictions], feed_dict={inputs: batch_x})), axis=0)
            complex_pred_H = pred_H[:, :, :, 0] + pred_H[:, :, :, 1] * 1j
            # ipdb.set_trace()
            pred_batch_tx = np.divide(batch_rx, complex_pred_H)
            # Overwrite these regions with 1 in both tensors so they do not
            # contribute to the error — presumably pilot/guard positions of
            # the resource grid; TODO confirm against the dataset layout.
            pred_batch_tx[:, :5, 5:7] = 1.
            pred_batch_tx[:, 67:72, 5:7] = 1.
            batch_tx[:, :5, 5:7] = 1.
            batch_tx[:, 67:72, 5:7] = 1.
            # Mean relative error of the predicted tx for this batch.
            batch_data_loss_ratio = np.mean(
                np.divide(abs(pred_batch_tx - batch_tx), abs(batch_tx)))
            # print(batch_data_loss)
            print("第%d个batch的发送信息预测平均误差是%.6f"
                  % (ii + 1, batch_data_loss_ratio))
            data_loss.append(batch_data_loss_ratio)
        result = np.mean(data_loss)
        print("信噪比为%d时模型在测试集上的平均估计误差为%.2f" % (opt.SNR, result))
        end_time = time.time()
        print_time(start_time, end_time, "整个测试过程")
        result_path = opt.result_path + model_type + "\\data_SNR_" + str(
            opt.SNR) + "\\test\\result.npy"
        np.save(result_path, data_loss)
results.extend(probability.cpu().numpy()) write_csv(ids, results, breed, opt.result_file) if __name__ == '__main__': opt = DefaultConfig() # opt.parse() # m = getattr(model, opt.model)() # # print(m) # for param in m.parameters(): # print(param.requires_grad) opt.parse({'model': 'ResNet50','max_epoch': 30, 'load_model_path': './checkpoints/ResNet50_19'}) # train(opt) test(opt) ''' m = getattr(model, opt.model)().cuda() train_dataloader = dataloader(opt.train_data_root, train=True, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) for (data, label) in train_dataloader: m.train() data = data.cuda() label = label.cuda() score = m(data) print('train: ', score.size())
def train(**kwargs):
    """Train the SDNE graph-embedding model.

    kwargs override fields of DefaultConfig (see opt.parse).
    """
    # step1: customize config
    opt = DefaultConfig()
    opt.parse(kwargs)

    # step2: model
    # TODO: make the initiation correspond to the data
    # Problem: when node_num is greater than 5M, the depth of encoder hidden
    # layers can not be greater than 3 — the number of parameters would be
    # very large.
    print('Initiate model')
    model = getattr(models, opt.model)(opt.num_units, 1, opt.d)
    print(model)
    # if opt.load_model_path:
    #     model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()
    model.train()

    print('Initiate Train_Data')
    # step3: prepare data. In this case, only training data is needed.
    train_data = DataLoader(SDNEData(opt.train_data_root, opt.file_name, opt.beta),
                            shuffle=True, num_workers=1, pin_memory=False)

    print('Initiate Optimizer and Loss function')
    # step4: optimizer
    optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.99,
                                nesterov=True, weight_decay=1e-5)
    # BUG FIX: the loss was moved to the GPU unconditionally, crashing on
    # CPU-only machines; respect opt.use_gpu like the model does.
    loss_func = MyLoss(1, 1, 1e-5, opt.node_num)
    if opt.use_gpu:
        loss_func = loss_func.cuda()

    total_time = 0.0
    for epoch in range(0, opt.max_epoch):
        epoch_st_time = time.time()
        # Dimension: x1, x2: num_node; a1, a2: num_node+1; x_ij: num_node
        for ii, (x1, x2, a1, a2, x_ij) in enumerate(train_data):
            if ii == 10:
                # NOTE(review): debug cap — only 10 batches per epoch are
                # trained; confirm whether this should stay.
                break
            if opt.use_gpu:
                x1 = x1.cuda()
                x2 = x2.cuda()
                a1 = a1.cuda()
                a2 = a2.cuda()
                x_ij = x_ij.cuda()

            x_diff1, y1 = model(x1)
            x_diff2, y2 = model(x2)
            y_diff = y2 - y1

            optimizer.zero_grad()
            loss = loss_func(x_diff1, x_diff2, y_diff, a1, a2, x_ij)

            start_time = time.time()
            loss.backward(retain_graph=False)
            # BUG FIX: synchronize only when actually on the GPU — the
            # unconditional call crashed without CUDA (it is only needed to
            # make the backward timing below meaningful anyway).
            if opt.use_gpu:
                torch.cuda.synchronize()
            end_time = time.time()
            print('backward time = ' + str(end_time - start_time))
            optimizer.step()

        epoch_end_time = time.time()
        total_time += epoch_end_time - epoch_st_time
        print("total_time is " + str(total_time) + 's\n')
def main(**kwargs):
    """Train a text classifier and print per-epoch train/test accuracy.

    kwargs override fields of DefaultConfig (see config.parse).
    """
    config = DefaultConfig()
    config.parse(kwargs)
    config.env = str(config.id)
    # NOTE(review): Visualizer is assigned but never instantiated, and
    # `vis` is unused below — Visualizer(config.env) was probably
    # intended. Left as-is to avoid opening a visdom connection.
    vis = Visualizer

    # set random seed — cpu and gpu both need seeding
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    random.seed(config.seed)

    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None
    else:
        # BUG FIX: the gpu seed used to be set *before* checking CUDA
        # availability; seed the gpu only when one is present.
        torch.cuda.manual_seed(config.seed)

    train_iter, test_iter, emb_vectors = utils.load_data(config)
    config.print_config()

    model = getattr(models, config.model)(config, emb_vectors)
    print(model)
    if config.cuda:
        torch.cuda.set_device(config.device)
        model.cuda()

    # loss function and optimizer
    loss_f = F.cross_entropy
    lr1, lr2 = config.lr1, config.lr2
    optimizer = model.get_optimizer(lr1, lr2)

    model.train()
    for epoch in range(config.max_epochs):
        start_time = time.time()
        total_loss = 0.0
        correct = 0
        total = 0
        for batch_i, batch in enumerate(train_iter):
            text, label = batch.text[0], batch.label
            if config.cuda:
                text, label = text.cuda(), label.cuda()

            optimizer.zero_grad()
            pred = model(text)
            loss = loss_f(pred, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            predicted = pred.max(dim=1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()
            # print statistics roughly every 10000 training samples
            if (batch_i + 1) % (10000 // config.batch_size) == 0:
                print('[Epoch {}] loss: {:.5f} | Acc: {:.3f}%({}/{})'.format(
                    epoch + 1, total_loss, 100.0 * correct / total, correct, total))

        train_acc, train_acc_n, train_n = val(model, train_iter, config)
        print('Epoch {} time spends : {:.1f}s'.format(epoch + 1, time.time() - start_time))
        print('Epoch {} Train Acc: {:.2f}%({}/{})'.format(
            epoch + 1, train_acc, train_acc_n, train_n))
        test_acc, test_acc_n, test_n = val(model, test_iter, config)
        print('Epoch {} Test Acc: {:.2f}%({}/{})\n'.format(
            epoch + 1, test_acc, test_acc_n, test_n))
def main(**kwargs):
    """Train an LSTM text classifier, keeping the best checkpoint by
    validation f1, then write test-set probabilities and a submission csv.

    kwargs override fields of DefaultConfig (see args.parse).
    """
    args = DefaultConfig()
    args.parse(kwargs)
    args.model = 'LSTM'
    args.device = 0
    args.id = 'word4'
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
    torch.manual_seed(args.seed)  # set random seed for cpu

    train_iter, val_iter, test_iter, args.vocab_size, vectors = data.load_data(args)
    args.print_config()

    global best_score
    # BUG FIX: `best_score` may never have been assigned at module level,
    # making the first comparison below raise NameError; default it to 0.
    try:
        best_score
    except NameError:
        best_score = 0.0

    # init model
    model = getattr(models, args.model)(args, vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(args.save_dir, '{}_{}.pth'.format(args.model, args.id))

    if args.cuda:
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu
        model.cuda()

    # loss function and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = args.lr1, args.lr2
    optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

    for i in range(args.max_epochs):
        total_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for idx, batch in enumerate(train_iter):
            # BatchNorm layers cannot handle a batch of size 1.
            if len(batch) == 1:
                continue
            text, label = batch.text, batch.label
            if args.cuda:
                text, label = text.cuda(), label.cuda()

            optimizer.zero_grad()
            pred = model(text)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()

            # update running statistics
            total_loss += loss.item()
            predicted = pred.max(1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()
            if idx % 80 == 79:
                # BUG FIX: stats print every 80 batches, so average the
                # accumulated loss over 80 (was erroneously / 20).
                print('[{}, {}] loss: {:.3f} | Acc: {:.3f}%({}/{})'.format(
                    i + 1, idx + 1, total_loss / 80,
                    100. * correct / total, correct, total))
                total_loss = 0.0

        # validation-set score; adjust learning rate accordingly
        f1score = val(model, val_iter, args)
        if f1score > best_score:
            best_score = f1score
            checkpoint = {
                'state_dict': model.state_dict(),
                'config': args
            }
            torch.save(checkpoint, save_path)
            print('Best tmp model f1score: {}'.format(best_score))
        if f1score < best_score:
            # regression: reload the best checkpoint and decay the lrs
            # (map_location remaps the training gpu to the current one)
            model.load_state_dict(torch.load(
                save_path, map_location={'cuda:5': 'cuda:0'})['state_dict'])
            lr1 *= args.lr_decay
            lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
            optimizer = model.get_optimizer(lr1, lr2, 0)
            print('* load previous best model: {}'.format(best_score))
            print('* model lr:{} emb lr:{}'.format(lr1, lr2))
        if lr1 < args.min_lr:
            print('* training over, best f1 score: {}'.format(best_score))
            break

    # save the final trained model
    args.best_score = best_score
    final_model = {
        'state_dict': model.state_dict(),
        'config': args
    }
    best_model_path = os.path.join(
        args.save_dir, '{}_{}_{}.pth'.format(args.model, args.text_type, best_score))
    torch.save(final_model, best_model_path)
    print('Best Final Model saved in {}'.format(best_model_path))

    # run on the test set; save probabilities and the submission file
    if not os.path.exists('result/'):
        os.mkdir('result/')
    probs, test_pred = test(model, test_iter, args)
    result_path = 'result/' + '{}_{}_{}'.format(args.model, args.id, args.best_score)
    np.save('{}.npy'.format(result_path), probs)
    print('Prob result {}.npy saved!'.format(result_path))
    test_pred[['id', 'class']].to_csv('{}.csv'.format(result_path), index=None)
    print('Result {}.csv saved!'.format(result_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
def main(**kwargs):
    """Train an aspect-based sentiment model, checkpointing the best
    validation accuracy (after epoch 100) and reporting test accuracy.

    kwargs override fields of DefaultConfig (see config.parse).
    """
    start_time = time.time()
    config = DefaultConfig()
    config.parse(kwargs)
    vis = Visualizer(config.env)
    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None
    torch.manual_seed(config.seed)

    train_iter, val_iter, test_iter, config.vocab_size, config.target_vocab_size, config.aspect_vocab_size, \
        text_vectors, target_vectors, aspect_vectors = data.load_data(config)
    # TODO: class imbalance in the data still needs handling.
    config.print_config()

    # init model
    model = getattr(models, config.model)(config, text_vectors, target_vectors, aspect_vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(config.save_dir):
        os.mkdir(config.save_dir)
    tmp_save_path = os.path.join(config.save_dir, 'entnet_{}.pth'.format(config.id))

    if config.cuda:
        torch.cuda.set_device(config.device)
        torch.cuda.manual_seed(config.seed)  # set random seed for gpu
        model.cuda()

    # loss function and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = config.lr1, config.lr2
    optimizer = model.get_optimizer(lr1, lr2)

    global best_acc
    best_acc = 0.0

    # training
    for i in range(config.max_epoch):
        total_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for idx, batch in enumerate(train_iter):
            text, target, aspect, label = batch.text, batch.target, batch.aspect, batch.label
            if config.cuda:
                text, target, aspect, label = text.cuda(), target.cuda(), aspect.cuda(), label.cuda()

            optimizer.zero_grad()
            pred = model(text, target, aspect)
            loss = criterion(pred, label)
            loss.backward(retain_graph=True)
            optimizer.step()

            # update running statistics
            total_loss += loss.item()
            predicted = pred.max(dim=1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

        # per-epoch training statistics
        print('[Epoch {}] loss: {:.5f} | Acc: {:.3f}%({}/{})'.format(
            i + 1, total_loss, 100. * correct / total, correct, total))
        vis.plot('loss', total_loss)

        # every 5 epochs: validation accuracy (and, late in training,
        # checkpointing of the best model)
        if i % 5 == 4:
            acc, acc_n, val_n = val(model, val_iter, config)
            vis.plot('val_acc', acc)
            print('Epoch {} Val Acc: {:.3f}%({}/{})'.format(i + 1, acc, acc_n, val_n))
            # The model is close to convergence only after ~100 epochs; the
            # small dataset makes earlier accuracy fluctuate, so start
            # keeping checkpoints only from there.
            if i > 100:
                if acc >= best_acc:
                    best_acc = acc
                    checkpoint = {
                        'state_dict': model.state_dict(),
                        'config': config
                    }
                    torch.save(checkpoint, tmp_save_path)

        # test-set accuracy every epoch
        test_acc, test_acc_n, test_n = val(model, test_iter, config)
        vis.plot('test_acc', test_acc)
        print('Epoch {} Test Acc: {:.3f}%({}/{})\n'.format(i + 1, test_acc, test_acc_n, test_n))

    # Reload the best validation checkpoint and report its test accuracy.
    # BUG FIX: the checkpoint is only written after epoch 100, so with
    # max_epoch <= 100 no file exists and torch.load crashed; fall back to
    # evaluating the in-memory final model in that case.
    if os.path.exists(tmp_save_path):
        model.load_state_dict(torch.load(tmp_save_path)['state_dict'])
        print('Load tmp best model from {}'.format(tmp_save_path))
    else:
        print('No checkpoint at {}, evaluating final in-memory model'.format(tmp_save_path))
    test_acc, test_acc_n, test_n = val(model, test_iter, config)
    print('Finally Test Acc: {:.3f}%({}/{})'.format(test_acc, test_acc_n, test_n))
    print('Final cost time : {}s'.format(time.time() - start_time))
gt_bboxes = y.slice_axis(axis=-1, begin=0, end=4) gt_bboxes *= im_scale gt_difficulty = y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None val_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficulty) return val_metric.get() if __name__ == '__main__': opt = DefaultConfig() opt.parse({'model': 'vgg16_faster_rcnn', 'env': 'vgg16', 'lr_decay_epoch': '8, 16', 'preload': False, 'special_load': True, 'lr': 0.001, 'start_epoch': 0, 'max_epoch': 20, 'load_file_path': '', 'log_file_path': './log/vgg16_faster_rcnn.log'}) logger = get_logger(opt) if opt.special_load and opt.special_load_path is not None: model_train = getattr(models, opt.model)(True, opt.special_load_path) else: model_train = getattr(models, opt.model)() initialize_model_gpu(model_train) train_dataloader, val_dataloader, val_metric = dataloader.DogDataLoader(model_train)
# Map BIO tags to integer ids, then append the CRF start/stop tags.
tag2label = {
    "O": 0,
    "B-W": 1,
    "I-W": 2,
}
tag2label[START_TAG] = len(tag2label)
tag2label[STOP_TAG] = len(tag2label)
print("映射word and tag to id")

# Convert every sentence's tokens to vocabulary ids.
sentence = []
# BUG FIX: the loop variable used to be named `data`, shadowing the
# dataset itself; the `tags` comprehension below then iterated over the
# LAST sample instead of the whole dataset. Use a distinct name.
for idx, sample in enumerate(data):
    sent = list(sample[0])
    sent = tokens_to_ids(sent, vocab)
    sentence.append(sent)
tags = [[tag2label[tag] for tag in x[1]] for x in data]

opt.parse({'vocab_size': len(words), 'embedding_length': 300})
model = Bilstm_crf(opt, tag2label)
# Only optimize parameters that require gradients.
optim = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()))
if torch.cuda.is_available():
    model = model.cuda()

# 80/20 train/validation split, then batched training iterator.
x_train, x_valid, y_train, y_valid = train_test_split(sentence, tags,
                                                      test_size=0.2)
train_data = data_process.batch_iter(x_train, y_train, opt.batch_size,
                                     num_epochs=opt.num_epochs)
steps = 0
min_delta = opt.min_delta
# --- fragment: tail of a test/accuracy routine (the enclosing `def` is
# outside the visible chunk; `total_num`'s initialization is not shown —
# presumably `total_num = 0` just before this; TODO confirm) ---
    correct_num = 0
    for i, (data, label) in enumerate(dataloader):
        if opt.use_gpu:
            data = data.cuda()
        score = model_test(data)
        # take the arg-max class per sample
        _, predict = torch.max(score.data, 1)
        total_num += label.size(0)
        correct_num += (predict.cpu() == label).sum()
    # accuracy as a percentage
    return 100 * float(correct_num) / float(total_num)


if __name__ == '__main__':
    print("Initialize starting options")
    opt = DefaultConfig()
    opt.parse({'batch_size': 128, 'num_workers': 4})
    # Load the trained model and evaluate it on the test set.
    model_test = getattr(model, opt.model)()
    model_test.load("model/SuleymanNET_model_state_dict.pkl")
    testloader = data_loader(opt.root, opt.batch_size, opt.num_workers)
    accuracy = test(model_test, testloader, opt)
    print("Accuracy of Test Set: %.3f" % accuracy)
optimizer_g.zero_grad() # train by fake image # refresh the value of noises noises.data.copy_(torch.randn(opt.batch_size, opt.nz, 1, 1)) fake_img = model_G(noises) output = model_D(fake_img) loss_g = criterion(output, true_labels) loss_g.backward() optimizer_g.step() loss_G_meter.add(loss_g.item()) if ii % opt.print_freq: vis.plot('loss_d', loss_D_meter.value()[0]) vis.plot('loss_g', loss_G_meter.value()[0]) fix_fake_img = model_G(fix_noises) vis.images(fix_fake_img.data.cpu().numpy()[:64] * 0.5 + 0.5, win='fixfake') if (epoch + 1) % 20 == 0: model_G.save(opt.save_model_path + opt.G_model + '_' + str(epoch)) model_D.save(opt.save_model_path + opt.D_model + '_' + str(epoch)) if __name__ == '__main__': opt = DefaultConfig() opt.parse({'max_epoch': 100}) train(opt)
def main(**kwargs):
    """Train a stack of boosted text classifiers; each layer reuses the
    loss weights produced by the previous layer's validation pass.

    kwargs override fields of DefaultConfig (see args.parse).
    """
    args = DefaultConfig()
    args.parse(kwargs)
    # boosting: only a few epochs per layer
    args.max_epochs = 5
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
    torch.manual_seed(args.seed)  # set random seed for cpu

    train_iter, val_iter, test_iter, args.vocab_size, vectors = util.load_data(
        args)
    args.print_config()

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(
        args.save_dir, '{}_{}_{}.pth'.format(args.model, args.text_type, args.id))

    if args.cuda:
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu

    for lay_i in range(args.bo_layers):
        print('-------------- lay {} ---------------'.format(lay_i))
        model = getattr(models, args.model)(args, vectors)
        # BUG FIX: .cuda() was called unconditionally and crashed on
        # CPU-only machines; respect args.cuda.
        if args.cuda:
            model = model.cuda()
        print(model)
        best_score = 0.0

        # loss function and optimizer
        criterion = F.cross_entropy
        lr1 = args.lr1
        lr2 = args.lr2
        optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

        if lay_i != 0:
            # load the previous layer's loss weights
            saved_model = torch.load(args.model_path)
            loss_weight = saved_model['loss_weight']
            print(list(enumerate(loss_weight)))
            # BUG FIX: same CPU/GPU guard as the model above.
            if args.cuda:
                loss_weight = loss_weight.cuda()

        for i in range(args.max_epochs):
            total_loss = 0.0
            correct = 0
            total = 0

            model.train()
            for idx, batch in enumerate(train_iter):
                # BatchNorm layers cannot handle a batch of size 1.
                if len(batch) == 1:
                    continue
                text, label = batch.text, batch.label
                if args.cuda:
                    text, label = text.cuda(), label.cuda()

                optimizer.zero_grad()
                pred = model(text)
                if lay_i != 0:
                    # re-weight the loss toward classes the previous layer
                    # did poorly on (weights re-centred around 1)
                    loss = criterion(pred, label,
                                     weight=loss_weight + 1 - loss_weight.mean())
                else:
                    loss = criterion(pred, label)
                loss.backward()
                optimizer.step()

                # update running statistics
                total_loss += loss.item()
                predicted = pred.max(1)[1]
                total += label.size(0)
                correct += predicted.eq(label).sum().item()
                if idx % 80 == 79:
                    # BUG FIX: stats print every 80 batches, so average the
                    # accumulated loss over 80 (was erroneously / 20).
                    print('[{}, {}] loss: {:.3f} | Acc: {:.3f}%({}/{})'.format(
                        i + 1, idx + 1, total_loss / 80,
                        100. * correct / total, correct, total))
                    total_loss = 0.0

            # validation-set score; adjust learning rate accordingly
            f1score, tmp_loss_weight = val(model, val_iter, args)
            if f1score > best_score:
                best_score = f1score
                checkpoint = {'state_dict': model.state_dict(), 'config': args}
                torch.save(checkpoint, save_path)
                print('Best tmp model f1score: {}'.format(best_score))
            if f1score < best_score:
                model.load_state_dict(torch.load(save_path)['state_dict'])
                lr1 *= args.lr_decay
                lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
                optimizer = model.get_optimizer(lr1, lr2, 0)
                print('* load previous best model: {}'.format(best_score))
                print('* model lr:{} emb lr:{}'.format(lr1, lr2))
            if lr1 < args.min_lr:
                print('* training over, best f1 score: {}'.format(
                    best_score))
                break

        # keep this layer's loss weights for the next layer, then persist
        # the layer's final model (its path feeds the next iteration)
        loss_weight = tmp_loss_weight
        args.best_score = best_score
        final_model = {
            'state_dict': model.state_dict(),
            'config': args,
            'loss_weight': loss_weight
        }
        args.model_path = os.path.join(
            args.save_dir, '{}_{}_lay{}_{}.pth'.format(args.model, args.text_type,
                                                       lay_i, best_score))
        torch.save(final_model, args.model_path)
        print('Best Final Model saved in {}'.format(args.model_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
# --- fragment: tail of a validation routine (the enclosing `def` and the
# opening parenthesis of this expression are outside the visible chunk) ---
        sum(val_loss_his) / len(val_loss_his) / opt.batch_size)
    return val_acc, val_mean_loss


if __name__ == '__main__':
    opt = DefaultConfig()
    # Fine-tune VGG16 on the cropped Stanford-Dogs images, starting from a
    # previously saved checkpoint (preload=True).
    opt.parse({
        'model': 'VGG16',
        'env': 'VGG16',
        'lr': 0.001,
        'train_dir': '/home/qinliang/dataset/stanford_dog_dataset/cut_images_train',
        'valid_dir': '/home/qinliang/dataset/stanford_dog_dataset/cut_images_val',
        'save_path': './cut_image_checkpoints/',
        'lr_decay': 0.5,
        'preload': True,
        'start_epoch': 0,
        'max_epoch': 50,
        'batch_size': 32,
        'wd': 15e-4,
        'load_file_path': '/home/qinliang/Desktop/kaggle/dog_recognition_gluon/checkpoints/epoch16_acc_99.31.params',
        'log_file_path': './log/VGG16_cut_image.log'
    })
    logger = get_logger(opt)
    model_train = getattr(models, opt.model)()
    model_train.initialize()
    convert_model_gpu(model_train)
    # hybridize: Gluon static-graph compilation of the network
    model_train.hybridize()
# --- fragment: tail of a training loop (the enclosing `def train` is
# outside the visible chunk) ---
            data = data.cuda()
            label = label.cuda()
            optimizer.zero_grad()
            score = model_train(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
        model_train.train()

        # decay the learning rate when the epoch loss did not improve
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param in optimizer.param_groups:
                param['lr'] = lr
            print("Changing learning rate to %.19f" % lr)
        previous_loss = loss_meter.value()[0]
        # persist the latest weights after every epoch
        torch.save(model_train.state_dict(),
                   "model/SuleymanNET_model_state_dict.pkl")


if __name__ == '__main__':
    opt = DefaultConfig()
    opt.parse({'max_epoch': 110, 'weight_decay': 15e-4, 'lr': 0.1})
    train(opt)
def main(**kwargs):
    """Train a text classifier on TSV data, checkpoint the best model by
    validation f1, then write test-set probabilities and predictions.

    kwargs override fields of DefaultConfig (see args.parse).
    """
    args = DefaultConfig()
    args.parse(kwargs)
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
    torch.manual_seed(args.seed)  # set random seed for cpu

    # build the vocabulary over train + test text
    train = pd.read_csv(args.train_path, sep='\t', encoding='utf-8', header=0)
    test_df = pd.read_csv(args.test_path, sep='\t', encoding='utf-8', header=0)
    corpus_all = pd.concat([train, test_df], axis=0)
    vocab = get_dictionary(corpus_all.text)
    args.vocab_size = len(vocab)

    train = list(zip(train.label, train.text))
    test = list(zip(test_df.label, test_df.text))
    train_data, val_data = train_test_split(train, test_size=0.1, random_state=1)
    train_iter = get_iter(train_data, vocab, args.batch_size, True, max_len=32)
    val_iter = get_iter(val_data, vocab, args.batch_size, True, max_len=32)
    test_iter = get_iter(test, vocab, args.batch_size, True, max_len=32)

    if args.pretrain_embeds_path is None:
        vectors = None
    else:
        # BUG FIX: pickle.load() requires a binary file object, not a path
        # string — the original crashed whenever embeddings were configured.
        with open(args.pretrain_embeds_path, 'rb') as f:
            vectors = pickle.load(f)
        assert len(
            vectors) == args.vocab_size, '预训练的词向量shape[0]为%d,而字典大小为%d' % (
                len(vectors), args.vocab_size)
        assert vectors.shape[
            1] == args.embedding_dim, '预训练词向量的shape[1]为%d,而设置的embedding_dim为%d' % (
                vectors.shape[1], args.embedding_dim)
    args.print_config()

    global best_score
    # BUG FIX: `best_score` may never have been assigned at module level,
    # making the first comparison below raise NameError; default it to 0.
    try:
        best_score
    except NameError:
        best_score = 0.0

    # init model
    model = getattr(models, args.model)(args, vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(args.save_dir, '{}_{}.pth'.format(args.model, args.id))

    if args.cuda:
        torch.cuda.current_device()
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu
        model.cuda()

    # loss function and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = args.lr1, args.lr2
    optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

    for i in range(args.max_epochs):
        total_loss = 0.0
        pred_labels = []
        labels = []

        model.train()
        for idx, (b_x, b_y) in enumerate(train_iter):
            # BatchNorm layers cannot handle a batch of size 1.
            if len(b_x) == 1:
                continue
            if args.cuda:
                b_x, b_y = b_x.cuda(), b_y.cuda()

            optimizer.zero_grad()
            pred = model(b_x)
            loss = criterion(pred, b_y)
            loss.backward()
            optimizer.step()

            # update running statistics
            total_loss += loss.item()
            predicted = pred.max(1)[1]
            # BUG FIX: .numpy() fails on CUDA tensors — move to cpu first.
            pred_labels.extend(predicted.cpu().numpy().tolist())
            label = b_y.cpu().numpy().tolist()
            labels.extend(label)

            if idx % 100 == 0:
                print('[{}, {}] loss: {:.3f}'.format(i + 1, idx + 1,
                                                     total_loss / (idx + 1)))
                tf1score = metrics.f1_score(labels, pred_labels)
                # BUG FIX: the original format string had 3 slots for 4
                # arguments, so the loss was printed where the f1 score
                # should be and the score itself was dropped.
                print('[{}, {}] loss:{:.3f} tf1_score:{}'.format(
                    i + 1, idx + 1, total_loss / (idx + 1), tf1score))

        # validation-set score; adjust learning rate accordingly
        f1score = val(model, val_iter, args)
        if f1score > best_score:
            best_score = f1score
            checkpoint = {'state_dict': model.state_dict(), 'config': args}
            torch.save(checkpoint, save_path)
            print('Best tmp model f1score: {}'.format(best_score))
        if f1score < best_score:
            model.load_state_dict(torch.load(save_path)['state_dict'])
            lr1 *= args.lr_decay
            lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
            optimizer = model.get_optimizer(lr1, lr2, 0)
            print('* load previous best model: {}'.format(best_score))
            print('* model lr:{} emb lr:{}'.format(lr1, lr2))
        if lr1 < args.min_lr:
            print('* training over, best f1 score: {}'.format(best_score))
            break

    # save the final trained model
    args.best_score = best_score
    final_model = {'state_dict': model.state_dict(), 'config': args}
    best_model_path = os.path.join(
        args.save_dir, '{}_{}_{}.pth'.format(args.model, args.text_type, best_score))
    torch.save(final_model, best_model_path)
    print('Best Final Model saved in {}'.format(best_model_path))

    # run on the test set; save probabilities and the submission file
    if not os.path.exists(args.result_path):
        os.mkdir(args.result_path)
    probs, pre_labels = predict(model, test_iter, args)
    result_path = args.result_path + '{}_{}_{}'.format(args.model, args.id,
                                                       args.best_score)
    np.save('{}.npy'.format(result_path), probs)
    print('Prob result {}.npy saved!'.format(result_path))
    test_df['label'] = np.array(pre_labels)
    test_df[['idx', 'seq_id', 'label']].to_csv('{}.csv'.format(result_path),
                                               index=None)
    print('Result {}.csv saved!'.format(result_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
def train(**kwargs):
    """Build and train the channel-estimation TF-1.x graph (plain deconv,
    deconv + learnable pooling, or deconv + GAN depending on
    opt.model_type), then plot the MSE curves and save the model.

    kwargs override fields of DefaultConfig (see opt.parse).
    """
    # Update configuration from command-line keyword arguments.
    opt = DefaultConfig()
    opt.parse(kwargs)
    print("参数配置完成")

    # Optimizer — defaults to Adam.
    learning_rate = opt.learning_rate
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.5, beta2=0.9)
    if opt.optimizer_type == "SGD":
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
    elif opt.optimizer_type == "Momentum":
        momentum = opt.momentum
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum)
    elif opt.optimizer_type == "Adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.5, beta2=0.9)

    # Build the static graph.
    with tf.Graph().as_default():
        with tf.name_scope("inputs"):
            inputs = tf.placeholder("float", [None, 24, 2, 2],
                                    name="model_input")
            labels = tf.placeholder("float", [None, 72, 14, 2],
                                    name="labels")

        # Define the model(s) and collect the trainable sub-models.
        model = []
        if opt.model_type == 1:
            # deconvolution only
            gmodel = GModel(opt.batch_size, opt.normal_type, True,
                            "generate_model")
            model.append(gmodel)
        elif opt.model_type == 2:
            # deconvolution + learnable pooling
            gmodel = GModel(opt.batch_size, opt.normal_type, True,
                            "generate_model")
            model.append(gmodel)
            learningpoolingmodel = LearningPoolingModel(
                opt.batch_size, opt.normal_type, True, opt.model_2_layers,
                "learning_pooling_model")
            model.append(learningpoolingmodel)
        elif opt.model_type == 3:
            # deconvolution + GAN
            gmodel = GModel(opt.batch_size, opt.normal_type, True,
                            "generate_model")
            model.append(gmodel)
            dmodel = DModel(opt.batch_size, opt.normal_type, True,
                            opt.GAN_type, "discriminate_model")
            model.append(dmodel)
        # print(model)

        # Collect and partition the trainable variables. Because of the
        # dependency on tf.GraphKeys.UPDATE_OPS below, get_vars must be
        # called AFTER calculate_loss — otherwise all_vars is empty.
        def get_vars():
            all_vars = tf.trainable_variables()
            # print(all_vars)
            gg_vars = [var for var in all_vars if "generate_model" in var.name]
            # NOTE(review): "discriminate_mode" is a prefix of
            # "discriminate_model", so the substring match still works —
            # but it looks like a typo; confirm.
            dd_vars = [
                var for var in all_vars if "discriminate_mode" in var.name
            ]
            ll_pp_vars = [
                var for var in all_vars if "learning_pooling_model" in var.name
            ]
            return gg_vars, dd_vars, ll_pp_vars

        # Depend on update_ops, otherwise BatchNorm statistics break!
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.device(opt.gpu_num):
            if opt.model_type == 1:
                # deconvolution
                pre_loss, mse, pred = model[0].calculate_loss(inputs, labels)
                g_vars, _, _ = get_vars()
                with tf.control_dependencies(update_ops):
                    train_ops = optimizer.minimize(pre_loss, var_list=g_vars)
            elif opt.model_type == 2:
                # deconvolution + learnable pooling
                _, mse, pred = model[0].calculate_loss(inputs, labels)
                l_p_loss = model[1].calculate_loss(pred, labels,
                                                   opt.model_2_scale)
                g_vars, _, l_p_vars = get_vars()
                with tf.control_dependencies(update_ops):
                    train_ops = optimizer.minimize(l_p_loss,
                                                   var_list=g_vars + l_p_vars)
            elif opt.model_type == 3:
                # deconvolution + GAN
                pre_loss, mse, pred = model[0].calculate_loss(inputs, labels)
                gen_loss, dis_loss = model[1].calculate_loss(pred, labels)
                g_vars, d_vars, _ = get_vars()
                with tf.control_dependencies(update_ops):
                    # train D -> train G -> train the prior (G) network
                    d_train_ops = optimizer.minimize(dis_loss, var_list=d_vars)
                    g_train_ops = optimizer.minimize(gen_loss, var_list=g_vars)
                    pre_train_ops = optimizer.minimize(pre_loss,
                                                       var_list=g_vars)

        tf.summary.scalar("MSE", mse)
        # Export the endpoints needed at inference time.
        tf.add_to_collection("input_batch", inputs)
        tf.add_to_collection("predictions", pred)
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Start training.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = opt.per_process_gpu_memory_fraction
        with tf.Session(config=config) as sess:
            # Initialize all parameters first.
            sess.run(init)
            if opt.model_type == 1:
                model_type = "model_1"
            elif opt.model_type == 2:
                model_type = "model_2_" + str(opt.model_2_layers)
            elif opt.model_type == 3:
                model_type = "model_3"
            summary_path = opt.summary_path + model_type + "\\data_SNR_" + str(
                opt.SNR)
            writer = tf.summary.FileWriter(summary_path, sess.graph)
            merge_ops = tf.summary.merge_all()
            start = time.time()
            data_path = opt.train_data_path + "data_SNR_" + str(opt.SNR)
            # Training-set dataset.
            train_dataset = CSISet(data_path, opt.batch_size, True,
                                   state="train")
            # Validation-set dataset.
            validation_dataset = CSISet(data_path, opt.batch_size, True,
                                        state="validation")
            # Intermediate values kept for plotting afterwards.
            train_mse_for_plot = []
            valid_mse_for_plot = []
            for num in range(opt.num_epoch):
                # Decide whether the learning rate needs to change.
                if opt.optimizer_type == "Momentum" and (
                        num % opt.learning_rate_change_epoch) == 0:
                    learning_rate *= opt.learning_rate_decay
                    print("第%i个epoch开始,当前学习率是%f" % (num, learning_rate))
                for ii, (batch_x, batch_y) in enumerate(
                        train_dataset.get_data()):
                    if opt.model_type == 1 or opt.model_type == 2:
                        _, train_mse, summary = sess.run(
                            [train_ops, mse, merge_ops],
                            feed_dict={
                                inputs: batch_x,
                                labels: batch_y
                            })
                    elif opt.model_type == 3:
                        _, _, _, train_mse, summary = sess.run([
                            d_train_ops, g_train_ops, pre_train_ops, mse,
                            merge_ops
                        ],
                            feed_dict={
                                inputs: batch_x,
                                labels: batch_y
                            })
                    writer.add_summary(summary)
                    if (ii + 1) % 1000 == 0:
                        print("epoch-%d, batch_num-%d: 当前batch训练数据误差是%f"
                              % (num + 1, ii + 1, train_mse))
                        # Every 1000 batches, evaluate on the validation set.
                        validate_mse = 0
                        # BUG FIX: `jj` used to start at 1 and the sum was
                        # divided by (jj + 1) — i.e. by n_batches + 2 — so the
                        # reported average was biased low. Count the batches
                        # exactly and divide by that count.
                        jj = 0
                        for (validate_x,
                             validate_y) in validation_dataset.get_data():
                            temp_mse = sess.run(mse,
                                                feed_dict={
                                                    inputs: validate_x,
                                                    labels: validate_y
                                                })
                            validate_mse += temp_mse
                            jj += 1
                        validate_mse = validate_mse / max(jj, 1)
                        print("epoch-%d: 当前阶段验证集数据平均误差是%f"
                              % (num + 1, validate_mse))
                        train_mse_for_plot.append(train_mse)
                        valid_mse_for_plot.append(validate_mse)
            end = time.time()
            utils.print_time(start, end, "跑完" + str(opt.num_epoch) + "个epoch")
            plot_path = opt.result_path + model_type + "\\data_SNR_" + str(
                opt.SNR) + "\\train"
            utils.plot_fig(train_mse_for_plot, valid_mse_for_plot, plot_path)
            print("训练过程中最小验证误差是%f" % min(valid_mse_for_plot))
            # Save the model files.
            model_file = opt.model_path + model_type + "\\data_SNR_" + str(
                opt.SNR) + "\\data_SNR_" + str(opt.SNR)
            model_utils.save_model(saver, sess, model_file)
# --- fragment: tail of an evaluation routine (the enclosing `def` and the
# initialization of `total_num`/`correct_num`/`confusion_matrix` are
# outside the visible chunk) ---
            data = data.cuda()
        score = model_train(data)
        confusion_matrix.add(score.data.squeeze(),
                             label.type(torch.LongTensor))
        # arg-max class per sample
        _, predict = torch.max(score.data, 1)
        total_num += label.size(0)
        correct_num += (predict.cpu() == label).sum()
    # switch back to training mode after evaluation
    model_train.train()
    accuracy = 100 * float(correct_num) / float(total_num)
    return confusion_matrix, accuracy


if __name__ == '__main__':
    # login itchat (WeChat) to push training notifications
    itchat.auto_login()
    itchat_send('start!')
    opt = DefaultConfig()
    opt.parse({
        'model': 'VGG19',
        'max_epoch': 200,
        'weight_decay': 15e-4,
        'lr': 0.1
    })
    train(opt)