def train(): """ :return: """ # create model model = MobileNetV2(args.num_classes, width_mult=args.width_mult) # loading pre trained weight logger.logger.info("Loading PreTrained Weight".center(100, '=')) utils.load_filtered_stat_dict( model, model_zoo.load_url(model_urls["mobilenet_v2"])) # loading data logger.logger.info("Loading data".center(100, '=')) train_data_loader, valid_data_loader = loadData(args.train_data, args.input_size, args.batch_size, args.num_classes) print() # initialize loss function cls_criterion = nn.BCEWithLogitsLoss().cuda(0) reg_criterion = nn.MSELoss().cuda(0) softmax = nn.Softmax(dim=1).cuda(0) model.cuda(0) # training logger.logger.info("Training".center(100, '=')) # initialize learning rate and step lr = args.lr step = 0 for epoch in range(args.epochs + 1): print("Epoch:", epoch) if epoch > args.unfreeze: optimizer = torch.optim.Adam( [{ "params": get_non_ignored_params(model), "lr": lr }, { "params": get_cls_fc_params(model), "lr": lr }], lr=args.lr) else: optimizer = torch.optim.Adam( [{ "params": get_non_ignored_params(model), "lr": 0 }, { "params": get_cls_fc_params(model), "lr": lr * 5 }], lr=args.lr) lr = lr * args.lr_decay min_degree_error = 180. for i, (images, classify_label, vector_label, name) in enumerate(train_data_loader): step += 1 images = images.cuda(0) classify_label = classify_label.cuda(0) vector_label = vector_label.cuda(0) # inference x_cls_pred, y_cls_pred, z_cls_pred = model(images) logits = [x_cls_pred, y_cls_pred, z_cls_pred] loss, degree_error = utils.computeLoss(classify_label, vector_label, logits, softmax, cls_criterion, reg_criterion, args) #print(loss) # backward grad = [torch.tensor(1.0).cuda(0) for _ in range(3)] optimizer.zero_grad() torch.autograd.backward(loss, grad) optimizer.step() # save training log and weight if (i + 1) % 10 == 0: msg = "Epoch: %d/%d | Iter: %d/%d | x_loss: %.6f | y_loss: %.6f | z_loss: %.6f | degree_error:%.3f" % ( epoch, args.epochs, i + 1, len(train_data_loader.dataset) // args.batch_size, loss[0].item(), loss[1].item(), loss[2].item(), degree_error.item()) logger.logger.info(msg) valid_degree_error = valid(model, valid_data_loader, softmax) # writer summary writer.add_scalar("train degrees error", degree_error, step) writer.add_scalar("valid degrees error", valid_degree_error, step) # saving snapshot if valid_degree_error < min_degree_error: min_degree_error = valid_degree_error logger.logger.info( "A better validation degrees error {}".format( valid_degree_error)) torch.save( model.state_dict(), os.path.join( snapshot_dir, output_string + '_epoch_' + str(epoch) + '.pkl'))
    net = sam(net, 32, name='sam2')
    net = tf.nn.relu(net)
    net = conv2d(net, 1, 5, name='conv3')
    return net


tf.reset_default_graph()
phTrainInput = tf.placeholder(
    tf.float32, [None, patch_size, patch_size, NUM_BANDS_IN],
    name='train_images')
phTrainTarget = tf.placeholder(
    tf.float32, [None, patch_size, patch_size, 1], name='train_labels')
global_steps = tf.Variable(0, name="global_step", trainable=False)

phPrediction = ASRCNN(phTrainInput)
loss = utils.computeLoss(phTrainTarget, phPrediction)

# exponentially decay the learning rate every decay_step steps
curr_lr_op = tf.train.exponential_decay(lr, global_steps, decay_step,
                                        decay_ratio, staircase=True)
train_op = tf.train.AdamOptimizer(learning_rate=curr_lr_op).minimize(
    loss, global_step=global_steps)
gpu_options = tf.GPUOptions(allow_growth=allow_growth)

# data
trainData1, testData, trainData2, trainTarget2, testTarget, trainTarget1, \
    minNDVI, maxNDVI, perm = utils.readData(
        data_file, rcstart, rcend, opt.mode, data_scale)
trainData = [trainData1, trainData2]
trainTarget = [trainTarget1, trainTarget2]
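# A minimal sketch of the TF1 session loop that would drive the graph built
# above. `epochs` and `batch_size` are assumed to be defined alongside the
# other hyperparameters (both are assumptions; the actual loop lives
# elsewhere in this script).
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        # iterate over the two training sets assembled above
        for data, target in zip(trainData, trainTarget):
            for start in range(0, len(data), batch_size):
                batch = slice(start, start + batch_size)
                _, batch_loss = sess.run(
                    [train_op, loss],
                    feed_dict={phTrainInput: data[batch],
                               phTrainTarget: target[batch]})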
def train(net, bins, alpha, beta, batch_size):
    """
    params:
        net: backbone name, "resnet50" or MobileNetV2 otherwise
        bins: number of bins for classification
        alpha: regression loss weight
        beta: ortho loss weight
        batch_size: training batch size
    """
    # create model
    if net == "resnet50":
        model = ResNet(torchvision.models.resnet50(pretrained=True),
                       num_classes=bins)
        lr = args.lr_resnet
    else:
        model = MobileNetV2(bins)
        lr = args.lr_mobilenet

    # load data
    logger.logger.info("Loading data".center(100, '='))
    train_data_loader = loadData(args.train_data, args.input_size,
                                 batch_size, bins)
    valid_data_loader = loadData(args.valid_data, args.input_size,
                                 batch_size, bins, False)

    # initialize classification loss function
    if args.cls_loss == "KLDiv":
        cls_criterion = nn.KLDivLoss(reduction='batchmean').cuda(0)
    elif args.cls_loss == "BCE":
        cls_criterion = nn.BCELoss().cuda(0)

    # initialize regression loss function
    reg_criterion = nn.MSELoss().cuda(0)
    softmax = nn.Softmax(dim=1).cuda(0)
    model.cuda(0)

    # training log
    logger.logger.info("Training".center(100, '='))

    # initialize step counter and best validation error
    step = 0
    min_avg_error = 1000.

    # start training
    for epoch in range(args.epochs):
        print("Epoch:", epoch)

        # per-epoch optimizer with backbone/head learning-rate groups
        if net == 'resnet50':
            # ResNet-50: backbone at the base rate, classification heads at
            # 10x, before and after the unfreeze epoch
            optimizer = torch.optim.Adam(
                [{"params": get_non_ignored_params(model, net), "lr": lr},
                 {"params": get_cls_fc_params(model), "lr": lr * 10}],
                lr=args.lr_resnet)
        else:
            # MobileNetV2: train at 10x the base rate until the unfreeze
            # epoch, then drop to the base rate
            if epoch >= args.unfreeze:
                optimizer = torch.optim.Adam(
                    [{"params": get_non_ignored_params(model, net),
                      "lr": lr},
                     {"params": get_cls_fc_params(model), "lr": lr}],
                    lr=args.lr_mobilenet)
            else:
                optimizer = torch.optim.Adam(
                    [{"params": get_non_ignored_params(model, net),
                      "lr": lr * 10},
                     {"params": get_cls_fc_params(model), "lr": lr * 10}],
                    lr=args.lr_mobilenet)

        # reduce lr by the lr_decay factor each epoch
        lr = lr * args.lr_decay
        print("------------")

        for i, (images, cls_v1, cls_v2, cls_v3, reg_v1, reg_v2, reg_v3,
                name) in enumerate(train_data_loader):
            step += 1
            images = images.cuda(0)

            # binned (classification) labels for the three vectors
            cls_v1 = cls_v1.cuda(0)
            cls_v2 = cls_v2.cuda(0)
            cls_v3 = cls_v3.cuda(0)

            # continuous (regression) labels for the three vectors
            reg_v1 = reg_v1.cuda(0)
            reg_v2 = reg_v2.cuda(0)
            reg_v3 = reg_v3.cuda(0)

            # inference: x/y/z predictions for each of the three vectors
            (x_pred_v1, y_pred_v1, z_pred_v1,
             x_pred_v2, y_pred_v2, z_pred_v2,
             x_pred_v3, y_pred_v3, z_pred_v3) = model(images)
            logits = [
                x_pred_v1, y_pred_v1, z_pred_v1,
                x_pred_v2, y_pred_v2, z_pred_v2,
                x_pred_v3, y_pred_v3, z_pred_v3
            ]
            loss, degree_error_v1, degree_error_v2, degree_error_v3 = \
                utils.computeLoss(
                    cls_v1, cls_v2, cls_v3, reg_v1, reg_v2, reg_v3, logits,
                    softmax, cls_criterion, reg_criterion,
                    [bins, alpha, beta, args.cls_loss, args.reg_loss,
                     args.ortho_loss])

            # backward: one unit gradient per loss term
            grad = [torch.tensor(1.0).cuda(0) for _ in range(3)]
            optimizer.zero_grad()
            torch.autograd.backward(loss, grad)
            optimizer.step()

            # log training progress every 100 iterations
            if (i + 1) % 100 == 0:
                msg = ("Epoch: %d/%d | Iter: %d/%d | x_loss: %.6f | "
                       "y_loss: %.6f | z_loss: %.6f | degree_error_f:%.3f | "
                       "degree_error_r:%.3f | degree_error_u:%.3f" % (
                           epoch, args.epochs, i + 1,
                           len(train_data_loader.dataset) // batch_size,
                           loss[0].item(), loss[1].item(), loss[2].item(),
                           degree_error_v1.item(), degree_error_v2.item(),
                           degree_error_v3.item()))
                logger.logger.info(msg)

        # test on the validation set after each epoch
        error_v1, error_v2, error_v3 = valid(model, valid_data_loader,
                                             softmax, bins)
        print("Epoch:", epoch)
        print("Validation Error:", error_v1.item(), error_v2.item(),
              error_v3.item())
        logger.logger.info("Validation Error(l,d,f)_{},{},{}".format(
            error_v1.item(), error_v2.item(), error_v3.item()))

        # save the model when it achieves a better validation performance
        if (error_v1.item() + error_v2.item() + error_v3.item()
                < min_avg_error):
            min_avg_error = (error_v1.item() + error_v2.item() +
                             error_v3.item())
            print("Training Info:")
            print("Model:", net, " ", "Number of bins:", bins, " ",
                  "Alpha:", alpha, " ", "Beta:", beta)
            print("Saving Model......")
            torch.save(
                model.state_dict(),
                os.path.join(snapshot_dir,
                             output_string + '_Best_' + '.pkl'))
            print("Saved")
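# A minimal usage sketch: sweep the hyperparameters train() exposes. The
# grid values below are illustrative assumptions, not the settings used in
# any experiment.
if __name__ == "__main__":
    for net in ["resnet50", "mobilenetv2"]:
        for bins in [40, 66]:
            for alpha in [0.5, 1.0]:
                train(net, bins, alpha, beta=0.1, batch_size=32)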
def train(): """ :return: """ # create model model = vgg19_bn() if torch.cuda.device_count() > 1: model = nn.DataParallel(model) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = model.to(device) num_params = sum(p.numel() for p in net.parameters() if p.requires_grad) print('The number of parameters of model is', num_params) # loading pre trained weight #logger.logger.info("Loading PreTrained Weight".center(100, '=')) #utils.load_filtered_stat_dict(model, model_zoo.load_url(model_urls["mobilenet_v2"])) # loading data logger.logger.info("Loading data".center(100, '=')) train_data_loader, valid_data_loader = loadData(args.train_data, args.input_size, args.batch_size, args.num_classes) print() # initialize loss function cls_criterion = nn.BCEWithLogitsLoss() reg_criterion = nn.MSELoss() softmax = nn.Softmax(dim=1) #model.to(device) # training logger.logger.info("Training".center(100, '=')) # initialize learning rate and step lr = args.lr step = 0 for epoch in range(args.epochs + 1): print("Epoch:", epoch) if epoch > args.unfreeze: optimizer = torch.optim.Adam([{"params": get_non_ignored_params(model), "lr": lr}, {"params": get_cls_fc_params(model), "lr": lr}], lr=args.lr) else: optimizer = torch.optim.Adam([{"params": get_non_ignored_params(model), "lr": lr}, {"params": get_cls_fc_params(model), "lr": lr * 10}], lr=args.lr) lr = lr * args.lr_decay min_degree_error = 180. for i, (images, cls_label_f, cls_label_r, cls_label_u, vector_label_f, vector_label_r, vector_label_u, name) in enumerate(train_data_loader): step += 1 images = images.to(device) #classify_label = classify_label.cuda(0) #vector_label = vector_label.cuda(0) cls_label_f = cls_label_f.to(device) cls_label_r = cls_label_r.to(device) cls_label_u = cls_label_u.to(device) vector_label_f = vector_label_f.to(device) vector_label_r = vector_label_r.to(device) vector_label_u = vector_label_u.to(device) # inference x_cls_pred_f, y_cls_pred_f, z_cls_pred_f,x_cls_pred_r, y_cls_pred_r, z_cls_pred_r,x_cls_pred_u, y_cls_pred_u, z_cls_pred_u = model(images) logits = [x_cls_pred_f, y_cls_pred_f, z_cls_pred_f,x_cls_pred_r, y_cls_pred_r, z_cls_pred_r,x_cls_pred_u, y_cls_pred_u, z_cls_pred_u] loss, degree_error_f, degree_error_r, degree_error_u = utils.computeLoss(cls_label_f, cls_label_r, cls_label_u, vector_label_f, vector_label_r, vector_label_u, logits, softmax, cls_criterion, reg_criterion, args) #print(loss) # backward grad = [torch.tensor(1.0).to(device) for _ in range(12)] optimizer.zero_grad() torch.autograd.backward(loss, grad) optimizer.step() # save training log and weight if (i + 1) % 50 == 0: msg = "Epoch: %d/%d | Iter: %d/%d | x_loss: %.6f | y_loss: %.6f | z_loss: %.6f | degree_error_f:%.3f | degree_error_r:%.3f | degree_error_u:%.3f" % ( epoch, args.epochs, i + 1, len(train_data_loader.dataset) // args.batch_size, loss[0].item()+loss[3].item()+loss[6].item(), loss[1].item()+loss[4].item()+loss[7].item(), loss[2].item()+loss[5].item()+loss[8].item(), degree_error_f.item(), degree_error_r.item(), degree_error_u.item()) logger.logger.info(msg) valid_degree_error_f, valid_degree_error_r, valid_degree_error_u = valid(model, valid_data_loader, softmax) # writer summary writer.add_scalar("train degrees error", degree_error_f, step) writer.add_scalar("valid degrees error", valid_degree_error_f, step) # saving snapshot if valid_degree_error_f + valid_degree_error_r + valid_degree_error_u < min_degree_error: min_degree_error = valid_degree_error_f + valid_degree_error_r + valid_degree_error_u logger.logger.info("A 
                    logger.logger.info(
                        "A better validation degrees error {}".format(
                            min_degree_error))
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            snapshot_dir,
                            output_string + '_epoch_' + str(epoch) +
                            '_constrain_a=0.075' + '.pkl'))
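# A minimal sketch of reloading a snapshot saved above for evaluation,
# assuming the same vgg19_bn architecture. Because the model may have been
# wrapped in nn.DataParallel during training, the saved state_dict keys can
# carry a 'module.' prefix, which is stripped before loading.
def load_snapshot_sketch(snapshot_path, device):
    model = vgg19_bn()
    state_dict = torch.load(snapshot_path, map_location=device)
    # strip the 'module.' prefix that nn.DataParallel adds to keys
    state_dict = {k.replace('module.', '', 1): v
                  for k, v in state_dict.items()}
    model.load_state_dict(state_dict)
    model.to(device).eval()
    return model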