def __init__(self, config: BaseConfig) -> None:
    """Build the classification networks plus their I/O slots, losses and optimizers.

    Networks, inputs, outputs, criteria and optimizers are all addressed
    dynamically through ``"<kind>_%s" % name`` attributes, one set per entry
    in ``self.net_names``.

    Args:
        config: experiment configuration; must expose ``dataset_config``,
            ``isTrain``, ``loss_name``, ``lr`` and ``beta1``.
    """
    super().__init__(config)
    self.config = config
    # Nets are looked up as "net_%s" % name (e.g. a future self.net_face_upper).
    self.net_names = ["whole"]
    self.net_whole = self.create_network_model()
    # All attribute indices are handled by the "whole" net.
    # (list(range(n)) replaces the redundant [i for i in range(n)] copy.)
    self.attr_whole_index = list(range(config.dataset_config.attribute_num))
    # Per-network input/output/attribute slots, populated during forward passes.
    for name in self.net_names:
        setattr(self, "img_%s" % name, None)
        setattr(self, "output_%s" % name, None)
        setattr(self, "attr_%s" % name, None)
    # Criteria and optimizers are only needed at training time.
    if config.isTrain:
        for name in self.net_names:
            if self.config.loss_name == "bce":
                criterion = nn.BCEWithLogitsLoss()
            else:
                criterion = create_loss(self.config)
            setattr(self, "criterion_%s" % name, criterion)
            optimizer = optim.Adam(
                getattr(self, "net_%s" % name).parameters(),
                lr=config.lr,
                betas=(config.beta1, 0.999),
            )
            # Bind the optimizer once instead of re-fetching it via getattr.
            setattr(self, "optimizer_%s" % name, optimizer)
            setattr(self, "loss_%s" % name, None)
            self.optimizers.append(optimizer)
def main(args):
    """Entry point: set up logging, device, W&B, data, model, optimiser and loss,
    then hand everything to the trainer.

    Args:
        args: parsed command-line namespace; ``logdir`` is written back onto it.
    """
    logdir = init_logging(args)
    logger = logging.getLogger(__name__)
    args.logdir = logdir

    # Pick the compute device; cuDNN autotuning only makes sense on GPU.
    use_cpu = args.cpu or not th.cuda.is_available()
    if use_cpu:
        device = th.device('cpu')
    else:
        device = th.device('cuda')
        cudnn.enabled = True
        cudnn.benchmark = True

    # Experiment tracking is skipped for dev runs and no-save runs.
    if not args.devrun and not args.nosave:
        wandb.init(config=args, dir=logdir, project=args.project)
        if args.name is not None:
            wandb.run.name = args.name
        # else:
        #     wandb.run.name = wandb.run.id

    seed_all(args.seed)

    logger.info('Creating dataloader')
    loader = create_dataloader(args)

    logger.info('Creating model')
    model = create_model(args).to(device)

    logger.info('Creating optimiser')
    opt = create_optimiser(model.parameters(), args)

    logger.info('Creating loss')
    loss = create_loss(args)

    logger.info('Creating trainer')
    trainer = create_trainer(loader, model, opt, loss, device, args)

    epochs = args.epochs
    epoch_length = args.epoch_length

    logger.info('Starting trainer')
    wandb.watch(model, log="all", log_freq=1)
    trainer.run(loader['train'], max_epochs=epochs, epoch_length=epoch_length)
# Build the compression-artifact-removal training graph (TF1 style).
data_folder = 'data'
batch_size = p.BATCH_SIZE
input_prod = InputProducer(data_folder, 'png')

# Placeholders: the clean target image and its JPEG-compressed counterpart.
inp = tf.placeholder(tf.float32, shape=[None, p.IMAGE_SIZE, p.IMAGE_SIZE, p.CHANNELS])
inp_compressed = tf.placeholder(tf.float32, shape=[None, p.IMAGE_SIZE, p.IMAGE_SIZE, p.CHANNELS])

return_image = net.create_flat_net(inp_compressed)
loss_value = loss.create_loss(inp, return_image)
tf.summary.scalar('Loss', loss_value)

# trainable=False keeps the step counter out of the optimizer's trainable
# variable set (the TF1 convention for global_step).
global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
train_step = tf.train.AdamOptimizer(0.001).minimize(loss_value, global_step=global_step)
merged = tf.summary.merge_all()

# exist_ok avoids the racy exists-then-create check.
os.makedirs('./networks/', exist_ok=True)

with tf.Session() as sess:
    net_name = 'less_jpeg-full-deep-res'
    saver = tf.train.Saver()
def main():
    """Train the 2-class segmentation model with PaddlePaddle fluid.

    Builds the train/test programs, runs the epoch loop, logs loss/mIoU to
    VisualDL, periodically evaluates, and saves persistables plus the best
    inference model (by eval mIoU).
    """
    train_program = fluid.Program()
    train_init = fluid.Program()
    with fluid.program_guard(train_program, train_init):
        image = fluid.layers.data(name="image", shape=[cfg.TRAIN.THICKNESS, 512, 512], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1, 512, 512], dtype="int32")
        train_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=cfg.TRAIN.BATCH_SIZE * 2,
            iterable=True,
            use_double_buffer=True,
        )
        prediction = create_model(image, 2)
        avg_loss = loss.create_loss(prediction, label, 2)
        miou = loss.mean_iou(prediction, label, 2)

        # Regularization
        if cfg.TRAIN.REG_TYPE == "L1":
            decay = paddle.fluid.regularizer.L1Decay(cfg.TRAIN.REG_COEFF)
        elif cfg.TRAIN.REG_TYPE == "L2":
            decay = paddle.fluid.regularizer.L2Decay(cfg.TRAIN.REG_COEFF)
        else:
            decay = None

        # Choose the optimizer
        lr = fluid.layers.piecewise_decay(boundaries=cfg.TRAIN.BOUNDARIES, values=cfg.TRAIN.LR)
        if cfg.TRAIN.OPTIMIZER == "adam":
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=decay,
            )
        elif cfg.TRAIN.OPTIMIZER == "sgd":
            optimizer = fluid.optimizer.SGDOptimizer(learning_rate=lr, regularization=decay)
        # BUG FIX: was `cfg.TRAIN.OPTIMIZE` (missing R), so "momentum" could
        # never be selected and always fell through to the raise below.
        elif cfg.TRAIN.OPTIMIZER == "momentum":
            optimizer = fluid.optimizer.Momentum(
                momentum=0.9,
                learning_rate=lr,
                regularization=decay,
            )
        else:
            raise Exception("错误的优化器类型: {}".format(cfg.TRAIN.OPTIMIZER))
        optimizer.minimize(avg_loss)

    places = fluid.CUDAPlace(0) if cfg.TRAIN.USE_GPU else fluid.CPUPlace()
    exe = fluid.Executor(places)
    exe.run(train_init)
    exe_test = fluid.Executor(places)
    test_program = train_program.clone(for_test=True)
    compiled_train_program = fluid.CompiledProgram(
        train_program).with_data_parallel(loss_name=avg_loss.name)

    if cfg.TRAIN.PRETRAINED_WEIGHT != "":
        print("Loading paramaters")
        fluid.io.load_persistables(exe, cfg.TRAIN.PRETRAINED_WEIGHT, train_program)

    # train_reader = fluid.io.xmap_readers(
    #     aug_mapper, data_reader(0, 8), multiprocessing.cpu_count()/2, 16
    # )
    train_reader = data_reader(0, 8)
    train_loader.set_sample_generator(train_reader,
                                      batch_size=cfg.TRAIN.BATCH_SIZE,
                                      places=places)
    test_reader = paddle.batch(data_reader(8, 10), cfg.INFER.BATCH_SIZE)
    test_feeder = fluid.DataFeeder(place=places, feed_list=[image, label])

    writer = LogWriter(logdir="/home/aistudio/log/{}".format(datetime.now()))

    step = 0
    best_miou = 0
    # BUG FIX: initialise eval_miou so the best-model check below cannot hit a
    # NameError when cfg.TRAIN.DO_EVAL is False.
    eval_miou = 0
    for pass_id in range(cfg.TRAIN.EPOCHS):
        for train_data in train_loader():
            step += 1
            avg_loss_value, miou_value = exe.run(compiled_train_program,
                                                 feed=train_data,
                                                 fetch_list=[avg_loss, miou])
            writer.add_scalar(tag="train_loss", step=step, value=avg_loss_value[0])
            writer.add_scalar(tag="train_miou", step=step, value=miou_value[0])
            if step % cfg.TRAIN.DISP_BATCH == 0:
                print("\tTrain pass {}, Step {}, Cost {}, Miou {}".format(
                    pass_id, step, avg_loss_value[0], miou_value[0]))
            # Abort on divergence rather than wasting the rest of the run.
            if math.isnan(float(avg_loss_value[0])):
                sys.exit("Got NaN loss, training failed.")

            # Periodic evaluation + persistables snapshot.
            if step % cfg.TRAIN.SNAPSHOT_BATCH == 0 and cfg.TRAIN.DO_EVAL:
                test_step = 0
                eval_miou = 0
                test_losses = []
                test_mious = []
                for test_data in test_reader():
                    test_step += 1
                    preds, test_loss, test_miou = exe_test.run(
                        test_program,
                        feed=test_feeder.feed(test_data),
                        fetch_list=[prediction, avg_loss, miou],
                    )
                    test_losses.append(test_loss[0])
                    test_mious.append(test_miou[0])
                    if test_step % cfg.TRAIN.DISP_BATCH == 0:
                        print("\t\tTest Loss: {} , Miou: {}".format(
                            test_loss[0], test_miou[0]))
                eval_miou = np.average(np.array(test_mious))
                writer.add_scalar(
                    tag="test_miou",
                    step=step,
                    value=eval_miou,
                )
                print("Test loss: {} ,miou: {}".format(
                    np.average(np.array(test_losses)), eval_miou))
                ckpt_dir = os.path.join(cfg.TRAIN.CKPT_MODEL_PATH,
                                        str(step) + "_" + str(eval_miou))
                fluid.io.save_persistables(exe, ckpt_dir, train_program)
                print("此前最高的测试MIOU是: ", best_miou)

            # Keep the best-scoring inference model.
            if step % cfg.TRAIN.SNAPSHOT_BATCH == 0 and eval_miou > best_miou:
                best_miou = eval_miou
                print("正在保存第 {} step的权重".format(step))
                fluid.io.save_inference_model(
                    cfg.TRAIN.INF_MODEL_PATH,
                    feeded_var_names=["image"],
                    target_vars=[prediction],
                    executor=exe,
                    main_program=train_program,
                )