Example #1
def train(dataloaders, model, opt, num_epochs=10):
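    # NOTE: `device` is not a parameter of this function; it is presumably
    # defined in the enclosing module scope.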
    trainer = BaseTrainer(opt, device)
    board = TensorboardCallback(opt)
    trainer.run_train(model,
                      dataloaders,
                      callbacks=[board],
                      num_epochs=num_epochs)
    return model
Example #2
def train(args):
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
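    # NOTE: this copy assumes --config_file was actually supplied; with the
    # default "" it would raise an exception.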
    shutil.copy(args.config_file, cfg.OUTPUT_DIR)

    num_gpus = torch.cuda.device_count()

    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info(args)
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    model = build_model(cfg, num_classes)
    # print(model)
    loss_func = make_loss(cfg, num_classes)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss_func, num_query,
                          num_gpus)

    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #3
def worker(gpu, ngpus_per_node, args):

    print("running base training...")
    model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.evaluate_model(60001)  # change the output name by changing the number
Example #4
def setup_teacher(t_name, params):
    # Teacher Model
    num_classes = params["num_classes"]
    t_net = create_model(t_name, num_classes, params["device"])
    teacher_config = params.copy()
    teacher_config["test_name"] = t_name + "_teacher"

    if params["t_checkpoint"]:
        # Just validate the performance
        print("---------- Loading Teacher -------")
        best_teacher = params["t_checkpoint"]
    else:
        # Teacher training
        print("---------- Training Teacher -------")
        teacher_trainer = BaseTrainer(t_net, config=teacher_config)
        teacher_trainer.train()
        best_teacher = teacher_trainer.best_model_file

    # reload and get the best model
    t_net = util.load_checkpoint(t_net, best_teacher)
    teacher_trainer = BaseTrainer(t_net, config=teacher_config)
    best_t_acc = teacher_trainer.validate()

    # also save this information in a csv file for plotting
    name = teacher_config["test_name"] + "_val"
    acc_file_name = params["results_dir"].joinpath(f"{name}.csv")
    with acc_file_name.open("w+") as acc_file:
        acc_file.write("Training Loss,Validation Loss\n")
        for _ in range(params["epochs"]):
            acc_file.write(f"0.0,{best_t_acc}\n")
    return t_net, best_teacher, best_t_acc
Example #5
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        model = AdvTrainer(args)
    else:
        model = BaseTrainer(args)

    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.train()
Example #6
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        print("running adv training...")
        model = AdvTrainer(args)
    else:
        print("running base training...")
        model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.train()
Example #7
File: main.py  Project: arkhycat/mrqa
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        print("running adv training...")
        model = AdvTrainer(args)
    else:
        print("running base training...")
        model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    if args.only_test:
        #model.test()
        model.get_embeddings()
    else:
        model.train()
Example #8
def main():
    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))
    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)
    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)
    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_query,
                          num_gpus)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #9
def main():
    parser = argparse.ArgumentParser(description="Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line", default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = 0
    device = torch.device("cpu")
    if cfg.MODEL.DEVICE == 'cuda' and torch.cuda.is_available():
        num_gpus = len(cfg.MODEL.DEVICE_IDS)-1
        device_ids = cfg.MODEL.DEVICE_IDS.strip("d")
        print(device_ids)
        device = torch.device("cuda:{0}".format(device_ids))

    logger = setup_logger('baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))


    train_dl, val_dl = make_dataloader(cfg, num_gpus)

    model = build_model(cfg)

    loss = make_loss(cfg, device)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl,
                                  loss, num_gpus, device)

    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #10
    def build_trainer(self, args: ClassifierArgs, dataset: Dataset,
                      data_loader: DataLoader) -> BaseTrainer:
        # get optimizer
        optimizer = self.build_optimizer(args)

        # get learning rate decay
        lr_scheduler = CosineAnnealingLR(
            optimizer,
            len(dataset) // args.batch_size * args.epochs)

        # get tensorboard writer
        writer = self.build_writer(args)

        trainer = BaseTrainer(data_loader, self.model, self.loss_function,
                              optimizer, lr_scheduler, writer)
        if args.training_type == 'freelb':
            trainer = FreeLBTrainer(data_loader, self.model,
                                    self.loss_function, optimizer,
                                    lr_scheduler, writer)
        elif args.training_type == 'pgd':
            trainer = PGDTrainer(data_loader, self.model, self.loss_function,
                                 optimizer, lr_scheduler, writer)
        elif args.training_type == 'advhotflip':
            trainer = HotflipTrainer(args, self.tokenizer, data_loader,
                                     self.model, self.loss_function, optimizer,
                                     lr_scheduler, writer)
        elif args.training_type == 'metric':
            trainer = EmbeddingLevelMetricTrainer(data_loader, self.model,
                                                  self.loss_function,
                                                  optimizer, lr_scheduler,
                                                  writer)
        elif args.training_type == 'metric_token':
            trainer = TokenLevelMetricTrainer(args, self.tokenizer,
                                              data_loader, self.model,
                                              self.loss_function, optimizer,
                                              lr_scheduler, writer)
        elif args.training_type == 'sparse':
            # trick = True if args.dataset_name in ['mr'] else False
            trainer = MaskTrainer(args, self.data_processor, data_loader,
                                  self.model, self.loss_function, optimizer,
                                  lr_scheduler, writer)
        elif args.training_type == 'safer':
            trainer = SAFERTrainer(args, self.data_processor, data_loader,
                                   self.model, self.loss_function, optimizer,
                                   lr_scheduler, writer)
        return trainer
Example #11
def main():
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line", default=None,nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))
    if cfg.INPUT.SEPNORM.USE:
        train_dl, val_dl, num_query, num_classes = make_sepnorm_dataloader(cfg, num_gpus)
    elif cfg.DATASETS.EXEMPLAR.USE:
        train_dl, val_dl, num_query, num_classes, exemplar_dl = make_dataloader(cfg, num_gpus)
    else:
        train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)
    if cfg.SOLVER.CENTER_LOSS.USE:
        trainer = CenterTrainer(cfg, model, train_dl, val_dl,
                      loss, num_query, num_gpus)
    else:
        if cfg.SOLVER.MIXUP.USE:
            trainer = NegMixupTrainer(cfg, model, train_dl, val_dl,
                              loss, num_query, num_gpus)
        elif cfg.DATASETS.EXEMPLAR.USE:
            if cfg.DATASETS.EXEMPLAR.MEMORY.USE:
                trainer = ExemplarMemoryTrainer(cfg, model, train_dl, val_dl, exemplar_dl,
                                  loss, num_query, num_gpus)
            else:
                trainer = UIRLTrainer(cfg, model, train_dl, val_dl, exemplar_dl,
                                  loss, num_query, num_gpus)
        elif cfg.DATASETS.HIST_LABEL.USE:
            trainer = HistLabelTrainer(cfg, model, train_dl, val_dl,
                    loss, num_query, num_gpus)
        else:
            trainer = BaseTrainer(cfg, model, train_dl, val_dl,
                              loss, num_query, num_gpus)
    if cfg.INPUT.SEPNORM.USE:
        logger.info('train transform0: \n{}'.format(train_dl.dataset.transform0))
        logger.info('train transform1: \n{}'.format(train_dl.dataset.transform1))

        logger.info('valid transform0: \n{}'.format(val_dl.dataset.transform0))
        logger.info('valid transform1: \n{}'.format(val_dl.dataset.transform1))

    else:
        logger.info('train transform: \n{}'.format(train_dl.dataset.transform))
        logger.info('valid transform: \n{}'.format(val_dl.dataset.transform))
    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #12
def test_nokd(s_net, t_net, params):
    print("---------- Training NOKD -------")
    nokd_config = params.copy()
    nokd_trainer = BaseTrainer(s_net, config=nokd_config)
    best_acc = nokd_trainer.train()
    return best_acc
Example #13
train_dataset, test_dataset = random_split(
    HAM10000('datasets/archive/'), [8015, 2000],
    generator=torch.Generator().manual_seed(42))

model = AttUNet().to(device)
if pretrained_path is not None:
    load(model=model, name=pretrained_path)

if what == 'train':
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Trainer and Training
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    trainer = BaseTrainer(model, train_dataset, loss=FocalLoss()).to(device)

    trainer.supervise(lr=lr,
                      epochs=epochs,
                      batch_size=batch_size,
                      name='store/' + name)

if what in ['train', 'validate']:
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Validate
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    test(model, test_dataset, BinaryIOU())

if what in ['train', 'validate', 'draw']:
Example #14
import yaml
import argparse

from trainer import BaseTrainer


def arg_parser():
    parser = argparse.ArgumentParser(description="config")
    parser.add_argument("--config",
                        type=str,
                        default="config/test.yaml",
                        help="Specified the path of configuration file to be used.")

    return parser.parse_args()


if __name__ == '__main__':
    import torch

    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    arg = arg_parser()
    config = arg.config

    with open(config) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    trainer = BaseTrainer(config, stage="Test")
    trainer.test()
Example #15
import yaml
import argparse

from trainer import BaseTrainer


def arg_parser():
    parser = argparse.ArgumentParser(description="config")
    parser.add_argument(
        "--config",
        type=str,
        default="config/train.yaml",
        help="Specified the path of configuration file to be used.")

    return parser.parse_args()


if __name__ == '__main__':
    import torch

    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    arg = arg_parser()
    config = arg.config

    with open(config) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    trainer = BaseTrainer(config, stage="Train")
    trainer.train()
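
Note: none of the examples above includes the BaseTrainer implementation itself, and its constructor signature differs from project to project. The skeleton below is a minimal, hypothetical sketch, not taken from any of these repositories (every name and default in it is an assumption); it only mirrors the config/stage style of Examples #14 and #15 so those two scripts can be exercised end to end.

class BaseTrainer:
    """Hypothetical stand-in trainer: stores a config dict and a stage name."""

    def __init__(self, config, stage="Train"):
        self.config = config
        self.stage = stage
        self.epochs = int(config.get("epochs", 1))

    def train(self):
        # A real trainer would iterate a DataLoader and update the model here.
        for epoch in range(self.epochs):
            print(f"[{self.stage}] epoch {epoch + 1}/{self.epochs}")

    def test(self):
        # A real trainer would run the model over a held-out set here.
        print(f"[{self.stage}] running evaluation")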