Example #1
def main(batch_size, baseline, reduction):
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        'data/cifar10', train=True, download=True, transform=transform_train),
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        'data/cifar10', train=False, transform=transform_test),
                                              batch_size=batch_size,
                                              shuffle=True)
    if baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=reduction)
    optimizer = optim.SGD(params=model.parameters(),
                          lr=1e-1,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = StepLR(optimizer, 80, 0.1)
    trainer = Trainer(model, optimizer, F.cross_entropy)
    trainer.loop(200, train_loader, test_loader, scheduler)
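The listings here define a main(batch_size, baseline, reduction) entry point but omit how it is invoked. A minimal, hypothetical command-line wrapper (argument names and defaults are assumptions, not taken from the original project) could look like this:

import argparse

if __name__ == "__main__":
    # Hypothetical CLI wiring for the main() above; flags and defaults are assumed.
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--baseline", action="store_true",
                        help="train plain ResNet-20 instead of SE-ResNet-20")
    parser.add_argument("--reduction", type=int, default=16,
                        help="reduction ratio of the squeeze-and-excitation blocks")
    args = parser.parse_args()
    main(args.batch_size, args.baseline, args.reduction)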
Example #2
def main(batch_size, data_root):
    train_data = MyDataset(
        mode='train',
        txt=data_root + 'train_label_balance.txt',
        transform=transforms.Compose([
            # transforms.RandomResizedCrop(224),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.3301, 0.3301, 0.3301],
                                 std=[0.1938, 0.1938, 0.1938])
        ]))
    test_data = MyDataset(
        mode='test',
        txt=data_root + 'test_label_balance.txt',
        transform=transforms.Compose([
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.3301, 0.3301, 0.3301],
                                 std=[0.1938, 0.1938, 0.1938])
        ]))

    train_loader = DataLoader(
        train_data, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    test_loader = DataLoader(
        test_data, batch_size=batch_size, shuffle=False, num_workers=8)
    model = UNet(n_channels=3, n_classes=3)
    print(model)
    model = nn.DataParallel(model.cuda(), device_ids=[0])
    optimizer = optim.SGD(params=model.parameters(),
                          lr=0.01, momentum=0.9, weight_decay=1e-5)
    # optimizer = optim.Adam(params=model.parameters(), lr=0.01)
    scheduler = StepLR(optimizer, 10, gamma=0.1)
    trainer = Trainer(model, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(50, train_loader, test_loader, scheduler)
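For reference, F.cross_entropy applied to the segmentation output of UNet(n_channels=3, n_classes=3) expects per-pixel logits of shape (N, C, H, W) and integer class targets of shape (N, H, W). A small, self-contained shape check (illustrative only, not part of the original script):

import torch
import torch.nn.functional as F

# Illustrative shapes: a batch of 4 images at 64x64 with 3 output classes.
logits = torch.randn(4, 3, 64, 64)           # what the model's forward pass returns
targets = torch.randint(0, 3, (4, 64, 64))   # per-pixel class indices in [0, 3)
loss = F.cross_entropy(logits, targets)      # scalar loss averaged over all pixels
print(loss.item())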
Example #3
def main():
    import os
    import sys
    import pathlib

    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))

    from models import build_model, build_loss
    from data_loader import get_dataloader
    from utils import Trainer
    from utils import get_post_processing
    from utils import get_metric

    config = anyconfig.load(open('config.yaml', 'rb'))
    train_loader = get_dataloader(config['dataset']['train'])
    validate_loader = get_dataloader(config['dataset']['validate'])
    criterion = build_loss(config['loss']).cuda()
    model = build_model(config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])

    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
Example #4
def main(batch_size, data_root):
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    transform_test = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    traindir = os.path.join(data_root, 'train')
    valdir = os.path.join(data_root, 'val')
    train = datasets.ImageFolder(traindir, transform_train)
    val = datasets.ImageFolder(valdir, transform_test)
    train_loader = torch.utils.data.DataLoader(train,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=8)
    test_loader = torch.utils.data.DataLoader(val,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=8)
    se_resnet = se_resnet50(num_classes=1000)
    optimizer = optim.SGD(params=se_resnet.parameters(),
                          lr=0.6,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = StepLR(optimizer, 30, gamma=0.1)
    trainer = Trainer(se_resnet, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, scheduler)
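Note that the evaluation pipeline above applies CenterCrop(224) without resizing first, which assumes every validation image is at least 224 px on each side. The conventional ImageNet evaluation transform resizes the short side to 256 before cropping; an alternative sketch (not what the original author used):

from torchvision import transforms

transform_test = transforms.Compose([
    transforms.Resize(256),       # scale the shorter side to 256 px first
    transforms.CenterCrop(224),   # then take the central 224x224 patch
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])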
Example #5
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: a dim %r, agent dim: %d' % (env.n_a_ls, env.n_agent))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    model = init_agent(env, config['MODEL_CONFIG'], total_step, seed)

    # disable multi-threading for safe SUMO implementation
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, output_path=dirs['data'])
    trainer.run()

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #6
def main(batch_size, lr, momentum, epsilon, update_freq):
    train_loader, test_loader = get_dataloader(batch_size)

    model = resnet20()
    optimizer = Shampoo(params=model.parameters(), lr=lr, momentum=momentum,
                        weight_decay=1e-4, epsilon=epsilon, update_freq=update_freq)
    trainer = Trainer(model, optimizer, F.cross_entropy)
    trainer.loop(200, train_loader, test_loader)
Example #7
def main(batch_size, root):
    train_loader, test_loader = get_dataloader(batch_size, root)
    _se_resnet = se_resnet50(num_classes=1000)
    se_resnet = nn.DataParallel(_se_resnet, device_ids=[0, 1])
    optimizer = optim.SGD(params=se_resnet.parameters(), lr=0.6, momentum=0.9, weight_decay=1e-4)
    scheduler = StepLR(optimizer, 30, gamma=0.1)
    trainer = Trainer(se_resnet, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, scheduler)
Example #8
def main():
    logger.info('=> PyTorch Version: {}'.format(torch.__version__))

    # Environment initialization
    device, pin_memory = init_device(args.seed, args.cpu, args.gpu,
                                     args.cpu_affinity)

    # Create the data loader
    train_loader, val_loader, test_loader = Cost2100DataLoader(
        root=args.data_dir,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=pin_memory,
        scenario=args.scenario)()

    # Define model
    model = init_model(args)
    model.to(device)

    # Define loss function
    criterion = nn.MSELoss().to(device)

    # Inference mode
    if args.evaluate:
        Tester(model, device, criterion)(test_loader)
        return

    # Define optimizer and scheduler
    lr_init = 1e-3 if args.scheduler == 'const' else 2e-3
    optimizer = torch.optim.Adam(model.parameters(), lr_init)
    if args.scheduler == 'const':
        scheduler = FakeLR(optimizer=optimizer)
    else:
        scheduler = WarmUpCosineAnnealingLR(optimizer=optimizer,
                                            T_max=args.epochs *
                                            len(train_loader),
                                            T_warmup=30 * len(train_loader),
                                            eta_min=5e-5)

    # Define the training pipeline
    trainer = Trainer(model=model,
                      device=device,
                      optimizer=optimizer,
                      criterion=criterion,
                      scheduler=scheduler,
                      resume=args.resume)

    # Start training
    trainer.loop(args.epochs, train_loader, val_loader, test_loader)

    # Final testing
    loss, rho, nmse = Tester(model, device, criterion)(test_loader)
    print(f"\n=! Final test loss: {loss:.3e}"
          f"\n         test rho: {rho:.3e}"
          f"\n         test NMSE: {nmse:.3e}\n")
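WarmUpCosineAnnealingLR is a project-specific scheduler; judging only from its arguments (T_max, T_warmup, eta_min), it warms the learning rate up and then anneals it with a cosine curve. A rough approximation of that behaviour with PyTorch's stock LambdaLR, assuming a linear warmup (a sketch, not the project's implementation):

import math
from torch.optim.lr_scheduler import LambdaLR

def warmup_cosine(optimizer, T_max, T_warmup, eta_min, base_lr):
    # Linear warmup for T_warmup steps, then cosine decay from base_lr to eta_min at T_max.
    # base_lr must match the lr the optimizer was created with.
    def factor(step):
        if step < T_warmup:
            return (step + 1) / T_warmup
        progress = (step - T_warmup) / max(1, T_max - T_warmup)
        cosine = 0.5 * (1.0 + math.cos(math.pi * progress))
        return (eta_min + (base_lr - eta_min) * cosine) / base_lr
    return LambdaLR(optimizer, lr_lambda=factor)

# e.g. warmup_cosine(optimizer, args.epochs * len(train_loader),
#                    30 * len(train_loader), 5e-5, 2e-3), stepped once per batch.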
Example #9
def main(batch_size, root):
    train_loader, test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))
    se_resnet = nn.DataParallel(se_resnet50(num_classes=345), device_ids=gpus)
    optimizer = optim.SGD(params=se_resnet.parameters(),
                          lr=0.6 / 1024 * batch_size,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
    trainer = Trainer(se_resnet, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, scheduler)
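The learning rate 0.6 / 1024 * batch_size above is the linear scaling rule: a reference rate of 0.6 at batch size 1024, scaled in proportion to the actual batch size. A quick sanity check of the arithmetic:

def scaled_lr(batch_size, base_lr=0.6, base_batch=1024):
    # Linear scaling: keep lr / batch_size at the reference ratio.
    return base_lr / base_batch * batch_size

print(scaled_lr(256))   # 0.15
print(scaled_lr(1024))  # 0.6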
Example #10
def main(batch_size, root):
    # torchvision.datasets.DataLoader()
    #####################################################################
    # The implementation of tensorboardX and topK accuracy is in utils.py
    #####################################################################

    # test mode: 1=open, 0=close
    test_mode = 1

    # get checkpoint information
    checkpoint_newest = get_checkPoint("./checkpoint/", 1)
    test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))

    # initialize your net/optimizer
    nameOfNet = "se_resnet_testResult.csv"
    se_resnet = nn.DataParallel(inceptionv4(num_classes=3), device_ids=gpus)
    optimizer = optim.SGD(params=se_resnet.parameters(),
                          lr=0.6 / 1024 * batch_size,
                          momentum=0.9,
                          weight_decay=1e-4)

    # No existing checkpoint
    if checkpoint_newest == 0:
        print("-------------No checkpoint available!!!!--------------")

    # load existing checkpoint
    else:
        csv_path = "./" + nameOfNet
        csv_writer = open(csv_path, "w")
        csv_writer.write("key_id,word\n")
        checkpoint_newest_list = []
        if not isinstance(checkpoint_newest, list):
            checkpoint_newest_list.append(checkpoint_newest)
        else:
            checkpoint_newest_list = checkpoint_newest
        for checkpoint_path in checkpoint_newest_list:
            print("The path of the pretrained model %s" % checkpoint_path)
            print("load pretrained model......")
            checkpoint = torch.load(checkpoint_path)
            se_resnet.load_state_dict(checkpoint['weight'])
            scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
            print("The current epoch is %d" % checkpoint['epoch'])
            print("prepare to write the csv file for testing...")
            trainer = Trainer(se_resnet,
                              optimizer,
                              F.cross_entropy,
                              batch_size,
                              csv_writer,
                              save_dir="./checkpoint/",
                              save_freq=1)
            train_loader = None
            trainer.loop(checkpoint['epoch'], train_loader, test_loader,
                         checkpoint['epoch'], scheduler, test_mode)
Example #11
def main(batch_size, baseline, reduction):
    train_loader, test_loader = get_dataloader(batch_size)

    if baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=reduction)
    optimizer = optim.SGD(params=model.parameters(),
                          lr=1e-1,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = StepLR(optimizer, 80, 0.1)
    trainer = Trainer(model, optimizer, F.cross_entropy)
    trainer.loop(200, train_loader, test_loader, scheduler)
Example #12
def main(batch_size):
    train_loader, test_loader = get_dataloader(batch_size)
    #model= DinkNet34(num_classes=1)
    model = resnet()

    #optimizer = optim.SGD(params=model.parameters(), lr=1e-1, momentum=0.9,weight_decay=1e-4)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, 80, 0.1)
    optimizer = optim.Adam(params=model.parameters())
    trainer = Trainer(model,
                      optimizer,
                      nn.CrossEntropyLoss(),  # instantiated so the criterion is directly callable, like F.cross_entropy
                      save_freq=1,
                      save_dir=SAVE_PATH)
    trainer.loop(400, train_loader, test_loader)
Example #13
def main():
    # load data
    print("Loading dataset...")
    train_data = COCO_motivations_Dataset(data_root, train=True)
    val_data = COCO_motivations_Dataset(data_root, train=False)

    batch_size = 2
    batch_size = batch_size if len(params.gpus) == 0 else batch_size * len(params.gpus)

    train_dataloader = DataLoader(train_data,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    print('train dataset len: {}'.format(len(train_dataloader.dataset)))

    val_dataloader = DataLoader(val_data,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=num_workers)
    print('val dataset len: {}'.format(len(val_dataloader.dataset)))

    # models
    # model = resnet34(pretrained=False, modelpath=model_path, num_classes=1000)  # batch_size=120, 1GPU Memory < 7000M
    # model.fc = nn.Linear(512, 256)
    model = resnet101(pretrained=False, modelpath=model_path,
                      num_classes=1000)  # batch_size=60, 1GPU Memory > 9000M
    model.fc = nn.Linear(512 * 4, 256)

    # optimizer
    trainable_vars = [
        param for param in model.parameters() if param.requires_grad
    ]
    print("Training with sgd")
    params.optimizer = torch.optim.SGD(trainable_vars,
                                       lr=init_lr,
                                       momentum=momentum,
                                       weight_decay=weight_decay,
                                       nesterov=nesterov)

    # Train
    params.lr_scheduler = ReduceLROnPlateau(params.optimizer,
                                            'min',
                                            factor=lr_decay,
                                            patience=10,
                                            cooldown=10,
                                            verbose=True)
    trainer = Trainer(model, params, train_dataloader, val_dataloader)
    trainer.train()
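Unlike the StepLR schedules in the other examples, ReduceLROnPlateau must be stepped with the monitored metric (here 'min', i.e. a validation loss) once per epoch. A minimal self-contained illustration of that contract, independent of this project's Trainer:

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2)

for epoch in range(10):
    val_loss = 1.0  # a stagnating metric, so the LR is halved after `patience` epochs
    scheduler.step(val_loss)
    print(epoch, optimizer.param_groups[0]['lr'])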
Example #14
def main(batch_size, root, lrate):
    #####################################################################
    # The implementation of tensorboardX and topK accuracy is in utils.py
    #####################################################################

    # get checkpoint information
    checkpoint_newest = get_checkPoint("./lr" + str(lrate) + "/checkpoint/")

    #TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
    # write log and visualize the losses of batches of training and testing
    TIMESTAMP = ""
    writer1 = SummaryWriter('./lr' + str(lrate) + '/tensorboard_log/batch/' +
                            TIMESTAMP)
    # write log and visualize the accuracy of batches of training and testing
    writer2 = SummaryWriter('./lr' + str(lrate) + '/tensorboard_log/epoch/' +
                            TIMESTAMP)

    train_loader, test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))

    # initialize your net/optimizer
    seresnet50 = nn.DataParallel(se_resnet50(num_classes=340), device_ids=gpus)
    optimizer = optim.SGD(params=seresnet50.parameters(),
                          lr=lrate / 1024 * batch_size,
                          momentum=0.9,
                          weight_decay=1e-4)

    # No existing checkpoint
    if checkpoint_newest == 0:
        scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
        trainer = Trainer(seresnet50,
                          optimizer,
                          F.cross_entropy,
                          save_dir="./lr" + str(lrate) + "/checkpoint/",
                          writer1=writer1,
                          writer2=writer2,
                          save_freq=1)
        trainer.loop(50, train_loader, test_loader, 1, scheduler)
    # load existing checkpoint
    else:
        print("The path of the pretrained model %s" % checkpoint_newest)
        print("load pretrained model......")
        checkpoint = torch.load(checkpoint_newest)
        seresnet50.load_state_dict(checkpoint['weight'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              30,
                                              gamma=0.1,
                                              last_epoch=checkpoint['epoch'])
        print("The current epoch is %d" % checkpoint['epoch'])
        trainer = Trainer(seresnet50,
                          optimizer,
                          F.cross_entropy,
                          save_dir="./lr" + str(lrate) + "/checkpoint/",
                          writer1=writer1,
                          writer2=writer2,
                          save_freq=1)
        trainer.loop(100, train_loader, test_loader, checkpoint['epoch'] + 1,
                     scheduler)
Example #15
def main(batch_size, baseline, reduction):
    train_loader, test_loader = get_dataloader(batch_size)

    if baseline:
        model = densenet121()
    else:
        model = se_densenet121(num_classes=10)

    optimizer = optim.SGD(params=model.parameters(),
                          lr=1e-1,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 80, 0.1)
    trainer = Trainer(model, optimizer, F.cross_entropy, save_dir="weights")
    trainer.loop(100, train_loader, test_loader, scheduler)
Example #16
    def test_create_allows_model_instance_as_parameter_for_foreign_key_field(
            self):
        james = Trainer.create(name='James', age=21)
        meowth = Pokemon.create(name='Meowth', level=19, trainer=james)

        assert james.pk == 1
        assert meowth.trainer.pk == james.pk
Example #17
    def test_update_one_field_with_subquery_with_filter(self):
        self.add_trainer(['James', 'Jessie'])
        jessie_name = Trainer.select(Trainer.name).where(Trainer.name == 'Jessie')
        UpdateQuery(db=self.db).table(Trainer).fields(Trainer.age == 42).where(Trainer.name == jessie_name).execute()
        james, jessie = Trainer._db._connection.execute("SELECT age FROM trainer").fetchall()
        assert jessie[0] == 42
        assert james[0] == 21
Example #18
def make_crf_trainer(_run, min_freq=1, c2=1.0, max_iter=2**31 - 1):
    params = {
        'feature.minfreq': min_freq,
        'c2': c2,
        'max_iterations': max_iter
    }
    return Trainer(_run, algorithm='lbfgs', params=params)
Example #19
    def __init__(self, args, data_settings):

        self.separated_inputs = data_settings["separate_conditions"]
        self.device_names = data_settings["devices"]
        self.conditions = data_settings["conditions"]
        self.elbo = []
        self.elbo_list = []
        self.epoch = args.epochs
        self.name = args.experiment
        self.label = args.experiment
        self.log_normalized_iws = []
        self.precisions = []
        self.q_names = []
        self.q_values = []
        self.splits = []
        self.theta = []
        self.X_post_sample = []
        self.X_sample = []
        # from data_pair.val
        self.data_ids = []
        self.devices = []
        self.treatments = []
        self.trainer = trainer = Trainer(args, add_timestamp=True)
        self.X_obs = []
        # Attributes initialized elsewhere
        self.chunk_sizes = None
        self.ids = None
        self.names = None
        self.times = None
        self.xval_writer = None
Example #20
def main():
    parser = create_parser(True)
    args = parser.parse_args()
    spec = load_config_file(args.yaml)  # spec is a dict of dicts of dicts
    trainer = Trainer(args, args.yaml, add_timestamp=True)
    xval_merge = XvalMerge(args, spec["data"], trainer)
    data_pair, val_results = run_on_split(args, split=None, trainer=trainer)
    xval_merge.add(1, data_pair, val_results)
    xval_merge.finalize()
    xval_merge.save(xval_merge.trainer.tb_log_dir)
Example #21
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)  # utils
    init_log(dirs['log'])  # utils
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])  # seeonce
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))  # logging?

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)  # what is this

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    if env.agent == 'iddpg':
        model = IDDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'maddpg':  #TODO: Add MADDPG
        model = MADDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    summary_writer = tf.summary.FileWriter(dirs['log'])  # what is this
    trainer = Trainer(env, model, global_counter, summary_writer, in_test, output_path=dirs['data'])  # utils
    trainer.run()
    # if post_test:  # how?
    #     tester = Tester(env, model, global_counter, summary_writer, dirs['data'])
    #     tester.run_offline(dirs['data'])  # utils

    # save model  # what's this
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #22
def main(batch_size,
         baseline,
         reduction,
         data_path,
         checkpoint_path,
         lr,
         checkpoint_name=None):
    train_loader, test_loader = get_dataloader(batch_size, data_path)

    if baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=reduction)
    optimizer = optim.SGD(params=model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = StepLR(optimizer, 80, 0.1)
    state_dict = model.state_dict()
    print(state_dict.keys())
    trainer = Trainer(model,
                      optimizer,
                      F.cross_entropy,
                      save_dir=checkpoint_path)
    # Load model parameters from a checkpoint, if one was given
    if checkpoint_name is not None:
        checkpoint_path = Path(checkpoint_path)
        ckpt_dir = checkpoint_path / checkpoint_name
        model.load_state_dict(torch.load(ckpt_dir)["weight"])
        print("checkpoint load successfully!")
    trainer.max_acc = max(trainer.test(test_loader), trainer.max_acc)
    trainer.loop(200, train_loader, test_loader, scheduler)
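The checkpoint loaded above is a dict whose "weight" entry holds the model state dict (that key is visible in the loading code; any other fields are assumptions). A matching save call would look roughly like this:

import torch

def save_checkpoint(model, epoch, path):
    # Only the "weight" key is required by the loading code above; "epoch" is an assumed extra.
    torch.save({"weight": model.state_dict(), "epoch": epoch}, path)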
Example #23
    def test_create_many(self):
        ntrainers = Trainer._db._connection.execute(
            "SELECT count(*) FROM trainer WHERE name = 'Giovanni' OR name = 'James'"
        ).fetchone()

        assert ntrainers[0] == 0

        Trainer.create_many([{
            'name': 'Giovanni',
            'age': 42
        }, {
            'name': 'James',
            'age': 21
        }])

        trainers = Trainer._db._connection.execute(
            "SELECT name, age FROM trainer WHERE name = 'Giovanni' OR name = 'James'"
        ).fetchall()

        assert trainers[0][0] == 'Giovanni'
        assert trainers[0][1] == 42
        assert trainers[1][0] == 'James'
        assert trainers[1][1] == 21
Example #24
def train():
    model, recorder = mdl.Classifier(), Recorder()
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=constants.WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, constants.EPOCHS)
    trainer = Trainer(model, optimizer, scheduler, recorder)

    trainer.fit(constants.EPOCHS)
    trainer.save_model()
    recorder.plot()
Example #25
    def __init__(self, args, split=None, trainer=None):
        """
        :param args: a Namespace, from argparse.parse_args
        :param split: an integer between 1 and args.folds inclusive, or None
        :param trainer: a Trainer instance, or None
        """
        self.procdata = None
        # Command-line arguments (Namespace)
        self.args = self._tidy_args(args, split)
        self._fix_random_seed()
        # TODO(dacart): introduce a switch to allow non-GPU use, achieved with:
        # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
        # Utility methods for training a model
        self.trainer = trainer or Trainer(args, args.yaml, add_timestamp=True)
        # Attributes set in other methods:
        # Conditions, input signals of the data that are being modelled
        self.conditions = None
        # DatasetPair, from a training Dataset and validation Dataset
        self.dataset_pair = None
        # Decoder and encoder networks
        self.decoder = None
        self.encoder = None
        # Number of instances in a batch (int)
        self.n_batch = None
        # Number of "theta" parameters: local, global-conditional and global (int)
        self.n_theta = None
        # Collection of attributes related to training objective
        self.objective = None
        # Value of spec["params"] from YAML file (dict)
        self.params_dict = None
        # Collection of placeholder attributes, each a Tensor, fed with new values for each batch
        self.placeholders = None
        # Training feed_dict: dict from placeholder Tensor to np.array
        self.train_feed_dict = None
        # TrainingStepper object
        self.training_stepper = None
        # Validation feed_dict keys: dict from placeholder Tensor to np.array
        self.val_feed_dict = None
        # Model path for storing best weights so far
        self.model_path = os.path.join(self.trainer.tb_log_dir, 'saver',
                                       'sess_max_elbo')
Example #26
def main():
    args = get_arguments()

    with open(args.model_params, 'r') as f:
        model_params = json.load(f)

    with open(args.training_params, 'r') as f:
        train_params = json.load(f)

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print('Some arguments are wrong:')
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    receptive_field = WaveNetModel.calculate_receptive_field(
        model_params['filter_width'],
        model_params['dilations'],
        model_params['initial_filter_width'])
    # Save arguments and model params into file
    save_run_config(args, receptive_field, STARTED_DATESTRING, logdir)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Create data loader.
    with tf.name_scope('create_inputs'):
        reader = WavMidReader(data_dir=args.data_dir_train,
                              coord=coord,
                              audio_sample_rate=model_params['audio_sr'],
                              receptive_field=receptive_field,
                              velocity=args.velocity,
                              sample_size=args.sample_size,
                              queues_size=(10, 10*args.batch_size))
        data_batch = reader.dequeue(args.batch_size)

    # Create model.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=model_params['dilations'],
        filter_width=model_params['filter_width'],
        residual_channels=model_params['residual_channels'],
        dilation_channels=model_params['dilation_channels'],
        skip_channels=model_params['skip_channels'],
        output_channels=model_params['output_channels'],
        use_biases=model_params['use_biases'],
        initial_filter_width=model_params['initial_filter_width'])

    input_data = tf.placeholder(dtype=tf.float32,
                                shape=(args.batch_size, None, 1))
    input_labels = tf.placeholder(dtype=tf.float32,
                                  shape=(args.batch_size, None,
                                         model_params['output_channels']))

    loss, probs = net.loss(input_data=input_data,
                           input_labels=input_labels,
                           pos_weight=train_params['pos_weight'],
                           l2_reg_str=train_params['l2_reg_str'])
    optimizer = optimizer_factory[args.optimizer](
                    learning_rate=train_params['learning_rate'],
                    momentum=train_params['momentum'])
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()
    histograms = tf.summary.merge_all(key=HKEY)

    # Separate summary ops for validation, since they are
    # calculated only once per evaluation cycle.
    with tf.name_scope('validation_summaries'):

        metric_summaries = metrics_empty_dict()
        metric_value = tf.placeholder(tf.float32)
        for name in metric_summaries.keys():
            metric_summaries[name] = tf.summary.scalar(name, metric_value)

        images_buffer = tf.placeholder(tf.string)
        images_batch = tf.stack(
            [tf.image.decode_png(images_buffer[0], channels=4),
             tf.image.decode_png(images_buffer[1], channels=4),
             tf.image.decode_png(images_buffer[2], channels=4)])
        images_summary = tf.summary.image('estim', images_batch)

        audio_data = tf.placeholder(tf.float32)
        audio_summary = tf.summary.audio('input', audio_data,
                                         model_params['audio_sr'])

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    # Trainer for keeping best validation-performing model
    # and optional early stopping.
    trainer = Trainer(sess, logdir, train_params['early_stop_limit'], 0.999)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        print('Something went wrong while restoring checkpoint. '
              'Training will be terminated to avoid accidentally '
              'overwriting the previous model.')
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)


    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, train_params['num_steps']):
            waveform, pianoroll = sess.run([data_batch[0], data_batch[1]])
            feed_dict = {input_data : waveform, input_labels : pianoroll}
            # Reload switches from file on each step
            with open(RUNTIME_SWITCHES, 'r') as f:
                switch = json.load(f)

            start_time = time.time()
            if switch['store_meta'] and step % switch['store_every'] == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    feed_dict=feed_dict,
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  feed_dict=feed_dict)
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                  .format(step, loss_value, duration))

            if step % switch['checkpoint_every'] == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            # Evaluate model performance on validation data
            if step % switch['evaluate_every'] == 0:
                if switch['histograms']:
                    hist_summary = sess.run(histograms)
                    writer.add_summary(hist_summary, step)
                print('evaluating...')
                stats = 0, 0, 0, 0, 0, 0
                est = np.empty([0, model_params['output_channels']])
                ref = np.empty([0, model_params['output_channels']])

                b_data, b_labels, b_cntr = (
                    np.empty((0, args.sample_size + receptive_field - 1, 1)),
                    np.empty((0, model_params['output_channels'])),
                    args.batch_size)

                # if (batch_size * sample_size > valid_data) single_pass() again
                while est.size == 0: # and ref.size == 0 and sum(stats) == 0 ...

                    for data, labels in reader.single_pass(
                        sess, args.data_dir_valid):

                        # cumulate batch
                        if b_cntr > 1:
                            b_data, b_labels, decr = cumulateBatch(
                                data, labels, b_data, b_labels)
                            b_cntr -= decr
                            continue
                        elif args.batch_size > 1:
                            b_data, b_labels, decr = cumulateBatch(
                                data, labels, b_data, b_labels)
                            if not decr:
                                continue
                            data = b_data
                            labels = b_labels
                            # reset batch cumulation variables
                            b_data, b_labels, b_cntr = (
                                np.empty((
                                    0, args.sample_size + receptive_field - 1, 1
                                )),
                                np.empty((0, model_params['output_channels'])),
                                args.batch_size)

                        predictions = sess.run(
                            probs, feed_dict={input_data : data})
                        # Aggregate sums for metrics calculation
                        stats_chunk = calc_stats(
                            predictions, labels, args.threshold)
                        stats = tuple([sum(x) for x in zip(stats, stats_chunk)])
                        est = np.append(est, predictions, axis=0)
                        ref = np.append(ref, labels, axis=0)

                metrics = calc_metrics(None, None, None, stats=stats)
                write_metrics(metrics, metric_summaries, metric_value,
                              writer, step, sess)
                trainer.check(metrics['f1_measure'])

                # Render evaluation results
                if switch['log_image'] or switch['log_sound']:
                    sub_fac = int(model_params['audio_sr']/switch['midi_sr'])
                    est = roll_subsample(est.T, sub_fac)
                    ref = roll_subsample(ref.T, sub_fac)
                if switch['log_image']:
                    write_images(est, ref, switch['midi_sr'], args.threshold,
                                 (8, 6), images_summary, images_buffer,
                                 writer, step, sess)
                if switch['log_sound']:
                    write_audio(est, ref, switch['midi_sr'],
                                model_params['audio_sr'], 0.007,
                                audio_summary, audio_data,
                                writer, step, sess)

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step is not None and step > last_saved_step:  # step stays None if the loop never ran
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
        flush_n_close(writer, sess)
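The training loop above re-reads RUNTIME_SWITCHES, a JSON file, on every step, so checkpointing and logging behaviour can be adjusted while training runs. The keys it consults are visible in the loop; a file with purely illustrative values could be produced like this (the path and values are assumptions):

import json

switches = {
    "store_meta": False,       # store TF run metadata / timeline traces
    "store_every": 500,        # metadata storage frequency (steps)
    "checkpoint_every": 1000,  # checkpoint frequency (steps)
    "evaluate_every": 2000,    # validation frequency (steps)
    "histograms": False,       # log histogram summaries during evaluation
    "log_image": True,         # render piano-roll images to TensorBoard
    "log_sound": False,        # render audio to TensorBoard
    "midi_sr": 100,            # piano-roll rate used when subsampling predictions
}
with open("runtime_switches.json", "w") as f:  # path is a placeholder
    json.dump(switches, f, indent=2)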
Example #27
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    # coord = tf.train.Coordinator()

    # if env.agent == 'a2c':
    #     model = A2C(env.n_s, env.n_a, total_step,
    #                 config['MODEL_CONFIG'], seed=seed)
    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     env.n_f_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='dqn')
    else:
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='lr')

    # disable multi-threading for safe SUMO implementation
    # threads = []
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env,
                      model,
                      global_counter,
                      summary_writer,
                      in_test,
                      output_path=dirs['data'])
    trainer.run()
    # if in_test or post_test:
    #     # assign a different port for test env
    #     test_env = init_env(config['ENV_CONFIG'], port=1)
    #     tester = Tester(test_env, model, global_counter, summary_writer, dirs['data'])

    # def train_fn():
    #     trainer.run(coord)

    # thread = threading.Thread(target=train_fn)
    # thread.start()
    # threads.append(thread)
    # if in_test:
    #     def test_fn():
    #         tester.run_online(coord)
    #     thread = threading.Thread(target=test_fn)
    #     thread.start()
    #     threads.append(thread)
    # coord.join(threads)

    # post-training test
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer,
                        dirs['data'])
        tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #28
     # NOTE: the snippet's opening lines are missing; this train_loader head is
     # reconstructed to mirror the test_loader definition below.
     train_loader = torch.utils.data.DataLoader(
          datasets.CIFAR100('../data',train=True,
               transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((.5,.5,.5),(.5,.5,.5))
                    ])
               ),batch_size=args.batch_size,shuffle=True,**kwargs
          )
     test_loader = torch.utils.data.DataLoader(
          datasets.CIFAR100('../data',train=False,
               transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((.5,.5,.5),(.5,.5,.5))
                    ])
               ),
          batch_size = args.test_batch_size,shuffle=True,**kwargs
          )

model = vgg(margin=args.margin)
optimizer = optim.SGD(model.parameters(),lr=args.lr,momentum=args.momentum,weight_decay=args.weight_decay)
criterion = nn.CrossEntropyLoss()

print('\nNormal Training \n')
trainer = Trainer(
     model=model,
     optimizer=optimizer,
     criterion=criterion,
     start_epoch=args.start_epoch,
     epochs=args.epochs,
     cuda=args.cuda,
     log_interval=args.log_interval,
     train_loader=train_loader,
     test_loader=test_loader,
     root=args.root,
     )
trainer.start()
Example #29
    logger = SummaryWriter(args.log_dir)

    # ROIScoreWriter
    score_writer = ROIScoreWriter(os.path.join(args.log_dir, 'scores.csv'),
                                  ROIs)

else:
    logger = None

timer = time.time()
start = timer

trainer = Trainer(
    model,
    optimizer,
    loss_fn=loss_fn,
    accu_fn='dice',
    load_checkpoint=args.checkpoint,
    logger=logger,
)

validator = Validator(
    trainer.model,
    threshold=config['output_threshold'],
)

if args.validate_only:
    validator.run(data_gen['valid'])
    logger.close()
    print('Total:', time.time() - start)
    exit(0)
Example #30
    if MODEL["mode"] == "UNET":
        model = UnetResNet(encoder_name=MODEL["backbone"],
                           num_classes=MODEL["num_classes"],
                           input_channels=3,
                           num_filters=32,
                           Dropout=0.3,
                           res_blocks_dec=MODEL["unet_res_blocks_decoder"])

    elif MODEL["mode"] == "FPN":
        model = FPN(encoder_name=MODEL["backbone"],
                    decoder_pyramid_channels=256,
                    decoder_segmentation_channels=128,
                    classes=MODEL["num_classes"],
                    dropout=0.3,
                    activation='sigmoid',
                    final_upsampling=4,
                    decoder_merge_policy='add')
    else:
        raise ValueError('Model type is not correct: `{}`.'.format(
            MODEL["mode"]))

    model_trainer = Trainer(model=model,
                            image_dataset=image_dataset,
                            optimizer=optim.Adam,
                            **TRAINING)
    model_trainer.start(trainset, valset)

    # copy training config file into created folder
    copyfile(args.config_path,
             os.path.join(TRAINING["model_path"], "train_config.yaml"))
Example #31
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))  #1e6
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))  #2e4
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))  #1e4
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')  #12
    # coord = tf.train.Coordinator()

    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     env.n_f_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'codql':
        print('This is codql')
        num_agents = len(env.n_s_ls)
        print('num_agents:', num_agents)
        a_dim = env.n_a_ls[0]  # action dim (or num of actions?)
        print('a_dim:', a_dim)
        s_dim = env.n_s_ls[0]
        print('env.n_s_ls=', s_dim)
        s_dim_wait = env.n_w_ls[0]
        print('s_dim_wait:', s_dim_wait)
        # obs_space = s_dim  # XXX: state dim error
        model = MFQ(nb_agent=num_agents,
                    a_dim=a_dim,
                    s_dim=s_dim,
                    s_dim_wave=s_dim - s_dim_wait,
                    s_dim_wait=s_dim_wait,
                    config=config['MODEL_CONFIG'])
    elif env.agent == 'dqn':
        model = DQN(nb_agent=len(env.n_s_ls),
                    a_dim=env.n_a_ls[0],
                    s_dim=env.n_s_ls[0],
                    s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                    s_dim_wait=env.n_w_ls[0],
                    config=config['MODEL_CONFIG'],
                    doubleQ=False)  #doubleQ=False denotes dqn else ddqn
    elif env.agent == 'ddpg':
        model = DDPGEN(nb_agent=len(env.n_s_ls),
                       share_params=True,
                       a_dim=env.n_a_ls[0],
                       s_dim=env.n_s_ls[0],
                       s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                       s_dim_wait=env.n_w_ls[0])
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='dqn')
    else:
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='lr')

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env,
                      model,
                      global_counter,
                      summary_writer,
                      in_test,
                      output_path=dirs['data'])
    trainer.run()

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #32
num_workers = 48
num_classes = 2
model_name = "shufflenet"
train_txt = ""
test_txt = ""
model_type = "s2"
model_width = 0.25

init_lr = 0.01
lr_decay = 0.8
momentum = 0.9
weight_decay = 0.000
nesterov = True

# Set Training parameters
params = Trainer.TrainParams()
params.max_epoch = 1000
params.criterion = nn.CrossEntropyLoss()
params.gpus = [2]  # set 'params.gpus=[]' to use CPU mode
params.save_dir = model_path
params.ckpt = None
params.save_freq_epoch = 2

parser = argparse.ArgumentParser()
parser.add_argument('--image_w', type=int, default=64)
parser.add_argument('--image_h', type=int, default=64)
parser.add_argument('--num_classes', type=int, default=2)
parser.add_argument('--batch_size', type=int, default=528)
parser.add_argument('--num_workers', type=int, default=24)
parser.add_argument('--model_name', type=str, default=None)
parser.add_argument('--train_txt', type=str, default=None)