Exemplo n.º 1
0
def trainval(exp_dict, savedir, args):
    """Train the experiment's model and checkpoint its scores every epoch.

    The loop starts at the epoch recorded in the checkpoint and stops
    before epoch 3, so an already-finished experiment runs no epochs.

    exp_dict: hyperparameter dictionary; the 'dataset' and 'model' entries
        select the loader and the model, and the whole dictionary is
        forwarded to both factories
    savedir: directory holding the checkpoint; its parent directory is
        passed to the loader factory as the data directory
    args: command-line arguments (not used inside this function)
    """
    last_epoch = 3  # exclusive upper bound of the epoch range

    # Build the training loader and the model from the experiment settings
    dataset_name = exp_dict['dataset']
    data_root = os.path.dirname(savedir)
    train_loader = he.get_loader(
        name=dataset_name,
        split='train',
        datadir=data_root,
        exp_dict=exp_dict,
    )
    model = he.get_model(name=exp_dict['model'], exp_dict=exp_dict)

    # Fetch whatever was previously stored for this experiment
    chk_dict = hw.get_checkpoint(savedir)
    first_epoch = chk_dict['epoch']

    progress = tqdm.tqdm(range(first_epoch, last_epoch),
                         desc="Running Experiment")
    for epoch in progress:
        # One pass over the training data
        train_dict = model.train_on_loader(train_loader, epoch=epoch)

        # Record this epoch's metrics in the running score list
        score_dict = {
            'epoch': epoch,
            'acc': train_dict['train_acc'],
            'loss': train_dict['train_loss'],
        }
        chk_dict['score_list'] += [score_dict]

        # Produce visualisations, then persist scores and images together
        images = model.vis_on_loader(train_loader)
        hw.save_checkpoint(
            savedir,
            score_list=chk_dict['score_list'],
            images=[images],
        )

    print('Experiment done\n')
Exemplo n.º 2
0
def trainval(exp_dict, savedir, args):
    """Train the experiment's model, resuming from a checkpoint if present.

    The loop starts at the epoch recorded in the checkpoint and stops
    before epoch 3. After every epoch the accumulated score list is saved.

    exp_dict: hyperparameter dictionary; the "dataset" and "model" entries
        select the loader and the model, and the whole dictionary is
        forwarded to both factories
    savedir: directory holding the checkpoint; its parent directory is
        passed to the loader factory as the data directory
    args: command-line arguments (not used inside this function)

    NOTE(review): a stored model state is restored when the checkpoint
    contains one, but this function never passes a model state to
    ``hw.save_checkpoint`` -- confirm whether that is intentional.
    """
    last_epoch = 3  # exclusive upper bound of the epoch range

    # Build the training loader and the model from the experiment settings
    dataset_name = exp_dict["dataset"]
    data_root = os.path.dirname(savedir)
    train_loader = he.get_loader(
        name=dataset_name,
        split="train",
        datadir=data_root,
        exp_dict=exp_dict,
    )
    model = he.get_model(name=exp_dict["model"], exp_dict=exp_dict)

    # Load the checkpoint and restore the model only from a non-empty state
    chk_dict = hw.get_checkpoint(savedir)
    if "model_state_dict" in chk_dict:
        saved_state = chk_dict["model_state_dict"]
        if len(saved_state):
            model.set_state_dict(saved_state)

    first_epoch = chk_dict["epoch"]
    progress = tqdm.tqdm(range(first_epoch, last_epoch), desc="Running Experiment")
    for epoch in progress:
        # One pass over the training data
        train_dict = model.train_on_loader(train_loader, epoch=epoch)

        # Record this epoch's metrics in the running score list
        score_dict = {
            "epoch": epoch,
            "acc": train_dict["train_acc"],
            "loss": train_dict["train_loss"],
        }
        chk_dict["score_list"] += [score_dict]

        # Persist the updated score list
        hw.save_checkpoint(savedir, score_list=chk_dict["score_list"])

    print("Experiment done\n")