Exemplo n.º 1
0
def main():
    """Build the argument parser, construct a VAMPIRE model, and train it."""
    log_root = "logs"

    # Base parser shared with the model-specific argument group.
    base_parser = HyperOptArgumentParser(strategy="grid_search", add_help=False)
    for flag, subdir in (
        ("--test_tube_save_path", "test_tube_data"),
        ("--model_save_path", "model_weights"),
        ("--experiment_name", "vampire"),
    ):
        base_parser.add_argument(flag, default=os.path.join(log_root, subdir))

    # Let the model contribute its own arguments, then parse everything.
    hparams = VAMPIRE.add_model_specific_args(base_parser, ".").parse_args()

    model = VAMPIRE(hparams)

    # Track the run with test-tube.
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
    )
    exp.argparse(hparams)
    exp.save()

    Trainer(experiment=exp, fast_dev_run=False).fit(model)
Exemplo n.º 2
0
def train(hparams, *args):
    """Train your awesome model.

    :param hparams: The arguments to run the model with.
    """
    # Track hyperparameters with test-tube; the SLURM experiment number
    # keeps concurrent runs from colliding on the same version.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        version=hparams.hpc_exp_number,
        autosave=False,
    )
    exp.argparse(hparams)

    # Fake training loop: log the same product 100 times.
    x = hparams.x_val
    for _ in range(100):
        out = x * hparams.y_val
        exp.log({'fake_err': out.item()})

    # Persist the logged metrics.
    exp.save()
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters; must provide experiment_name,
        test_tube_save_path and model_save_path
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    # FIX: the original referenced the undefined name `hyperparams` here,
    # which raised NameError at runtime; the function's parameter is `hparams`.
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='test demo'
    )

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    # Checkpoints are stored under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_acc',
        patience=3,
        verbose=True,
        mode='max'
    )

    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor='val_loss',
        mode='min'
    )

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
Exemplo n.º 4
0
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters; must provide experiment_name
        and test_tube_save_path
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    model = LightningTemplateModel(hparams)

    # ------------------------
    # 2 INIT EXP
    # ------------------------
    # init experiment
    # FIX: the original referenced the undefined name `hyperparams`, which
    # raised NameError at runtime; the function's parameter is `hparams`.
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 INIT TRAINER
    # ------------------------
    trainer = Trainer(experiment=exp)

    # ------------------------
    # 4 START TRAINING
    # ------------------------
    trainer.fit(model)
Exemplo n.º 5
0
def search_train(args, *extra_args):
    """Run one hyperparameter-search trial and persist its metrics."""
    # Metrics are written under the checkpoint directory.
    exp = Experiment(save_dir=args.ckptdir)
    exp.argparse(args)
    # Train first, then flush the experiment to disk.
    train(args, exp)
    exp.save()
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters; must provide experiment_name,
        test_tube_save_path and gpus
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    # FIX: the original referenced the undefined name `hyperparams`, which
    # raised NameError at runtime; the function's parameter is `hparams`.
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 INIT TRAINER
    # ------------------------
    trainer = Trainer(experiment=exp, gpus=hparams.gpus, use_amp=True)

    # ------------------------
    # 4 START TRAINING
    # ------------------------
    trainer.fit(model)
Exemplo n.º 7
0
def run_experiment(hparams, *_):
    """Run a single hyperparameter-search trial inside a SLURM job.

    :param hparams: parsed trial hyperparameters (lr, wd, ...); mutated to
        carry the constructed optimizer.
    """
    print(os.environ)
    # Cluster topology comes from SLURM's environment variables.
    num_workers = int(os.environ['SLURM_NNODES'])
    node_id = int(os.environ['SLURM_NODEID'])

    # Fixed cross-validation settings for this trial.
    fold  = 0
    kfold = 5
    debug = True
    # Per-task scratch directory; task PID + hostname keep concurrent
    # SLURM tasks from writing to the same location.
    path = os.environ['SCRATCH'] + f"/summer_school/hopt{fold}/job" + os.environ['SLURM_TASK_PID'] + os.environ['HOSTNAME']
    print(node_id, path)

    exp = Experiment(save_dir=f'{path}/exp')
    exp.argparse(hparams)

    # Build the optimizer on hparams so `train` receives it via vars(hparams).
    hparams.optimizer = tfa.optimizers.LAMB(lr=hparams.lr,
                                            weight_decay_rate=hparams.wd)
    print(hparams, flush=True)

    # start trainer
    auc = train(vars(hparams), num_workers, node_id, fold, kfold, debug, path)
    print(auc)

    # save Experiment
    exp.add_scalar('auc', auc)
    exp.save()
Exemplo n.º 8
0
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters for this trial
    :param cluster: cluster object handed in by the hyperopt/SLURM runner
    :param results_dict: shared dict for reporting results
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # when using grid search, it's possible for all models to start at once
    # and use the same test tube experiment version
    relative_node_id = int(os.environ['SLURM_NODEID'])
    sleep(relative_node_id + 1)

    # init experiment
    # FIX: the original referenced the undefined name `hyperparams` (here and
    # in the Trainer below), which raised NameError; the parameter is `hparams`.
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    # Checkpoints are stored under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    early_stop = EarlyStopping(monitor='val_acc',
                               patience=3,
                               verbose=True,
                               mode='max')

    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor='val_loss',
                                 mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(experiment=exp,
                      cluster=cluster,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      gpus=hparams.gpus,
                      nb_gpu_nodes=hparams.nb_gpu_nodes)

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
Exemplo n.º 9
0
def train(hparams):
    """Pretend-train while logging a fake error metric via test-tube."""
    # Version from the SLURM script; this is None when absent, which the
    # Experiment constructor accepts.
    slurm_version = hparams.hpc_exp_number

    # Using the SLURM version keeps the experiment from colliding with
    # other versions when SLURM runs several jobs at once.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        version=slurm_version,
        autosave=False,
    )
    exp.argparse(hparams)

    # Fake training loop.
    x = hparams.x_val
    for _ in range(100):
        exp.log({'fake_err': (x * hparams.y_val).item()})

    # Flush the logged metrics to disk.
    exp.save()
Exemplo n.º 10
0
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # Track the run; the HPC experiment number keeps versions unique.
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description,
    )
    exp.argparse(hparams)
    exp.save()

    # Build the model, then hand it to a minimally-configured trainer.
    model = LightningTemplateModel(hparams)
    Trainer(experiment=exp).fit(model)
Exemplo n.º 11
0
def train(hparams, *args):
    """Train your awesome model.
    :param hparams: The arguments to run the model with.
    """
    # Initialize experiments and track all the hyperparameters
    # if hparams.disease_model:
    #     save_model_path = hparams.save_model_dir+'/disease'
    # else:
    #     save_model_path = hparams.save_model_dir+'/synthetic'
    # Set seeds for torch and numpy so the run is reproducible.
    SEED = hparams.seed
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    print(hparams)
    print(args)
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        # Location to save the metrics.
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)
    # checkpoint_callback = ModelCheckpoint(
    #     filepath=save_model_path+'/'+hparams.cage_nr +
    #     '/version_'+str(cluster.hpc_exp_number)+'/checkpoints',
    #     verbose=True,
    #     monitor='val_loss',
    #     mode='min',
    #     prefix=''
    # )
    # # Pretend to train.
    # x = torch.rand((1, hparams.x_val))
    # for train_step in range(0, 100):
    #     y = torch.rand((hparams.x_val, 1))
    #     out = x.mm(y)
    #     exp.log({'fake_err': out.item()})

    # Run the ESNN train/evaluate pipeline; only `lossdf` is consumed below.
    # NOTE(review): the other returned values appear unused here — confirm.
    dsl, \
        trainedmodels,\
        validatedmodels,\
        losses,\
        lossdf,\
        knnres = runevaler("opsitu", hparams.epochs, [ESNNSystem],
                           [TorchEvaler], [eval_dual_ann],
                           networklayers=[hparams.c_layers, hparams.g_layers],
                           lrs=[hparams.lr],
                           dropoutrates=[hparams.dropout],
                           validate_on_k=10, n=1,
                           filenamepostfixes=["esnn"])
    # Summarize the loss curve and log the first statistic as 'loss'.
    stats = stat(lossdf, hparams.epochs, "esnn")
    print(f"type : {type(stats)}")
    print(f"innertype : {type(stats[0])}")
    print(f"stats : {stats}")
    print(f"stats0 : {stats[0]}")
    exp.log({'loss': stats[0]})
    #exp.log('tng_err': tng_err)
    #exp.log({"loss", stats[0]})
    # Save exp when done.
    exp.save()
Exemplo n.º 12
0
def main(hparams, cluster=None, results_dict=None):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # init experiment
    # Logs are written next to this script; debug=True keeps the run local.
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(
        name='test_tube_exp',
        debug=True,
        save_dir=log_dir,
        version=0,
        autosave=False,
        description='test demo'
    )

    # NOTE(review): dataset locations and batch size are hard-coded for a
    # specific local machine — parameterize before running elsewhere.
    hparams.training_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/training'
    hparams.validation_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/validation'
    hparams.test_task_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/test_task'
    hparams.batch_size = 4

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = ContrastivePredictiveSystem(hparams)

    # callbacks
    early_stop = EarlyStopping(
        monitor=hparams.early_stop_metric,
        patience=hparams.early_stop_patience,
        verbose=True,
        mode=hparams.early_stop_mode
    )

    # Checkpoints go under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor=hparams.model_save_monitor_value,
        mode=hparams.model_save_monitor_mode
    )

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        # distributed_backend='dp',
        #gpus=[0],
        nb_sanity_val_steps=2
    )

    # train model
    trainer.fit(model)
Exemplo n.º 13
0
def main(hparams):
    """
    Main training routine specific for this project
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    exp = Experiment(
        name='dsanet_exp_{}_window={}_horizon={}'.format(hparams.data_name, hparams.window, hparams.horizon),
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='test demo'
    )

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    # NOTE(review): model_save_path and early_stop are built here but never
    # passed to the Trainer below — presumably left over from an edit; confirm.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        verbose=True,
        mode='min'
    )

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    # NOTE(review): the Trainer is not given experiment=exp either, so the
    # test-tube experiment created above is not attached to this run.
    trainer = Trainer(
        gpus=[0],
        # auto_scale_batch_size=True,
        max_epochs=10,
        # num_processes=2,
        # num_nodes=2
        
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)

    print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd())
    print('and going to http://localhost:6006 on your browser')
Exemplo n.º 14
0
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters (tt_name, tt_save_path,
        model_save_path, ...)
    :return:
    """
    # init experiment
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description
    )

    exp.argparse(hparams)
    exp.save()

    # build model
    model = ExampleModel(hparams)

    # callbacks
    early_stop = EarlyStopping(
        monitor='val_acc',
        patience=3,
        # FIX: accuracy is a higher-is-better metric; the original used
        # mode='min', which stops as soon as accuracy *improves*.
        mode='max',
        verbose=True,
    )

    # Checkpoints go under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor='val_acc',
        # FIX: keep the checkpoint with the *highest* val_acc (was 'min').
        mode='max'
    )

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # train model
    trainer.fit(model)
Exemplo n.º 15
0
def train(hparams):
    """Log a fake training curve to a test-tube experiment."""
    # Track every argument from the HyperOptArgumentParser.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # Pretend training: multiply two random matrices and log the scalar.
    x = torch.rand((1, hparams.x_val))
    for _ in range(100):
        out = x.mm(torch.rand((hparams.x_val, 1)))
        exp.log({'fake_err': out.item()})

    # Flush metrics to disk once the loop finishes.
    exp.save()
def main(hparams):
    """Train MyModel with early stopping on val_loss and checkpoints on `pr`."""
    # load model
    model = MyModel(hparams)

    # init experiment
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='baseline attn interval'
    )

    exp.argparse(hparams)
    exp.save()

    # define callbacks
    # Checkpoints live under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path,
                                        exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        verbose=True,
        mode='min'
    )

    # Best model is chosen by the `pr` metric (mode='max' → higher is better).
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor='pr',
        mode='max'
    )

    # init trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
        val_check_interval=1
    )

    # start training
    trainer.fit(model)
Exemplo n.º 17
0
def main(hparams, cluster):
    """
    Main training routine specific for this project
    :param hparams: parsed hyperparameters for this trial
    :param cluster: cluster object handed in by the hyperopt/SLURM runner
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # when using grid search, it's possible for all models to start at once
    # and use the same test tube experiment version
    relative_node_id = int(os.environ['SLURM_NODEID'])
    sleep(relative_node_id + 1)

    # init experiment
    # FIX: the original referenced the undefined name `hyperparams` (here and
    # in the Trainer below), which raised NameError; the parameter is `hparams`.
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        version=hparams.hpc_exp_number,  # match the slurm job version number
        description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(experiment=exp,
                      gpus=hparams.per_experiment_nb_gpus,
                      nb_gpu_nodes=hparams.nb_gpu_nodes,
                      distributed_backend=hparams.distributed_backend)

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
Exemplo n.º 18
0
def main(hparams):
    """Train the autoregressive face VAE with early stopping and checkpoints."""
    # Record the run and all of its arguments with test-tube.
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description,
    )
    exp.argparse(hparams)
    exp.save()

    model = AutoregressiveFaceVAE(hparams)

    # Stop when the averaged validation loss plateaus.
    early_stop = EarlyStopping(
        monitor="avg_val_loss", patience=3, verbose=True, mode="min"
    )

    # Checkpoints live under <model_save_path>/<exp name>/<exp version>.
    ckpt_dir = "{}/{}/{}".format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=ckpt_dir,
        save_best_only=True,
        verbose=True,
        monitor="avg_val_loss",
        mode="min",
    )

    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
        distributed_backend=hparams.dist_backend,
    )
    trainer.fit(model)
Exemplo n.º 19
0
def main(hparams, data):
    """Train a P2rSystem on `data`, checkpoint on training loss, then test.

    :param hparams: parsed hyperparameters (exp_name, max_nb_epochs, ...)
    :param data: dataset object forwarded to P2rSystem
    """
    # init experiment
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(name=hparams.exp_name,
                     debug=False,
                     save_dir=log_dir,
                     version=0,
                     autosave=True,
                     description='P2R codebase')

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = P2rSystem(hparams, data)

    # Checkpoint on (min) training loss under the experiment's version dir.
    model_save_path = '{}/{}/version_{}/checkpoints'.format(
        exp.save_dir, exp.name, exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 verbose=True,
                                 monitor='tng_loss',
                                 mode='min',
                                 save_best_only=True)

    # configure trainer
    # Mixed precision (amp O2) on a single GPU; L2 gradient norm is tracked.
    trainer = Trainer(experiment=exp,
                      checkpoint_callback=checkpoint,
                      min_nb_epochs=1,
                      max_nb_epochs=hparams.max_nb_epochs,
                      track_grad_norm=2,
                      accumulate_grad_batches=1,
                      row_log_interval=1,
                      amp_level='O2',
                      use_amp=True,
                      gpus=1)

    # train model
    trainer.fit(model)
    trainer.test()

    # Export a final checkpoint regardless of the best-model logic above.
    filepath = '{}/_ckpt_epoch_final.ckpt'.format(model_save_path)
    checkpoint.save_model(filepath, False)
Exemplo n.º 20
0
def main():
    """Parse CLI options, build the chosen super-resolution model, train it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=['srcnn', 'srgan'], required=True)
    parser.add_argument('--scale_factor', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--patch_size', type=int, default=96)
    parser.add_argument('--gpus', type=str, default='0')
    opt = parser.parse_args()

    # argparse's `choices` guarantees exactly one of the two models is picked.
    Model = models.SRCNNModel if opt.model == 'srcnn' else models.SRGANModel

    # Re-parse once the model has contributed its own arguments.
    parser = Model.add_model_specific_args(parser)
    opt = parser.parse_args()

    # instantiate experiment
    exp = Experiment(save_dir=f'./logs/{opt.model}')
    exp.argparse(opt)

    model = Model(opt)

    # Checkpoints go to the experiment's media path.
    checkpoint_callback = ModelCheckpoint(
        filepath=exp.get_media_path(exp.name, exp.version),
    )

    gpu_ids = [int(i) for i in opt.gpus.split(',')]
    trainer = Trainer(
        experiment=exp,
        max_nb_epochs=4000,
        add_log_row_interval=50,
        check_val_every_n_epoch=10,
        checkpoint_callback=checkpoint_callback,
        gpus=gpu_ids,
    )

    # start training!
    trainer.fit(model)
Exemplo n.º 21
0
def main(hparams):
    """Build an Experiment and Trainer from `hparams` and train the Network.

    :param hparams: parsed hyperparameters; also mined for Experiment/Trainer
        constructor arguments via parse_argdict_for_method.
    """
    # init experiment
    # parse_argdict_for_method presumably filters hparams down to the kwargs
    # accepted by the given callable — confirm against its definition.
    experiment_args = parse_argdict_for_method(Experiment.__init__, hparams)
    exp = Experiment(**experiment_args)

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = Network(hparams)

    # callbacks (both optional, gated by hparams flags)
    if hparams.enable_early_stop:
        early_stop = EarlyStopping(monitor=hparams.monitor_value,
                                   patience=hparams.patience,
                                   verbose=True,
                                   mode=hparams.monitor_mode)
    else:
        early_stop = None

    if hparams.enable_model_checkpoint:
        # Weights are stored next to the experiment's log directory.
        model_save_path = pathlib.Path(exp.log_dir).parent / 'model_weights'
        checkpoint = ModelCheckpoint(
            filepath=model_save_path,
            save_best_only=hparams.save_best_only,
            save_weights_only=hparams.save_weights_only,
            verbose=True,
            monitor=hparams.monitor_value,
            mode=hparams.monitor_mode)
    else:
        checkpoint = None

    # configure trainer
    trainer_args = parse_argdict_for_method(Trainer.__init__, hparams)
    trainer = Trainer(experiment=exp,
                      early_stop_callback=early_stop,
                      checkpoint_callback=checkpoint,
                      **trainer_args)

    # train model
    trainer.fit(model)
Exemplo n.º 22
0
def train(hparams, *args):
    """Train your awesome model.
    :param hparams: The arguments to run the model with.
    """
    # Record every hyperparameter on a test-tube experiment.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # Fake training: log the product of two random matrices 100 times.
    x = torch.rand((1, hparams.x_val))
    for _ in range(100):
        y = torch.rand((hparams.x_val, 1))
        exp.log({'fake_err': x.mm(y).item()})

    # Persist the logged metrics.
    exp.save()
Exemplo n.º 23
0
def train(hparams):
    """Run a tiny TF1 graph 100 times, logging the product of x_val and y_val.

    :param hparams: parsed arguments; must provide test_tube_exp_name,
        log_path, x_val and y_val.
    """
    # init exp and track all the parameters from the HyperOptArgumentParser
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # define tensorflow graph
    x = tf.placeholder(dtype=tf.int32, name='x')
    y = tf.placeholder(dtype=tf.int32, name='y')
    out = x * y

    # FIX: use the session as a context manager so its resources are
    # released even if an op raises (the original never closed it).
    with tf.Session() as sess:
        # Run the tf op
        for train_step in range(0, 100):
            output = sess.run(out, feed_dict={x: hparams.x_val, y: hparams.y_val})
            exp.log({'fake_err': output})

    # save exp when we're done
    exp.save()
Exemplo n.º 24
0
    "--batch_size",
    default=128,
    type=int,
    tunable=True,
    options=[2**n for n in range(5, 10)],
)

# Parse the arguments defined on `parser` (constructed above this chunk).
args = parser.parse_args()

# Fixed experiment settings, overriding anything parsed.
args.max_steps = 1000
args.subpolicy_duration = 200
args.num_policies = 10
args.max_buffer_size = 1_000_000
args.env_names = ["Ant-v2"]

# Record every argument on the test-tube experiment created above.
exp.argparse(args)

# Type aliases for readability; the concrete types are environment-specific.
State = Any
Action = Any
Timestep = int


class MasterPolicy(nn.Module):
    """Returns categorical distribution over subpolicies."""
    # NOTE(review): only three linear layers are defined here; the
    # distribution itself is presumably produced in a forward() defined
    # elsewhere — confirm.
    def __init__(self, state_size, hidden_size, output_size=args.num_policies):
        # Default output size is bound to module-level `args` at class
        # definition time.
        super().__init__()
        S, H, O = state_size, hidden_size, output_size
        self.fc1 = Linear(S, H)
        self.fc2 = Linear(H, H)
        self.out = Linear(H, O)
Exemplo n.º 25
0
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # Allow forcing CPU even when CUDA is present.
    on_gpu = torch.cuda.is_available()
    if hparams.disable_cuda:
        on_gpu = False

    device = 'cuda' if on_gpu else 'cpu'
    hparams.__setattr__('device', device)
    hparams.__setattr__('on_gpu', on_gpu)
    hparams.__setattr__('nb_gpus', torch.cuda.device_count())
    # Inference mode is implied by the presence of pre-trained weights.
    hparams.__setattr__('inference_mode', hparams.model_load_weights_path
                        is not None)

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     save_dir=hparams.tt_save_path,
                     version=hparams.hpc_exp_number,
                     autosave=False,
                     description=hparams.tt_description)

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    # Checkpoints go under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        cluster=cluster,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # train model
    trainer.fit(model)
Exemplo n.º 26
0
            if epoch % 10 == 0 and epoch > 5:
                self.save(self.model_save_dir / 'checkpoints_{}.pth'.format(epoch))

    def save(self, path: Path) -> None:
        # Persist only the network weights (state dict), not the full module.
        torch.save(self.net.state_dict(), path)

    def load(self, path: Path) -> None:
        # Restore weights saved by `save` into the existing network.
        self.net.load_state_dict(torch.load(path))


def to_cpu(tensor):
    """Detach `tensor` from the autograd graph and return it as a CPU numpy array."""
    detached = tensor.detach()
    return detached.cpu().numpy()


if __name__ == '__main__':
    # MOT16 sequences: first six are used for training, the last one for
    # validation (see the slicing below).
    sequences = ['MOT16-02', 'MOT16-04', 'MOT16-05', 'MOT16-09', 'MOT16-10',
                 'MOT16-11', 'MOT16-13']
    args = get_parser().parse_args()
    args.train_sequences = sequences[:6]
    args.val_sequences = sequences[6:]

    output_dir = Path(args.log_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    # The test-tube Experiment doubles as the run logger; flushes every 15 s.
    logger = Experiment(output_dir, name=args.name, autosave=True,
                        flush_secs=15)
    logger.argparse(args)

    model = GraphNNMOTracker(args, logger)
    model.train()
Exemplo n.º 27
0
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # Allow forcing CPU even when CUDA is present.
    on_gpu = torch.cuda.is_available()
    if hparams.disable_cuda:
        on_gpu = False

    device = 'cuda' if on_gpu else 'cpu'
    hparams.__setattr__('device', device)
    hparams.__setattr__('on_gpu', on_gpu)
    hparams.__setattr__('nb_gpus', torch.cuda.device_count())
    # Inference mode is implied by the presence of pre-trained weights.
    hparams.__setattr__('inference_mode', hparams.model_load_weights_path
                        is not None)

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     save_dir=hparams.tt_save_path,
                     version=hparams.hpc_exp_number,
                     autosave=False,
                     description=hparams.tt_description)

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    # Checkpoints go under <model_save_path>/<exp name>/<exp version>.
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer
    # Nearly every knob is exposed through hparams for hyperopt sweeps.
    trainer = Trainer(experiment=exp,
                      on_gpu=on_gpu,
                      cluster=cluster,
                      enable_tqdm=hparams.enable_tqdm,
                      overfit_pct=hparams.overfit,
                      track_grad_norm=hparams.track_grad_norm,
                      fast_dev_run=hparams.fast_dev_run,
                      check_val_every_n_epoch=hparams.check_val_every_n_epoch,
                      accumulate_grad_batches=hparams.accumulate_grad_batches,
                      process_position=process_position,
                      current_gpu_name=current_gpu,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      enable_early_stop=hparams.enable_early_stop,
                      max_nb_epochs=hparams.max_nb_epochs,
                      min_nb_epochs=hparams.min_nb_epochs,
                      train_percent_check=hparams.train_percent_check,
                      val_percent_check=hparams.val_percent_check,
                      test_percent_check=hparams.test_percent_check,
                      val_check_interval=hparams.val_check_interval,
                      log_save_interval=hparams.log_save_interval,
                      add_log_row_interval=hparams.add_log_row_interval,
                      lr_scheduler_milestones=hparams.lr_scheduler_milestones)

    # train model
    trainer.fit(model)
def main(hparams, cluster=None, results_dict=None):
    """Main training routine for the ContrastivePredictiveSystem project.

    Builds a debug test_tube Experiment, overrides a fixed set of hparams
    with local dataset paths and model hyperparameters, then trains the
    model with a Lightning Trainer.

    :param hparams: argparse-style namespace of hyperparameters (mutated
        in place with the hard-coded values below).
    :param cluster: unused here; kept for HyperOptArgumentParser
        compatibility with the optimize_parallel_* entry points.
    :param results_dict: unused; kept for the same compatibility reason.
    :return: None (training runs for its side effects).
    """
    # init experiment; debug=True keeps test_tube from writing real logs
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(name='test_tube_exp',
                     debug=True,
                     save_dir=log_dir,
                     version=0,
                     autosave=False,
                     description='test demo')

    # Hard-coded local dataset locations (machine-specific volume paths).
    hparams.training_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/training'
    hparams.validation_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/validation'
    hparams.test_task_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/test_task'
    hparams.dummy_datasets = False
    hparams.audio_noise = 3e-3

    # CQT (constant-Q transform) front-end settings.
    hparams.cqt_fmin = 40.
    hparams.cqt_bins_per_octave = 24
    hparams.cqt_n_bins = 216
    hparams.cqt_hop_length = 512
    hparams.cqt_filter_scale = 0.43

    # Encoder architecture: per-layer channel counts and the two conv
    # stages' kernel / padding / stride / pooling shapes.
    hparams.enc_channels = (1, 8, 16, 32, 64, 128, 256, 512, 512)
    hparams.enc_kernel_1_w = (3, 3, 3, 3, 3, 3, 3, 3)
    hparams.enc_kernel_1_h = (3, 3, 3, 3, 3, 3, 3, 3)
    hparams.enc_kernel_2_w = (1, 3, 1, 3, 1, 3, 1, 3)
    hparams.enc_kernel_2_h = (25, 3, 25, 3, 25, 3, 4, 3)
    hparams.enc_padding_1 = (1, 1, 1, 1, 1, 1, 1, 1)
    hparams.enc_padding_2 = (0, 1, 0, 1, 0, 1, 0, 0)
    hparams.enc_stride_1 = (1, 1, 1, 1, 1, 1, 1, 1)
    hparams.enc_stride_2 = (1, 1, 1, 1, 1, 1, 1, 1)
    hparams.enc_pooling_1 = (2, 1, 1, 1, 2, 1, 1, 1)

    # Autoregressive model settings and training schedule.
    hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6)
    hparams.ar_self_attention = (False, False, False, False, False, False,
                                 False, False, False)
    hparams.batch_size = 4
    hparams.learning_rate = 3e-4
    hparams.warmup_steps = 1000
    hparams.annealing_steps = 100000
    hparams.score_over_all_timesteps = False
    hparams.visible_steps = 60

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = ContrastivePredictiveSystem(hparams)

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    # os.path.join is portable and avoids manual separator handling;
    # exp.version is an int, so convert it explicitly.
    model_save_path = os.path.join(hparams.model_save_path, exp.name,
                                   str(exp.version))
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer; early stopping / GPU options are deliberately
    # left disabled here (toggles kept for local experimentation)
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        # early_stop_callback=early_stop,
        # distributed_backend='dp',
        # gpus=[0],
        nb_sanity_val_steps=2,
        gradient_clip=0.5)

    # train model
    trainer.fit(model)
def main(hparams, cluster=None, results_dict=None):
    """Main training routine for the Maestro-dataset experiment.

    Overrides a fixed set of hparams with cluster-local paths and model
    hyperparameters, attaches a classification task model for evaluation,
    and trains on GPU 0.

    :param hparams: argparse-style namespace of hyperparameters (mutated
        in place with the hard-coded values below).
    :param cluster: unused here; kept for HyperOptArgumentParser
        compatibility with the optimize_parallel_* entry points.
    :param results_dict: unused; kept for the same compatibility reason.
    :return: None (training runs for its side effects).
    """
    # init experiment
    name = 'immersions_scalogram_resnet_maestro'
    version = 0
    hparams.log_dir = '/home/idivinci3005/experiments/logs'
    # os.path.join handles separators; version is an int, so stringify.
    hparams.checkpoint_dir = os.path.join(
        '/home/idivinci3005/experiments/checkpoints', name, str(version))
    hparams.training_set_path = '/home/idivinci3005/data/maestro-v2.0.0'
    hparams.validation_set_path = '/home/idivinci3005/data/maestro-v2.0.0'
    hparams.test_task_set_path = '/home/idivinci3005/data/maestro-v2.0.0'
    hparams.audio_noise = 3e-3
    hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6)
    hparams.ar_self_attention = (False, False, False, False, False, False,
                                 False, False, False)
    hparams.batch_size = 32
    hparams.learning_rate = 3e-4
    hparams.warmup_steps = 1000
    hparams.annealing_steps = 100000
    hparams.score_over_all_timesteps = False
    hparams.visible_steps = 62

    # makedirs creates missing parent directories too, and exist_ok
    # avoids the check-then-create race that os.mkdir had here.
    os.makedirs(hparams.checkpoint_dir, exist_ok=True)

    exp = Experiment(name=name,
                     debug=False,
                     save_dir=hparams.log_dir,
                     version=version,
                     autosave=False,
                     description='maestro dataset experiment')

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model and attach the downstream classification task used for
    # evaluation during training
    model = ContrastivePredictiveSystemMaestro(hparams)
    task_model = MaestroClassificationTaskModel(
        model, task_dataset_path=hparams.test_task_set_path)
    model.test_task_model = task_model

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    # save_best_only=False keeps every checkpoint, not just the best one
    checkpoint = ModelCheckpoint(filepath=hparams.checkpoint_dir,
                                 save_best_only=False,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer; early stopping is deliberately disabled,
    # validation runs 10x per epoch over 25% of the validation set
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        # early_stop_callback=early_stop,
        # distributed_backend='dp',
        gpus=[0],
        nb_sanity_val_steps=5,
        val_check_interval=0.1,
        val_percent_check=0.25,
        # train_percent_check=0.01
    )

    # train model
    trainer.fit(model)
Exemplo n.º 30
0
def main(hparams, cluster, results_dict):
    """Main training routine for TRAINING_MODEL on an (optional) GPU cluster.

    Derives device settings from the visible GPUs, staggers process start
    to avoid log collisions, then trains with early stopping and
    checkpointing.

    :param hparams: argparse-style namespace of hyperparameters (mutated
        in place with derived device attributes).
    :param cluster: cluster object forwarded to the Trainer (SLURM /
        test_tube cluster handle).
    :param results_dict: unused; kept for HyperOptArgumentParser
        compatibility.
    :return: None (training runs for its side effects).
    """
    # GPU use requires both a --gpus argument and actual CUDA availability
    on_gpu = hparams.gpus is not None and torch.cuda.is_available()

    # Plain attribute assignment is the idiomatic replacement for the
    # explicit __setattr__ dunder calls.
    hparams.device = 'cuda' if on_gpu else 'cpu'
    hparams.on_gpu = on_gpu
    hparams.nb_gpus = torch.cuda.device_count()
    # inference mode is implied by the presence of pre-trained weights
    hparams.inference_mode = hparams.model_load_weights_path is not None

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment; debug=True keeps test_tube from writing real logs
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(name='test_tube_exp',
                     debug=True,
                     save_dir=log_dir,
                     version=0,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    # os.path.join is portable; exp.version is an int, so stringify it
    model_save_path = os.path.join(hparams.model_save_path, exp.name,
                                   str(exp.version))
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # gpus are ; separated for inside a node and , within nodes
    gpu_list = None
    if hparams.gpus is not None:
        gpu_list = [int(x) for x in hparams.gpus.split(';')]

    # configure trainer
    trainer = Trainer(experiment=exp,
                      cluster=cluster,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      gpus=gpu_list)

    # train model
    trainer.fit(model)