def main(args):
    init_logger()
    set_seeds()
    tokenizer = load_tokenizer(args)

    if args.do_train:
        if args.model_4:
            first = TrainerFirst(args, tokenizer)
            first.train()
            second = SecondClassifier(args, tokenizer)
            second.classifier()
            third = Trainermmd(args, tokenizer)
            third.train()
        elif args.new_model_4:
            first = FirstTrainer(args, tokenizer)
            first.train()
            second = SecondClassifier_n(args, tokenizer)
            second.classifier()
            third = Trainermmd_n(args, tokenizer)
            third.train()
        else:
            trainer = Trainer(args, tokenizer)
            trainer.train()
    elif args.do_test:
        if args.model_4 or args.new_model_4:
            tester = Predictor(args, tokenizer)
            tester.predict()
        else:
            tester = Tester(args, tokenizer)
            tester.test()
    elif args.do_interactive:
        interactive_predict(args)
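
Every example on this page calls a project-specific set_seeds helper before building models or data loaders. The implementations (and even the signatures: no arguments, a single seed, or an env plus a seed) vary across these repositories; the snippet below is only a minimal sketch of what such a helper typically does in a PyTorch project, not the code used by any example here.

import os
import random

import numpy as np
import torch


def set_seeds(seed=42):
    # Seed every RNG in play so repeated runs are comparable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # no-op when CUDA is unavailable
    os.environ['PYTHONHASHSEED'] = str(seed)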
Example #2
    def __init__(self,
                 vocab_size,
                 pretrain_file,
                 device,
                 data_parallel,
                 n_top_layers=4,
                 hidden_dim=768,
                 max_sents_len=512,
                 hidden_dropout_rate=0.1,
                 attn_dropout_rate=0.1,
                 n_heads=12,
                 n_layers=12,
                 variance_epsilon=1e-12):

        super().__init__()
        self.pretrain_file = pretrain_file
        self.data_parallel = data_parallel
        self.device = device

        set_seeds(42)

        self.model = BertAverage(vocab_size, n_top_layers, hidden_dim,
                                 max_sents_len, hidden_dropout_rate,
                                 attn_dropout_rate, n_heads, n_layers,
                                 variance_epsilon)

        self.model.eval()
        self.load(self.model, self.pretrain_file)
        self.model.double().to(self.device)
        if self.data_parallel:
            self.model = DataParallel(self.model)

        self.Instance = namedtuple('Instance', ['id', 'sentence', 'label'])
Example #3
def main(task='mrpc',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/bert_base.json',
         data_file='../glue/MRPC/train.tsv',
         model_file=None,
         pretrain_file='../uncased_L-12_H-768_A-12/bert_model.ckpt',
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         save_dir='../exp/bert/mrpc',
         max_len=128,
         mode='train'):

    cfg = train.Config.from_json(train_cfg)
    model_cfg = models.Config.from_json(model_cfg)

    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab,
                                           do_lower_case=True)
    TaskDataset = dataset_class(
        task)  # task dataset class according to the task
    pipeline = [
        Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        AddSpecialTokensWithTruncation(max_len),
        TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels,
                      max_len)
    ]
    dataset = TaskDataset(data_file, pipeline)
    data_iter = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True)

    model = Classifier(model_cfg, len(TaskDataset.labels))
    criterion = nn.CrossEntropyLoss()

    trainer = train.Trainer(cfg, model, data_iter, optim.optim4GPU(cfg, model),
                            save_dir, get_device())

    if mode == 'train':

        def get_loss(model, batch,
                     global_step):  # make sure loss is a scalar tensor
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            loss = criterion(logits, label_id)
            return loss

        trainer.train(get_loss, model_file, pretrain_file, data_parallel)

    elif mode == 'eval':

        def evaluate(model, batch):
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            _, label_pred = logits.max(1)
            result = (label_pred == label_id).float()  #.cpu().numpy()
            accuracy = result.mean()
            return accuracy, result

        results = trainer.eval(evaluate, model_file, data_parallel)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy:', total_accuracy)
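
A main function like this, where every argument has a keyword default, is usually exposed through a thin command-line wrapper rather than called directly. The lines below are a hypothetical launcher (assuming the python-fire package, which maps keyword arguments to CLI flags); the actual entry point of the original script may differ.

import fire

if __name__ == '__main__':
    fire.Fire(main)  # e.g. python classify.py --task mrpc --mode eval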
Example #4
    def __init__(self, args):
        self.args = args
        cfg = train.Config.from_json(args.train_cfg)
        model_cfg = models.Config.from_json(args.model_cfg)
        set_seeds(cfg.seed)

        tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab,
                                               do_lower_case=True)
        tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

        pipeline = [
            Preprocess4Pretrain(args.max_pred, args.mask_prob,
                                list(tokenizer.vocab.keys()),
                                tokenizer.convert_tokens_to_ids,
                                model_cfg.max_len, args.mask_alpha,
                                args.mask_beta, args.max_gram)
        ]
        data_iter = DataLoader(SentPairDataset(args.data_file,
                                               cfg.batch_size,
                                               tokenize,
                                               model_cfg.max_len,
                                               pipeline=pipeline),
                               batch_size=cfg.batch_size,
                               collate_fn=seq_collate,
                               num_workers=mp.cpu_count())

        model = Generator(model_cfg)

        self.optimizer = optim.optim4GPU(cfg, model)
        self.trainer = train.MLMTrainer(cfg, model, data_iter, self.optimizer,
                                        args.save_dir, get_device())
        os.makedirs(os.path.join(args.log_dir, args.name), exist_ok=True)
        self.writer = SummaryWriter(log_dir=os.path.join(
            args.log_dir, args.name))  # for tensorboardX
Example #5
def main(config='config/finetune/agnews/train.json'):

    cfg = Config(**json.load(open(config, "r")))

    cfg_data = data.Config(**json.load(open(cfg.cfg_data, "r")))
    cfg_model = models.Config(**json.load(open(cfg.cfg_model, "r")))
    cfg_optim = trainer.Config(**json.load(open(cfg.cfg_optim, "r")))

    set_seeds(cfg.seed)

    TaskDataset = data.get_class(
        cfg_data.task)  # task dataset class according to the task
    tokenizer = tokenization.FullTokenizer(vocab_file=cfg_data.vocab_file,
                                           do_lower_case=True)
    dataset = TaskDataset(
        cfg_data.data_file[cfg.mode],
        pipelines=[
            data.RemoveSymbols('\\'),
            data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
            data.AddSpecialTokensWithTruncation(cfg_data.max_len),
            data.TokenIndexing(tokenizer.convert_tokens_to_ids,
                               TaskDataset.labels, cfg_data.max_len)
        ],
        n_data=None)
    dataset = TensorDataset(*dataset.get_tensors())  # To Tensors
    data_iter = DataLoader(dataset,
                           batch_size=cfg_optim.batch_size,
                           shuffle=True)

    classifier = models.Classifier4Transformer(cfg_model,
                                               len(TaskDataset.labels))
    optimizer = optim.optim4GPU(cfg_optim, classifier)

    train_loop = trainer.TrainLoop(cfg_optim, classifier, data_iter, optimizer,
                                   cfg.save_dir, get_device())

    def get_loss(model, batch,
                 global_step):  # make sure loss is a scalar tensor
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        loss = nn.CrossEntropyLoss()(logits, label_id)
        return loss

    def evaluate(model, batch):
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)
        result = (label_pred == label_id).float()  #.cpu().numpy()
        accuracy = result.mean()
        return accuracy, result

    if cfg.mode == "train":
        train_loop.train(get_loss, cfg.model_file, cfg.pretrain_file)
        print("Training has been done properly.")

    elif cfg.mode == "eval":
        results = train_loop.eval(evaluate, cfg.model_file)
        total_accuracy = torch.cat(results).mean().item()
        print(f"Accuracy: {total_accuracy}")
Example #6
def main():
    args = parse_args()
    set_seeds(args.seed)

    # Because I keep forgetting to select the correct env
    assert args.env in args.history_file

    env = create_env(args.env)
    print("Loading environment", args.env)
    print("Action space:", env.action_space)
    print("Observation space:", env.observation_space)

    history = pickle_load(args.history_file)
    if args.best_agent:
        agents = get_best_agents_dedup(history, 1)
        eval_outfile = 'eval_best_agent'
    elif args.best_agent_ensemble:
        agents = get_best_agents_dedup(history, 3)
        eval_outfile = 'eval_best_agent_ensemble'
    elif args.last_agent:
        agents = [history['agents'][-1][0]]
        eval_outfile = 'eval_last_agent'
    elif args.last_agent_ensemble:
        agents = history['agents'][-1][:3]
        eval_outfile = 'eval_last_agent_ensemble'
    else:
        assert False

    dim_in, dim_out = get_input_output_dim(env)
    policies = [a.get_policy(dim_in, dim_out) for a in agents]
    policy = EnsemblePolicy(policies)

    if args.evaluate:
        total_rew = []
        for i in range(args.evaluate):
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy)
            total_rew.append(ep_rew)
            print(
                f"ep = {i + 1}/{args.evaluate} reward = {ep_rew:.2f} len = {ep_len}"
            )
        print(
            f"mean_reward = {np.mean(total_rew):.2f} +- {np.std(total_rew):.2f}"
        )
        np.save(Path(args.history_file).with_name(eval_outfile), total_rew)

    elif args.render:
        for i in count():
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy, render_human)
            print(f"ep = {i + 1}/inf reward = {ep_rew:.2f} len = {ep_len}")

    elif args.gif:
        render_gif = RenderGif()
        env.seed(args.seed)
        run_episode(env, policy, render_gif)
        render_gif.save(args.gif)
Example #7
def main(train_cfg='config/bert_pretrain.json',
         model_cfg='config/bert_base.json',
         data_file='../tbc/books_large_all.txt',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         save_dir='../exp/bert/pretrain',
         log_dir='../exp/bert/pretrain/runs',
         max_len=512,
         max_pred=20,
         mask_prob=0.15):

    train_cfg = BertTrainConfig.from_json(train_cfg)
    model_cfg = BertModelConfig.from_json(model_cfg)

    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

    pipeline = [Preprocess4Pretrain(max_pred,
                                    mask_prob,
                                    list(tokenizer.vocab.keys()),
                                    tokenizer.convert_tokens_to_ids,
                                    max_len)]
    data_iter = SentPairDataLoader(data_file,
                                   train_cfg.batch_size,
                                   tokenize,
                                   max_len,
                                   pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)
    criterion1 = nn.CrossEntropyLoss(reduction='none')
    criterion2 = nn.CrossEntropyLoss()

    optimizer = optim.optim4GPU(train_cfg, model)
    trainer = train.Trainer(train_cfg, model_cfg, model, data_iter, optimizer, save_dir, get_device())

    writer = SummaryWriter(log_dir=log_dir) # for tensorboardX

    def get_loss(model, batch, global_step, train_cfg, model_cfg): # make sure loss is tensor
        input_ids, segment_ids, input_mask, masked_ids, masked_pos, masked_weights, is_next = batch

        logits_lm, logits_clsf = model(input_ids, segment_ids, input_mask, masked_pos)
        loss_lm = criterion1(logits_lm.transpose(1, 2), masked_ids) # for masked LM
        loss_lm = (loss_lm*masked_weights.float()).mean()
        loss_clsf = criterion2(logits_clsf, is_next) # for sentence classification
        writer.add_scalars('data/scalar_group',
                           {'loss_lm': loss_lm.item(),
                            'loss_clsf': loss_clsf.item(),
                            'loss_total': (loss_lm + loss_clsf).item(),
                            'lr': optimizer.get_lr()[0],
                           },
                           global_step)
        return loss_lm + loss_clsf

    trainer.train(get_loss, model_file, None, data_parallel)
Example #8
def main(task='mrpc',
         base_train_cfg='config/QDElectra_pretrain.json',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/QDElectra_base.json',
         data_file='../glue/MRPC/train.tsv',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         log_dir='../exp/electra/pretrain/runs',
         save_dir='../exp/bert/mrpc',
         mode='train',
         pred_distill=True):
    train_cfg_dict = json.load(open(base_train_cfg, "r"))
    train_cfg_dict.update(json.load(open(train_cfg, "r")))
    train_cfg = ElectraConfig().from_dict(train_cfg_dict)
    # train_cfg = ElectraConfig().from_json_file(train_cfg)
    model_cfg = ElectraConfig().from_json_file(model_cfg)
    output_mode, train_cfg.n_epochs, max_len = get_task_params(task)
    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    TaskDataset = dataset_class(task) # task dataset class according to the task
    num_labels = len(TaskDataset.labels)
    pipeline = [
        Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        AddSpecialTokensWithTruncation(max_len),
        TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels, output_mode, max_len)
    ]
    data_set = TaskDataset(data_file, pipeline)
    data_iter = DataLoader(data_set, batch_size=train_cfg.batch_size, shuffle=True)

    t_discriminator = ElectraForSequenceClassification.from_pretrained(
        'google/electra-base-discriminator'
    )
    s_discriminator = QuantizedElectraForSequenceClassification.from_pretrained(
        'google/electra-small-discriminator', config=model_cfg
    )
    model = DistillElectraForSequenceClassification(t_discriminator, s_discriminator, model_cfg)

    optimizer = optim.optim4GPU(train_cfg, model)
    writer = SummaryWriter(log_dir=log_dir) # for tensorboardX

    base_trainer_args = (train_cfg, model_cfg, model, data_iter, optimizer, save_dir, get_device())
    trainer = QuantizedDistillElectraTrainer(writer, *base_trainer_args)

    if mode == 'train':
        trainer.train(model_file, None, data_parallel)
    elif mode == 'eval':
        input_ids, attention_mask, token_type_ids, label_ids = TokenIndexing(tokenizer.convert_tokens_to_ids,
                                                                            TaskDataset.labels,
                                                                            output_mode,
                                                                            max_len)
        _, eval_labels = get_tensor_data(output_mode, input_ids, attention_mask, token_type_ids, label_ids)
        results = trainer.eval(model_file, output_mode, eval_labels, num_labels, data_parallel)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy:', total_accuracy)
Example #9
def main(train_cfg='config/electra_pretrain.json',
         model_cfg='config/electra_small.json',
         data_file='../tbc/books_large_all.txt',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         log_dir='../exp/electra/pretrain/runs',
         save_dir='../exp/electra/pretrain',
         max_len=128,
         max_pred=20,
         mask_prob=0.15,
         quantize=False):

    check_dirs_exist([log_dir, save_dir])

    train_cfg = ElectraConfig().from_json_file(train_cfg)
    model_cfg = ElectraConfig().from_json_file(model_cfg)

    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab,
                                           do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

    pipeline = [
        Preprocess4Pretrain(max_pred, mask_prob, list(tokenizer.vocab.keys()),
                            tokenizer.convert_tokens_to_ids, max_len)
    ]

    data_iter = SentPairDataLoader(data_file,
                                   train_cfg.batch_size,
                                   tokenize,
                                   max_len,
                                   pipeline=pipeline)

    # Get distilled-electra and quantized-distilled-electra
    generator = ElectraForMaskedLM.from_pretrained(
        'google/electra-small-generator')
    t_discriminator = ElectraForPreTraining.from_pretrained(
        'google/electra-base-discriminator')
    s_discriminator = QuantizedElectraForPreTraining(
        model_cfg) if quantize else ElectraForPreTraining
    # model config is used for "QuantizedElectraForPreTraining"
    s_discriminator = s_discriminator.from_pretrained(
        'google/electra-small-discriminator', config=model_cfg)
    model = DistillElectraForPreTraining(generator, t_discriminator,
                                         s_discriminator, model_cfg)

    optimizer = optim.optim4GPU(train_cfg, model)
    writer = SummaryWriter(log_dir=log_dir)  # for tensorboardX

    base_trainer_args = (train_cfg, model_cfg, model, data_iter, None,
                         optimizer, save_dir, get_device())
    trainer = QuantizedDistillElectraTrainer(writer, *base_trainer_args)
    trainer.train(model_file, None, data_parallel)
    trainer._eval()
Example #10
    def test_random_gridworld_generation(self):
        set_seeds(314159)
        mdp = GridworldMdp.generate_random(8, 8, 0, 0)
        self.assertEqual(mdp.height, 8)
        self.assertEqual(mdp.width, 8)
        mdp_string = str(mdp)
        self.assertEqual(mdp_string.count('X'), 28)
        self.assertEqual(mdp_string.count(' '), 34)
        self.assertEqual(mdp_string.count('A'), 1)
        self.assertEqual(mdp_string.count('3'), 1)
Example #11
def main(opt):
    with open('config.json', 'r', encoding='utf-8') as f:
        args = AttrDict(json.load(f))
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    init_logger()
    set_seeds()
    tokenizer = load_tokenizer(args)
    
    if opt.train:
        trainer = Trainer(args, tokenizer)
        trainer.train()
Example #12
def main(args):

    cfg = train.Config.from_json(args.train_cfg)
    model_cfg = models.Config.from_json(args.model_cfg)

    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab,
                                           do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

    pipeline = [
        Preprocess4Pretrain(args.max_pred, args.mask_prob,
                            list(tokenizer.vocab.keys()),
                            tokenizer.convert_tokens_to_ids, model_cfg.max_len,
                            args.mask_alpha, args.mask_beta, args.max_gram)
    ]
    data_iter = SentPairDataLoader(args.data_file,
                                   cfg.batch_size,
                                   tokenize,
                                   model_cfg.max_len,
                                   pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)
    criterion1 = nn.CrossEntropyLoss(reduction='none')
    criterion2 = nn.CrossEntropyLoss()

    optimizer = optim.optim4GPU(cfg, model)
    trainer = train.Trainer(cfg, model, data_iter, optimizer, args.save_dir,
                            get_device())

    writer = SummaryWriter(log_dir=args.log_dir)  # for tensorboardX

    def get_loss(model, batch, global_step):  # make sure loss is tensor
        input_ids, segment_ids, input_mask, masked_ids, masked_pos, masked_weights, is_next = batch

        logits_lm, logits_clsf = model(input_ids, segment_ids, input_mask,
                                       masked_pos)
        loss_lm = criterion1(logits_lm.transpose(1, 2),
                             masked_ids)  # for masked LM
        loss_lm = (loss_lm * masked_weights.float()).mean()
        loss_sop = criterion2(logits_clsf,
                              is_next)  # for sentence classification
        writer.add_scalars(
            'data/scalar_group', {
                'loss_lm': loss_lm.item(),
                'loss_sop': loss_sop.item(),
                'loss_total': (loss_lm + loss_sop).item(),
                'lr': optimizer.get_lr()[0],
            }, global_step)
        return loss_lm + loss_sop

    trainer.train(get_loss, model_file=None, data_parallel=True)
Example #13
def main():
    parser = argparse.ArgumentParser(description='Pretraining argument parser')
    parser = load_pretrain_args(parser)
    parser = load_test_args(parser)
    args = parser.parse_args()

    set_seeds(args.seed)

    train_data = get_train_data()
    valid_data = get_valid_data()
    test_data = get_test_data()

    nnet = create_nnet(train_data, args)

    optimizer = Adam(nnet.parameters(), lr=args.lr)
    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()

    action_space = ActionSpace()

    tb = SummaryWriter()

    best_score = 0

    for epoch in range(1, args.update_epochs + 1):
        print(f'Epoch {epoch}')

        for indice in random_batch(len(train_data), args.train_batch_size):
            batch = train_data[indice]
            input_batch = to_input_batch(batch, torch.device('cuda'))

            policies, values = nnet(input_batch)

            target_policies = get_target_policies(batch, action_space).cuda()
            target_values = get_target_values(batch).cuda()

            policy_loss = ce_loss(policies, target_policies)
            value_loss = mse_loss(values, target_values)
            loss = policy_loss + value_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        accuracy = test(valid_data, nnet, args, tb, epoch)

        if accuracy > best_score:
            best_score = accuracy
            torch.save(nnet.module.state_dict(), 'models/pretrained.pt')

    nnet.module.load_state_dict(torch.load('models/pretrained.pt'))

    test(test_data, nnet, args, tb, args.update_epochs + 1)
Example #14
    def compare_agents(self, name, agent1, agent2, places=7, print_mdp=False):
        print('Comparing {0} agents'.format(name))
        set_seeds(314159)
        mdp = GridworldMdp.generate_random_connected(16, 16, 5, 0.2)
        if print_mdp: print(mdp)
        env = Mdp(mdp)
        self.time(lambda: agent1.set_mdp(mdp), "Python planner")
        self.time(lambda: agent2.set_mdp(mdp), "Numpy/Tensorflow planner")
        for s in mdp.get_states():
            for a in mdp.get_actions(s):
                mu = agent1.extend_state_to_mu(s)
                qval1, qval2 = agent1.qvalue(mu, a), agent2.qvalue(mu, a)
                self.assertAlmostEqual(qval1, qval2, places=places)
Example #15
def main():
    args = parse_args()

    # Fix the random seeds.
    set_seeds(args.random_seed)

    # Default torch settings.
    torch.set_default_dtype(torch.float64)
    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)

    # load data
    gpdists, g = load_graph_pdists(args.input_graph,
                                   cache_dir='.cached_pdists')
    n_nodes = g.number_of_nodes()
    ds = GraphDataset(gpdists)
    fp = FastPrecision(g)

    # run hyp2
    hyp = Lorentz(3)
    emb = ManifoldEmbedding(n_nodes, [hyp] * args.n_factors)
    for i in range(args.n_factors):
        emb.scales[i] = torch.nn.Parameter(torch.tensor(2.0))
    man_name = '_'.join('hyp2' for _ in range(args.n_factors))
    save_dir = os.path.join(args.save_dir, man_name)
    if args.hyp_snapshot or args.hyp_pretrained:
        logging.info('Loading embedding for %s', man_name)
        load_embedding(emb, save_dir)
    if not args.hyp_pretrained:
        train(ds, fp, emb, args.n_epochs, save_dir)

    # map it to SPD
    spd = SPD(2 * args.n_factors)
    spd_emb = ManifoldEmbedding(n_nodes, [spd])
    save_dir = os.path.join(args.save_dir, 'spd{}'.format(spd.dim))
    if args.spd_snapshot:
        logging.info('Loading embedding for SPD%d', spd.dim)
        load_embedding(spd_emb, save_dir)
    else:
        with torch.no_grad():
            spd_emb.xs[0] = ManifoldParameter(block_diag([
                h2_to_sspd2(emb.xs[i].mul(math.sqrt(2)))
                for i in range(args.n_factors)
            ]),
                                              manifold=spd)
        hyp_dists = emb.to('cpu').compute_dists(None)
        spd_dists = spd_emb.compute_dists(None).to('cpu')
        assert torch.allclose(hyp_dists, spd_dists, atol=1e-4)

    # run spd2
    train(ds, fp, spd_emb, args.n_epochs, save_dir, args.n_epochs)
Example #16
    def __init__(self, model, loaders, criterion, optim, scheduler,
                 val_step, is_tensorboard, verbose, device, random_seeds=42):
        self.model = model.to(device)
        self.loaders = loaders
        self.criterion = criterion.to(device)
        self.optim = optim
        self.scheduler = scheduler

        self.val_step = val_step
        self.checkpoint_save_step = 100
        self.verbose = verbose
        self.is_tensorboard = is_tensorboard
        self.device = device
        set_seeds(random_seeds)
Example #17
def main():
    parser = argparse.ArgumentParser(description='Test argument parser')
    parser = load_test_args(parser)
    args = parser.parse_args()

    set_seeds(args.seed)

    test_data = get_test_data()

    nnet = create_nnet(test_data, args)

    nnet.module.load_state_dict(torch.load(f'models/{args.load}'))

    tb = SummaryWriter()

    start_time = time()
    policy_test(test_data, nnet, args, tb, epoch=0)
    print(f'test time: {time() - start_time:.3f} sec.')
Example #18
def main():
    mp.set_start_method('spawn')
    mpp.Pool.istarmap = istarmap  # for tqdm

    parser = argparse.ArgumentParser(description='Training argument parser')
    parser = load_train_args(parser)
    parser = load_test_args(parser)
    args = parser.parse_args()

    set_seeds(args.seed)

    train_data = get_train_data()
    valid_data = get_valid_data()

    nnet = create_nnet(train_data, args)
    nnet.module.load_state_dict(torch.load(f'models/{args.load}'))
    nnets = create_nnets(train_data, args, n_nnets=torch.cuda.device_count())

    optimizer = Adam(nnet.parameters(), lr=args.lr)
    policy_loss_fn = nn.KLDivLoss(reduction='batchmean')
    value_loss_fn = nn.MSELoss()

    action_space = ActionSpace()

    train_examples = deque(maxlen=args.examples_len)

    tb = SummaryWriter()  # tensorboard writer

    epoch = 0
    while True:
        for indice in random_batch(len(train_data), args.train_batch_size):
            epoch += 1
            print(f'Epoch {epoch}')

            copy_nnet(nnet, nnets)  # nnet -> nnets

            curr_examples = simulate(train_data[indice], nnets, action_space,
                                     args)
            train_examples.extend(curr_examples)

            update_net(train_examples, nnet, optimizer, policy_loss_fn,
                       value_loss_fn, args, tb, epoch)

            test(valid_data, nnet, args, tb, epoch)
Example #19
    def __init__(self, opts):
        self.opts = opts
        self.device = opts['device']

        # Logdir
        self.logdir = os.path.join(opts['logdir'], opts['exp_name'],
                                   opts['variant_name'])
        io.makedirs(self.logdir)

        # Set seeds
        rn = utils.set_seeds(opts['seed'])

        self.model = MetaSim(opts).to(self.device)
        self.generator = self.model.generator

        tasknet_class = get_tasknet(opts['dataset'])
        self.tasknet = tasknet_class(opts['task']).to(
            self.opts['task']['device'])

        # Data
        sgl = get_scene_graph_loader(opts['dataset'])
        self.scene_graph_dataset = sgl(self.generator,
                                       self.opts['epoch_length'])

        # Rendering layer
        self.renderer = RenderLayer(self.generator, self.device)

        # MMD
        self.mmd = MMDInception(device=self.device,
                                resize_input=self.opts['mmd_resize_input'],
                                include_image=False,
                                dims=self.opts['mmd_dims'])

        dl = get_loader(opts['dataset'])
        self.target_dataset = dl(self.opts['task']['val_root'])
        # In the paper, this is different
        # than the data used to get task net acc.
        # Keeping it the same here for simplicity to
        # reduce memory overhead. To do this correctly,
        # generate another copy of the target data
        # and use it for MMD computation.

        # Optimizer
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=opts['optim']['lr'],
            weight_decay=opts['optim']['weight_decay'])

        # LR scheduler
        self.lr_sched = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=opts['optim']['lr_decay'],
            gamma=opts['optim']['lr_decay_gamma'])
Example #20
def generate(cfg):
    from tqdm import trange
    from utils import set_seeds

    cfg.setdefault('seed', 0)
    set_seeds(cfg.seed)
    print(f'Random seed: {cfg.seed}')

    folder = Path(cfg.folder).expanduser().resolve()
    folder.mkdir(parents=True, exist_ok=True)
    print(f'Saving datasets in: {folder}')

    with open(folder / 'datasets.yaml', 'w') as f:
        f.write(cfg.toYAML())
    for p, params in cfg.datasets.items():
        dataset = InfectionDataset(**{k: v for k, v in params.items() if k != 'num_samples'})
        dataset.samples = [dataset.random_sample() for _ in
                           trange(params.num_samples, desc=p.capitalize(), unit='samples', leave=True)]
        path = folder.joinpath(p).with_suffix('.pt')
        torch.save(dataset, path)
        print(f'{p.capitalize()}: saved {len(dataset)} samples in: {path}')
Example #21
def main(_):
    print("Model Architecture: {}".format(FLAGS.model_architecture))

    # Adjust some parameters
    if FLAGS.debug:
        FLAGS.small_label_set = False
        print("RUNNING IN DEBUG MODE")

    FLAGS.num_classes = utils.get_num_classes(FLAGS)

    X_train, y_train = data_utils.load_dataset_tf(FLAGS, mode="train")
    X_val, y_val = data_utils.load_dataset_tf(FLAGS, mode="val")

    # comet_ml experiment logging (https://www.comet.ml/)
    experiment = Experiment(api_key="J55UNlgtffTDmziKUlszSMW2w",
                            log_code=False)
    experiment.log_multiple_params(utils.gather_params(FLAGS))
    experiment.set_num_of_epocs(FLAGS.epochs)
    experiment.log_dataset_hash(X_train)

    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new, DEFAULT TensorFlow session.
    sess = tf.InteractiveSession()

    utils.set_seeds()  # Get deterministic behavior?

    model = models.create_model(FLAGS)
    fw = framework.Framework(sess, model, experiment, FLAGS)

    num_params = int(utils.get_number_of_params())
    model_size = num_params * 4
    print("\nNumber of trainable parameters: {}".format(num_params))
    print("Model is ~ {} bytes out of max 5000000 bytes\n".format(model_size))
    experiment.log_parameter("num_params", num_params)
    experiment.log_parameter("approx_model_size", model_size)

    fw.optimize(X_train, y_train, X_val, y_val)
Example #22
def main():
    args = parse_args()

    # Fix the random seeds.
    set_seeds(args.random_seed)

    # Default torch settings.
    torch.set_default_dtype(torch.float64)
    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)

    # load data
    gpdists, g = load_graph_pdists(args.input_graph,
                                   cache_dir='.cached_pdists')
    n_nodes = g.number_of_nodes()
    ds = GraphDataset(gpdists)
    fp = FastPrecision(g)

    # run hyp2
    emb = ManifoldEmbedding(n_nodes, [Lorentz(3)])
    path = os.path.join(args.save_dir, 'hyp2')
    train(ds, fp, emb, args.n_epochs, path)
    curvature_sq = 1 / emb.scales[0]

    # map it to SSPD
    sspd_emb = ManifoldEmbedding(n_nodes, [SPD(2)])
    sspd_emb.xs[0] = ManifoldParameter(h2_to_sspd2(emb.xs[0] /
                                                   curvature_sq.sqrt()),
                                       manifold=sspd_emb.manifolds[0])
    sspd_emb.scales[0] = torch.nn.Parameter(1 / curvature_sq / 2)
    assert torch.allclose(emb.compute_dists(None),
                          sspd_emb.compute_dists(None),
                          atol=1e-4)

    # run spd2
    path = os.path.join(args.save_dir, 'spd2')
    train(ds, fp, sspd_emb, args.n_epochs, path, args.n_epochs)
Example #23
import tensorflow as tf
import numpy as np
from utils import set_seeds, Data, create_normal_variable

set_seeds(43)


def model(X, weights, biases, dropout_prob):
    n_layers = len(weights)
    output = tf.add(tf.matmul(X, weights['input']), biases['input'])
    output = tf.nn.dropout(tf.nn.relu(output), dropout_prob)

    for i in xrange(2, n_layers):
        output = tf.add(tf.matmul(output, weights['h%i' % i]),
                        biases['b%i' % i])
        output = tf.nn.relu(output)

    output = tf.add(tf.matmul(output, weights['output']), biases['output'])
    return output, weights, biases


def inference(X, weights, biases):
    return model(X, weights, biases, 0.5)


def loss(y_hat, y):
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(y_hat, y))


def train(loss):
    return tf.train.RMSPropOptimizer(learning_rate=0.0003,
Example #24
def run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps,
                    stochasticity, n_runs, n_episodes):
    # no baseline
    best_result = 0
    best_settings = dict()
    results_file = f'results/s{stochasticity}_no_baseline.csv'
    best_settings_file = f'results/s{stochasticity}_no_baseline_best_settings.pkl'

    with open(results_file, 'w') as f:
        f.write('discount_factor,learn_rate,hidden_dim,init_temp,result' +
                '\n')

    for discount_factor in discount_factors:
        for learn_rate in learn_rates:
            for hidden_dim in hidden_dims:
                for init_temp in init_temps:
                    print('#' * 30)
                    print('#' * 9 + ' NEW SEARCH ' + '#' * 9)
                    print('#' * 30)
                    print()

                    st = time()

                    # change this for learned baseline
                    print(
                        f'Search settings: baseline=run_episodes_no_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}'
                    )

                    # initialize the environment
                    env = gym.make('CartPole-v1')  # <---------- change this!

                    result = 0

                    for i in range(n_runs):
                        start_time = time()

                        policy_model = PolicyNetwork(
                            input_dim=4, hidden_dim=hidden_dim, output_dim=2
                        )  # change input_ and output_dim for gridworld env
                        seed = 40 + i
                        set_seeds(env, seed)

                        episode_durations, _ = run_episodes_no_baseline(
                            policy_model, env, n_episodes, discount_factor,
                            learn_rate, init_temp, stochasticity)
                        result += np.mean(episode_durations)

                        del policy_model

                        end_time = time()
                        h, m, s = get_running_time(end_time - start_time)

                        print(
                            f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                        )

                    env.close()
                    result /= n_runs

                    with open(results_file, 'a') as f:
                        f.write(
                            f'{discount_factor},{learn_rate},{hidden_dim},{init_temp},{result}'
                            + '\n')

                    et = time()
                    h, m, s = get_running_time(et - st)

                    print(
                        f'Done with search in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                    )
                    print(f'Average number of steps per episode: {result}')

                    if result > best_result:
                        best_result = result
                        best_settings['discount_factor'] = discount_factor
                        best_settings['learn_rate'] = learn_rate
                        best_settings['hidden_dim'] = hidden_dim
                        best_settings['init_temp'] = init_temp
                        best_settings['result'] = best_result

                        pkl.dump(best_settings, open(best_settings_file, 'wb'))

                        print(f'New best result!: {result}')
                        print(f'New best settings!: {best_settings}')
                    print()

    print()
    print()
    print(f'Best settings after completing grid search: {best_settings}')


# Choose what to run by uncommenting
#run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_learned_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_selfcritic_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
Example #25
def main(conf):
    # Prepare data
    train_dev = koco.load_dataset("korean-hate-speech", mode="train_dev")
    train, valid = train_dev["train"], train_dev["dev"]

    # Prepare tokenizer
    tokenizer = (
        get_tokenizer()
        if "kobert" in conf.pretrained_model
        else AutoTokenizer.from_pretrained(conf.pretrained_model)
    )
    if conf.tokenizer.register_names:
        names = pd.read_csv("entertainement_biographical_db.tsv", sep="\t")[
            "name_wo_parenthesis"
        ].tolist()
        tokenizer.add_tokens(names)

    # Mapping string y_label to integer label
    if conf.label.hate:
        train, label2idx = map_label2idx(train, "hate")
        valid, _ = map_label2idx(valid, "hate")
    elif conf.label.bias:
        train, label2idx = map_label2idx(train, "bias")
        valid, _ = map_label2idx(valid, "bias")

    # Use bias as an additional context for predicting hate
    if conf.label.hate and conf.label.bias:
        biases = ["gender", "others", "none"]
        tokenizer.add_tokens([f"<{label}>" for label in biases])

    # Prepare DataLoader
    train_dataset = KoreanHateSpeechDataset(train)
    valid_dataset = KoreanHateSpeechDataset(valid)
    collator = KoreanHateSpeechCollator(
        tokenizer, predict_hate_with_bias=(conf.label.hate and conf.label.bias)
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=conf.train_hparams.batch_size,
        shuffle=True,
        collate_fn=collator.collate,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=conf.train_hparams.batch_size,
        shuffle=False,
        collate_fn=collator.collate,
    )

    # Prepare model
    set_seeds(conf.train_hparams.seed)
    model = BertForSequenceClassification.from_pretrained(
        conf.pretrained_model, num_labels=len(label2idx)
    )
    if conf.tokenizer.register_names:
        model.resize_token_embeddings(len(tokenizer))
    elif conf.label.hate and conf.label.bias:
        model.resize_token_embeddings(len(tokenizer))
    model = model.to(device)

    # Prepare optimizer and scheduler
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.01,
        },
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = optim.AdamW(
        optimizer_grouped_parameters,
        lr=conf.train_hparams.lr,
        eps=conf.train_hparams.adam_epsilon,
    )

    n_total_iterations = len(train_loader) * conf.train_hparams.n_epochs
    n_warmup_steps = int(n_total_iterations * conf.train_hparams.warmup_ratio)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, n_warmup_steps, n_total_iterations
    )

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    # Train!
    trainer = BertTrainer(conf.train_hparams)
    model = trainer.train(
        model, criterion, optimizer, scheduler, train_loader, valid_loader
    )

    makedirs(conf.checkpoint_dir)
    makedirs(conf.log_dir)
    checkpoint_path = f"{conf.checkpoint_dir}/{conf.model_name}.pt"
    log_path = f"{conf.log_dir}/{conf.model_name}.log"
    torch.save({"model": model.state_dict()}, checkpoint_path)
    torch.save({"config": conf, "classes": label2idx, "tokenizer": tokenizer}, log_path)
Example #26
    # optimize and save
    results = grid_optimization(nb_run, nb_processes, **kwargs)
    utils.save_json(os.path.join(log_path, 'results.json'), results)


if __name__ == '__main__':

    # parse arguments
    config = parse_args()

    # load json default arguments
    args = utils.load_json(config['config_file'])

    # seed for reproducibility
    utils.set_seeds(args['seed'])

    # logging
    model_name, logdir = args.pop('model_name'), args.pop('logdir')
    v = utils.get_version(name=model_name, logdir=logdir)
    log_path = os.path.join(logdir, f'{model_name}-v{v}')

    exp = config.pop('experiment')
    if exp == 'run':
        run(log_path, **args)

    elif exp == 'grid':
        nb_processes = config.pop('workers')
        nb_run = config.pop('average')
        grid(nb_run, nb_processes, log_path, **args)
Example #27
def run_selfcritic_baseline(stochasticity, n_runs, n_episodes):
    # self-critic baseline
    dir_path = os.path.dirname(os.path.realpath(__file__))
    best_settings_file = dir_path + f'/cart_pole_parameter_search/s{stochasticity}_SC_baseline_best_settings.pkl'
    eval_file = f'cart_evals/s{stochasticity}_SC_baseline.pkl'

    with open(best_settings_file, 'rb') as pickle_file:
        best_settings = pkl.load(pickle_file)
    discount_factor = best_settings['discount_factor']
    learn_rate = best_settings['learn_rate']
    hidden_dim = best_settings['hidden_dim']
    init_temp = best_settings['init_temp']

    st = time()

    # change this for learned baseline
    print(
        f'Run settings: baseline=run_episodes_with_SC_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}'
    )

    # initialize the environment
    env = gym.make('CartPole-v1')

    episode_durations_list = []
    reinforce_loss_list = []

    for i in range(n_runs):
        start_time = time()

        policy_model = PolicyNetwork(
            input_dim=4, hidden_dim=hidden_dim,
            output_dim=2)  # change input_ and output_dim for gridworld env
        seed = 40 + i
        set_seeds(env, seed)

        episode_durations, reinforce_loss = run_episodes_with_SC_baseline(
            policy_model, env, n_episodes, discount_factor, learn_rate,
            init_temp, stochasticity)

        episode_durations_list.append(episode_durations)
        reinforce_loss_list.append(reinforce_loss)

        del policy_model

        end_time = time()
        h, m, s = get_running_time(end_time - start_time)

        print(
            f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
        )

    env.close()

    et = time()
    h, m, s = get_running_time(et - st)

    evals = {}
    evals['episode_durations'] = episode_durations_list
    evals['reinforce_loss'] = reinforce_loss_list

    pkl.dump(evals, open(eval_file, 'wb'))

    print(
        f'Done with runs in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
    )
Example #28
        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        opt.step()


def parse_args():
    # Adam: ~55% test accuracy, RMSprop: ~60% test accuracy with RnnModel1 (single layer)
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', required=True)
    parser.add_argument('--batch-size', type=int, default=256)
    parser.add_argument('--iterations', type=int, default=int(1e4))
    parser.add_argument('--test-interval', type=int, default=100)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--clip', type=float, default=1)
    parser.add_argument('--seed', default=1, type=int)
    parser.add_argument('--device',
                        default='cuda' if torch.cuda.is_available() else 'cpu')
    parser.add_argument('--out', default='results')
    args = parser.parse_args()
    args.out = os.path.join(args.out, unique_string())
    return args


if __name__ == '__main__':
    args = parse_args()
    print(args)
    set_seeds(args.seed)
    main(args)
Example #29
def main(config='config/finetune/agnews/train.json'):

    cfg = Config(**json.load(open(config, "r")))

    cfg_data = data.Config(**json.load(open(cfg.cfg_data, "r")))
    cfg_model = models.Config(**json.load(open(cfg.cfg_model, "r")))
    cfg_optim = trainer.Config(**json.load(open(cfg.cfg_optim, "r")))

    set_seeds(cfg.seed)

    ### Prepare Dataset and Preprocessing ###

    TaskDataset = data.get_class(cfg_data.task) # task dataset class according to the task
    tokenizer = tokenization.FullTokenizer(vocab_file=cfg_data.vocab_file, do_lower_case=True)
    dataset = TaskDataset(cfg_data.data_file[cfg.mode], pipelines=[
        data.RemoveSymbols('\\'),
        data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        data.AddSpecialTokensWithTruncation(cfg_data.max_len),
        data.TokenIndexing(tokenizer.convert_tokens_to_ids,
                           TaskDataset.labels,
                           cfg_data.max_len)
    ], n_data=None)
    tensors = TensorDataset(*dataset.get_tensors()) # To Tensors
    data_iter = DataLoader(tensors, batch_size=cfg_optim.batch_size, shuffle=False)

    ### Fetch Teacher's output and put it into the dataset ###

    def fetch_logits(model):
        def get_logits(model, batch):
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            return 0.0, logits

        train_loop = trainer.TrainLoop(cfg_optim, model, data_iter, None, None, get_device())
        results = torch.cat(train_loop.eval(get_logits, cfg.model_file))
        return results


    if cfg.mode == "train":
        print("Fetching teacher's output...")
        teacher = models.Classifier4Transformer(cfg_model, len(TaskDataset.labels))
        teacher.load_state_dict(torch.load(cfg.model_file)) # use trained model
        with torch.no_grad():
            teacher_logits = fetch_logits(teacher)

        tensors = TensorDataset(teacher_logits, *dataset.get_tensors()) # To Tensors
        data_iter = DataLoader(tensors, batch_size=cfg_optim.batch_size, shuffle=False)

    ### Models ###

    model = models.BlendCNN(cfg_model, len(TaskDataset.labels))
    checkpoint.load_embedding(model.embed, cfg.pretrain_file)

    optimizer = optim.optim4GPU(cfg_optim, model)

    train_loop = trainer.TrainLoop(
        cfg_optim, model, data_iter, optimizer, cfg.save_dir, get_device()
    )

    def get_loss(model, batch, global_step): # make sure loss is a scalar tensor
        teacher_logits, input_ids, segment_ids, input_mask, label_id = batch
        T = 1.0
        logits = model(input_ids, segment_ids, input_mask)
        loss = 0.1*nn.CrossEntropyLoss()(logits, label_id)
        loss += 0.9*nn.KLDivLoss()(
            F.log_softmax(logits/T, dim=1),
            F.softmax(teacher_logits/T, dim=1)
        )
        #loss = 0.9*nn.MSELoss()(logits, teacher_logits)
        return loss

    def evaluate(model, batch):
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)
        result = (label_pred == label_id).float() #.cpu().numpy()
        accuracy = result.mean()
        return accuracy, result

    if cfg.mode == "train":
        train_loop.train(get_loss, None, None)  # do not use a pretrain file
        print("Training has been done properly.")

    elif cfg.mode == "eval":
        results = train_loop.eval(evaluate, cfg.model_file)
        total_accuracy = torch.cat(results).mean().item()
        print(f"Accuracy: {total_accuracy}")
Example #30
sort_dict(
    experiment,
    ['name', 'tags', 'epoch', 'samples', 'model', 'optimizer', 'sessions'])
sort_dict(session, [
    'epochs', 'batch_size', 'losses', 'seed', 'cpus', 'device', 'samples',
    'status', 'datetime_started', 'datetime_completed', 'data', 'log',
    'checkpoint', 'git', 'gpus'
])
experiment.sessions.append(session)
pyaml.pprint(experiment, sort_dicts=False, width=200)
del session
# endregion

# region Building phase
# Seeds (set them after the random run id is generated)
set_seeds(experiment.session.seed)

# Model
model: torch.nn.Module = import_(experiment.model.fn)(
    *experiment.model.args, **experiment.model.kwargs)
if 'state_dict' in experiment.model:
    model.load_state_dict(torch.load(experiment.model.state_dict))
model.to(experiment.session.device)

# Optimizer
optimizer: torch.optim.Optimizer = import_(
    experiment.optimizer.fn)(model.parameters(), *experiment.optimizer.args,
                             **experiment.optimizer.kwargs)
if 'state_dict' in experiment.optimizer:
    optimizer.load_state_dict(torch.load(experiment.optimizer.state_dict))