示例#1
0
def main(lr, num_epochs, datadir, batch_size, nworkers, outdir):
    """Build a GPU ResNet-18 training setup and run the trainer.

    Args:
        lr: Learning rate handed to the SGD optimizer.
        num_epochs: Number of epochs; also given to the PolyPolicy schedule.
        datadir: Directory holding ``train.msgpack`` and ``val.msgpack``.
        batch_size: Batch size used for both image loaders.
        nworkers: Worker count used for both image loaders.
        outdir: Output directory forwarded to the Trainer.
    """
    # GPU-only setup: the network and both losses are moved to CUDA.
    cudnn.benchmark = True
    network = resnet18().cuda()

    # NOTE(review): the training loader is also built with ARGS_VAL --
    # confirm that this is intended rather than a training-specific set.
    train_path = pt.join(datadir, 'train.msgpack')
    train_iter = make_image_loader(train_path, batch_size, nworkers,
                                   *ARGS_VAL)
    val_path = pt.join(datadir, 'val.msgpack')
    val_iter = make_image_loader(val_path, batch_size, nworkers, *ARGS_VAL)

    loss = CrossEntropyLoss(output_key="net_out").cuda()
    val_loss = CrossEntropyLoss(output_key="net_out").cuda()

    sgd_options = dict(lr=lr, weight_decay=0.0004, momentum=0.9)
    optimizer = optim.SGD(network.parameters(), **sgd_options)
    policy = PolyPolicy(optimizer, num_epochs, power=1)

    metric = AccuracyMetric()
    trainer = Trainer(network, optimizer, loss, metric, None, policy,
                      train_iter, val_iter, outdir, val_loss)

    # Both loaders are context managers and stay open for the whole run.
    with train_iter, val_iter:
        trainer.train(num_epochs)
    def batch_start(self, t: Trainer):
        """Score the model every ``self.interval`` steps and track the best.

        On an improved (or first) BLEU score the best value is stored, the
        patience counter is reset to ``self.early_stop``, an optional "best"
        checkpoint is written and a summary text is logged. Otherwise the
        patience counter is decremented and, once it drops below zero,
        ``t.early_stop`` is set. The BLEU value is plotted in both cases.

        Args:
            t: The trainer whose step counter, config and experiment logger
                are used.
        """
        # Only act on steps that are a multiple of the interval.
        if t.step % self.interval != 0:
            return

        with torch.no_grad():
            bleu = self._score(t)
            print(f"BLEU:{bleu}\n")

            improved = self.best is None or bleu > self.best
            if not improved:
                self.patience -= 1
                if self.patience < 0:
                    t.early_stop = True
            else:
                self.best = bleu
                self.patience = self.early_stop
                if self.keep_best:
                    t.checkpoint(name=t.config["name"], tags=["best"])

                # Record cross-entropy and perplexity alongside the best BLEU.
                val_loss = t.eval_epoch(only_eval=True)
                ce_loss = pandas.DataFrame(val_loss)["mt"].mean()
                text = (f"BLEU:{bleu}"
                        f"\nCross-Entropy:{ce_loss:.2f}"
                        f"\nPerplexity:{math.exp(ce_loss):.2f}")
                t.exp.text("best_scores", text, "Best scores")

            t.exp.line("bleu", None, "BLEU", bleu)
示例#3
0
def main():
    """Train a text model from VK posts, a directory of files, or stdin.

    With ``--vk_group_name`` set, posts are fetched through ``VKParser``; if
    ``--vk_file`` is also set the posts are only dumped to that file and the
    function returns without training. Otherwise every ``*.txt`` file in
    ``--input_dir`` is used, falling back to stdin. The resulting model is
    written in binary mode to ``args.model``.
    """
    args = parse_args()
    use_vk = args.vk_group_name is not None

    if use_vk:
        # Imported lazily so the VK dependency is only needed in this mode.
        from modules import VKParser
        vk_parser = VKParser(group_name=args.vk_group_name,
                             app_id=args.vk_appid,
                             login=args.vk_login,
                             password=args.vk_pass)
        if args.vk_file is not None:
            vk_parser.dump_posts(args.vk_file)
            return

    trainer = Trainer(n=args.model_n, lc=args.lc)
    # Every training source is consumed with the same tokenizer settings.
    train_options = dict(re=re_ru_words_punc, output_newlines=True)

    if use_vk:
        posts = vk_parser.post_iter(args.vk_offset, args.vk_count)
        trainer.train(posts, **train_options)
    elif args.input_dir is not None:
        for entry in scandir(args.input_dir):
            if not (entry.name.endswith('.txt') and entry.is_file()):
                continue
            with open(entry.path, 'r', encoding='utf8') as file:
                trainer.train(file, **train_options)
    else:
        trainer.train(stdin, **train_options)

    with open(args.model, 'wb') as file:
        trainer.get_model().dump(file)
示例#4
0
def main():
    """Train the FCN model and, unless overfitting one batch, evaluate it.

    When ``config.one_batch_overfit`` is set only training is performed;
    otherwise a detector is built on the trained model, pointed at the
    ``best_weights.hdf5`` file in the model's weights directory, and passed
    to ``estimate_quality`` together with the dataset.
    """
    dataset = Dataset()
    fcn_model = loader.get_fcn_model_module().FCNModel()
    Trainer().train(fcn_model, dataset)

    # Quality estimation is skipped for the one-batch overfit run.
    if config.one_batch_overfit:
        return

    detector = FCNDetector(fcn_model.model)
    best_weights = osp.join(fcn_model.weights_dir, 'best_weights.hdf5')
    detector.weights_path = best_weights
    estimate_quality(detector, dataset)
示例#5
0
def main(config):
    """Train the requested text classifier(s) and save them with the vocab.

    Args:
        config: Parsed options. Reads ``train_fn``, ``batch_size``,
            ``min_vocab_freq``, ``max_vocab_size``, ``gpu_id``, ``rnn``,
            ``cnn``, ``word_vec_size``, ``hidden_size``, ``n_layers``,
            ``dropout`` and ``model_fn``.

    Raises:
        Exception: If neither ``config.rnn`` nor ``config.cnn`` is set.
    """
    loaders = DataLoader(train_fn=config.train_fn,
                         batch_size=config.batch_size,
                         min_freq=config.min_vocab_freq,
                         max_vocab=config.max_vocab_size,
                         device=config.gpu_id)

    print(
        '|train| =',
        len(loaders.train_loader.dataset),
        '|valid| =',
        len(loaders.valid_loader.dataset),
    )

    vocab_size = len(loaders.text.vocab)
    n_classes = len(loaders.label.vocab)
    print('|vocab| =', vocab_size, ' |classes| =', n_classes)

    # Truthiness instead of ``is False`` so unset flags (None, 0) are also
    # rejected rather than silently producing an empty checkpoint.
    if not config.rnn and not config.cnn:
        raise Exception(
            'You need to specify an archtiecture to train. (--rnn or --cnn)')

    # Default both models to None: a model that was not trained here is
    # stored as None instead of raising NameError at save time.
    rnn_model = None
    cnn_model = None

    if config.rnn:
        # Declare model and loss.
        model = RNNClassifier(
            input_size=vocab_size,
            word_vec_size=config.word_vec_size,
            hidden_size=config.hidden_size,
            n_classes=n_classes,
            n_layers=config.n_layers,
            dropout_p=config.dropout,
        )
        optimizer = optim.Adam(model.parameters())
        crit = nn.NLLLoss()
        print(model)

        if config.gpu_id >= 0:
            model.cuda(config.gpu_id)
            crit.cuda(config.gpu_id)

        rnn_trainer = Trainer(config)
        rnn_model = rnn_trainer.train(model, crit, optimizer,
                                      loaders.train_loader,
                                      loaders.valid_loader)

    # NOTE(review): there is no CNN training branch in this function, so
    # the 'cnn' entry is always None even when ``config.cnn`` is set.
    torch.save(
        {
            'rnn': rnn_model.state_dict() if rnn_model is not None else None,
            'cnn': cnn_model.state_dict() if cnn_model is not None else None,
            'config': config,
            'vocab': loaders.text.vocab,
            'classes': loaders.label.vocab,
        }, config.model_fn)
示例#6
0
def test_main():
    """Smoke test: run one optimization on a single-sample configuration.

    Loads ``config.json`` from the working directory, overrides it so that
    nothing is written to disk and only one train and one test sample are
    used, then builds a ``Trainer`` and calls ``optimize``. The test passes
    as long as no exception is raised.
    """
    # Context manager so the config file handle is closed deterministically.
    with open('config.json', 'r') as config_file:
        config = json.load(config_file)

    config["output_path"] += "{:%Y-%m-%d_%H:%M}/".format(
        datetime.datetime.now())
    config['is_file_saved'] = False
    config['portrait_dir'] = "./data/person_image_dataset/96x64_one/"
    config['batch_size'] = 1
    config['train_data_num'] = 1
    config['test_data_num'] = 1

    t = Trainer(**config)
    t.optimize()
    # Reaching this point without an exception is the success criterion.
    assert 1 == 1
示例#7
0
def main(config, model_weight=None, opt_weight=None):
    """Build loader, model, criterion and optimizer, then run training.

    Args:
        config: Parsed options; its attributes are pretty-printed first.
        model_weight: Optional state dict loaded into the model.
        opt_weight: Optional state dict loaded into the optimizer.
    """
    pprint.pprint(vars(config), indent=4)

    # The language pair is taken from the first and last two characters
    # of ``config.lang``.
    language_pair = (config.lang[:2], config.lang[-2:])
    loader = DataLoader(
        config.train,
        config.valid,
        language_pair,
        batch_size=config.batch_size,
        device=-1,
        max_length=config.max_length
    )

    input_size = len(loader.src.vocab)
    output_size = len(loader.tgt.vocab)
    model = get_model(input_size, output_size, config)
    crit = get_crit(output_size, data_loader.PAD)

    if model_weight:
        model.load_state_dict(model_weight)

    use_gpu = config.gpu_id >= 0
    if use_gpu:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    # The optimizer is created after the model has been moved to its device.
    optimizer = get_optimizer(model, config)
    if opt_weight:
        optimizer.load_state_dict(opt_weight)

    if config.verbose >= 2:
        for component in (model, crit, optimizer):
            print(component)

    # No learning-rate scheduler is used.
    lr_scheduler = None

    trainer = Trainer(IgniteEngine, config)
    trainer.train(
        model,
        crit,
        optimizer,
        train_loader=loader.train_iter,
        valid_loader=loader.valid_iter,
        src_vocab=loader.src.vocab,
        tgt_vocab=loader.tgt.vocab,
        n_epochs=config.n_epochs,
        lr_scheduler=lr_scheduler
    )
示例#8
0
def main(config):
    """Train a CBOW model and print its prediction for a fixed context.

    Args:
        config: Parsed options. Reads ``gpu_id`` (negative means CPU),
            ``train_fn``, ``window_size``, ``train_ratio``, ``batch_size``,
            ``embd_size``, ``hidden_size`` and ``learning_rate``.
    """
    use_gpu = config.gpu_id >= 0
    if use_gpu:
        print("Device:", torch.cuda.get_device_name(config.gpu_id))
    else:
        print("Device: CPU")

    print("Building Vocab...")
    data_handler = CbowDataHandler(
        file_name=config.train_fn,
        window_size=config.window_size,
        train_ratio=config.train_ratio,
        batch_size=config.batch_size,
    )
    print('|train| =', len(data_handler.train_loader.dataset), '|valid| =',
          len(data_handler.valid_loader.dataset))
    print('|vocab_size| =', data_handler.vocab_size)

    model = CBOW(
        vocab_size=data_handler.vocab_size,
        embd_size=config.embd_size,
        window_size=config.window_size,
        hidden_size=config.hidden_size,
    )
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    crit = nn.NLLLoss()
    print(model)

    if use_gpu:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    trainer = Trainer(config)
    trainer.train(model, crit, optimizer, data_handler.train_loader,
                  data_handler.valid_loader)

    # Test
    test_data = ['맞교환', '백색', '합판', '이메일']
    ctx_idxs = [data_handler.w2i[w] for w in test_data]
    ctx_var = Variable(torch.LongTensor([ctx_idxs]))
    # Only move the context to a GPU when one is configured: a negative
    # device index is the CPU marker here and is rejected by ``Tensor.to``.
    if use_gpu:
        ctx_var = ctx_var.to(config.gpu_id)

    model.zero_grad()
    y = model(ctx_var)
    _, predicted = torch.max(y.data, 1)
    predicted_word = data_handler.i2w[int(predicted[0])]

    print('input:', test_data)
    print('predicted:', predicted_word)
示例#9
0
def main():
    """Parse arguments, seed the RNGs, build all components and train.

    Creates the tokenizer, the train/val/test data loaders (only the train
    split is shuffled), the model, optimizer and learning-rate scheduler,
    then hands everything to ``Trainer`` and starts training.
    """
    args = parse_agrs()

    # Seed NumPy and PyTorch and force deterministic cuDNN behaviour.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    tokenizer = Tokenizer(args)

    # Loaders are created in train, val, test order.
    train_dataloader, val_dataloader, test_dataloader = (
        R2DataLoader(args, tokenizer, split=split, shuffle=(split == 'train'))
        for split in ('train', 'val', 'test')
    )

    model = R2GenModel(args, tokenizer)

    # Loss and metrics are passed around as plain function handles.
    criterion, metrics = compute_loss, compute_scores

    optimizer = build_optimizer(args, model)
    lr_scheduler = build_lr_scheduler(args, optimizer)

    Trainer(model, criterion, metrics, optimizer, args, lr_scheduler,
            train_dataloader, val_dataloader, test_dataloader).train()
示例#10
0
import datetime
import json
import os

from modules.trainer import Trainer
from utils.argument_handler import argment_handler

if __name__ == "__main__":
    # Entry point: load the JSON config named on the command line, derive a
    # timestamped output directory, optionally persist the effective config
    # there, then run the optimization.
    args = argment_handler()

    # Context managers close the file handles deterministically; the dumped
    # config is therefore fully flushed before training starts.
    with open(args.config_file, 'r') as config_file:
        config = json.load(config_file)

    config["output_path"] += "{:%Y-%m-%d_%H:%M}/".format(
        datetime.datetime.now())
    config['is_file_saved'] = not args.no_write

    if config['is_file_saved']:
        os.mkdir(config["output_path"])
        with open(config["output_path"] + 'config.json', 'w') as config_out:
            json.dump(config, config_out, indent=4)

    t = Trainer(**config)
    t.optimize()
示例#11
0
def main(in_dataset_folder, in_noisy_dataset_folder, in_custom_vocab_file,
         in_model_folder, in_config):
    """Prepare data and vocabularies, save the model metadata and train.

    Args:
        in_dataset_folder: Folder containing ``train.json`` and ``dev.json``.
        in_noisy_dataset_folder: Folder containing ``test_ood.json``.
        in_custom_vocab_file: Optional vocabulary file with one word per
            line; when None the vocabulary is built from the training
            utterances.
        in_model_folder: Folder passed to ``save_model`` and the Trainer.
        in_config: Path to the JSON configuration file (read as UTF-8).
    """
    with open(in_config, encoding='utf-8') as config_in:
        config = json.load(config_in)

    train_json = load_hcn_json(os.path.join(in_dataset_folder, 'train.json'))
    dev_json = load_hcn_json(os.path.join(in_dataset_folder, 'dev.json'))
    test_ood_json = load_hcn_json(
        os.path.join(in_noisy_dataset_folder, 'test_ood.json'))

    kb = make_augmented_knowledge_base(
        os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-kb.txt'),
        os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-candidates.txt'))
    action_templates = train_json['actions']

    # The input length is bounded by the longest dialog in the OOD test set.
    config['max_input_length'] = max(
        len(dialog['turns']) for dialog in test_ood_json['dialogs'])

    et = EntityTracker(kb)

    # Only the noisy marking is used further down.
    _, post_ood_turns_noisy = mark_post_ood_turns(test_ood_json)

    if in_custom_vocab_file is None:
        utterances_tokenized = [
            utterance['input'].split()
            for dialog in train_json['dialogs']
            for utterance in dialog['turns']
        ]
        vocab, rev_vocab = make_vocabulary(
            utterances_tokenized,
            config['max_vocabulary_size'],
            special_tokens=[PAD, START, UNK, EOS] + list(kb.keys()))
    else:
        with open(in_custom_vocab_file) as vocab_in:
            rev_vocab = [line.rstrip() for line in vocab_in]
        vocab = {word: idx for idx, word in enumerate(rev_vocab)}

    ctx_features = [
        utterance['context_features']
        for dialog in train_json['dialogs']
        for utterance in dialog['turns']
        if 'context_features' in utterance
    ]
    ctx_features_vocab, ctx_features_rev_vocab = make_vocabulary(
        ctx_features, config['max_vocabulary_size'], special_tokens=[])
    config['vocabulary_size'] = len(vocab)

    print('Training with config: {}'.format(json.dumps(config)))

    # Both preprocessing functions are looked up by name from the config.
    data_preparation_function = getattr(utils.preprocessing,
                                        config['data_preparation_function'])

    def prepare(dataset_json):
        """Apply the configured preparation function to one dataset."""
        return data_preparation_function(dataset_json, vocab,
                                         ctx_features_vocab, et, **config)

    data_train = prepare(train_json)
    data_dev = prepare(dev_json)
    data_test_ood = prepare(test_ood_json)

    dropout_turn_generation_function = getattr(
        utils.preprocessing, config['dropout_turn_generation_function'])
    random_input = dropout_turn_generation_function(
        10000, 3, config['max_sequence_length'], train_json, vocab,
        config['turn_word_dropout_prob'])

    save_model(rev_vocab, config, kb, action_templates, in_model_folder)

    model_class = getattr(modules, config['model_name'])
    net = model_class(vocab, config, len(ctx_features_vocab),
                      len(action_templates))

    trainer = Trainer(data_train, data_dev, data_test_ood, action_templates,
                      random_input, post_ood_turns_noisy, config, net,
                      in_model_folder)
    trainer.train()
示例#12
0
def main(env, visualise, folder_name, **kwargs):
    """Train a world model on transitions collected by a policy in ``env``.

    Args:
        env: Environment exposing ``observation_space``, ``action_space``,
            ``reset``, ``step`` and ``render``.
        visualise: Object whose ``train_iteration_update`` receives the
            averaged losses and recent training scores.
        folder_name: Output folder prefix (concatenated directly with file
            names, so it is expected to end with a path separator).
        **kwargs: Hyper-parameters. Reads ``exploration_noise``,
            ``encoder_type``, ``z_dim``, ``buffer_size``, ``train_steps``,
            ``batch_size``, ``export_interval`` and ``eval_interval``; the
            full mapping is also forwarded to the policy, world model and
            trainer.

    Raises:
        NotImplementedError: If ``encoder_type`` names an encoder whose
            construction is not implemented in this function.
        ValueError: If ``encoder_type`` is not a recognised value.
    """
    shutil.copyfile(os.path.abspath(__file__), folder_name + 'main.py')
    obs_dim = tuple(env.observation_space.sample().shape)
    assert len(obs_dim) == 1 or len(
        obs_dim) == 3, f'States should be 1D or 3D vector. Received: {obs_dim}'
    a_dim = tuple(env.action_space.sample().shape)
    print('Observation space:', obs_dim)
    print('Action space:', a_dim)
    device = 'cpu'  #'cuda' if torch.cuda.is_available() else 'cpu'
    policy = Policy(obs_dim,
                    a_dim,
                    sigma=kwargs['exploration_noise'],
                    device=device,
                    **kwargs)
    policy.save(folder_name)

    # Previously every branch except 'none' left ``encoder`` unbound, which
    # surfaced as an UnboundLocalError below. Fail with an explicit message.
    encoder_type = kwargs['encoder_type']
    if encoder_type == 'none':
        encoder = None
    elif encoder_type in ('random', 'vae', 'idf', 'cont'):
        raise NotImplementedError(
            f'encoder_type {encoder_type!r} is not implemented')
    else:
        raise ValueError(f'Unknown encoder_type: {encoder_type!r}')

    # The encoded branch becomes reachable once encoder construction is
    # added to the placeholder branch above.
    if encoder is None:
        wm = WorldModel(obs_dim, a_dim, **kwargs)
    else:
        wm = EncodedWorldModel(obs_dim,
                               a_dim,
                               kwargs['z_dim'],
                               encoder,
                               device=device,
                               **kwargs)

    trainer = Trainer(x_dim=obs_dim,
                      a_dim=a_dim,
                      policy=policy,
                      wm=wm,
                      encoder=encoder,
                      **kwargs)

    scores = {'train': [], 'eval': []}
    buffer = DynamicsReplayBuffer(kwargs['buffer_size'], device)
    while trainer.train_steps < kwargs['train_steps']:
        done = False
        s_t = env.reset()
        env.render()
        score = 0
        while not done:
            a_t = policy.act(
                torch.from_numpy(s_t).to(dtype=torch.float32,
                                         device=device)).numpy()
            s_tp1, r_t, done, info = env.step(a_t)
            env.render()
            score += r_t
            buffer.add(s_t, a_t, s_tp1, done)
            # Keep training inside the episode until the step budget is hit.
            if trainer.train_steps < kwargs['train_steps']:
                xs_t, as_t, xs_tp1, dones = buffer.sample(kwargs['batch_size'])
                trainer.train_step(xs_t, as_t, xs_tp1)
            if trainer.train_steps % kwargs['export_interval'] == 0:
                # Report each non-empty loss series averaged over the last
                # export interval, plus the mean of recent episode scores.
                visualise.train_iteration_update(
                    **{
                        k + '_loss': np.mean(i[-kwargs['export_interval']:])
                        for k, i in trainer.losses.items() if i != []
                    },
                    ext=np.mean(scores['train'][-kwargs['eval_interval']:]))
                trainer.save_models(folder_name + 'saved_objects/')
            if trainer.train_steps % kwargs['eval_interval'] == 0:
                print(trainer.train_steps)
            s_t = s_tp1
        scores['train'].append(score)
        print(score)
def main():
    """Restore a pretrained checkpoint and run an interactive loop forever.

    Parses arguments and hyper-parameters, makes sure the pretrained 'wow'
    checkpoint is present under ``tmp/`` (downloading and unzipping it when
    ``tmp/wow_pretrained`` does not exist), builds the dataset reader, model,
    optimizer and trainer, restores the checkpoint, and then repeatedly runs
    and resets an ``InteractiveWorld``. The function does not return.

    Raises:
        ValueError: If ``hparams.test_mode`` is anything other than 'wow'.
        AssertionError: If ``hparams.num_gpus`` is not 1.
    """
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    if hparams.test_mode == 'wow':
        os.makedirs('./tmp', exist_ok=True)
        # Download the pretrained archive only when it has not been
        # unpacked into tmp/wow_pretrained yet.
        if not os.path.exists('tmp/wow_pretrained'):
            fname = 'wow_pretrained.zip'
            gd_id = '1lkF1QENr45j0vl-Oja3wEiqkxoNTxkXT'
            colorlog.info(f"Download pretrained checkpoint {fname}")
            download_from_google_drive(gd_id, os.path.join('tmp', fname))
            unzip('tmp', fname)
        ckpt_fname = os.path.join('tmp/wow_pretrained', 'ckpt-46070')
    else:
        raise ValueError("Only 'wow' is currently supported")

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    #tf.random.set_seed(hparams.random_seed)
    #np.random.seed(hparams.random_seed)
    #random.seed(hparams.random_seed)

    # Set gpu
    # Single-GPU only: no distribution strategy is used.
    assert hparams.num_gpus == 1
    mirrored_strategy = None

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    reader = WowDatasetReader(
        hparams.batch_size,
        hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    model = MODELS[hparams.model](hparams, vocabulary)
    optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                         clipnorm=hparams.clipnorm)
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function, WowDatasetReader.remove_pad)

    # Setup checkpoint
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     optimizer_step=global_step)
    # One training step is run on a real example before restoring --
    # presumably so the model/optimizer variables exist for the checkpoint
    # to map onto; TODO confirm.
    train_example = next(iter(train_dataset))
    _ = trainer.train_step(train_example)
    checkpoint.restore(ckpt_fname)

    # Load retriever and input processor
    dictionary = reader._dictionary
    # Wrap the tokenized ids of a sentence with the BERT CLS and SEP ids.
    tokenize_fn = lambda x: [data_vocab.BERT_CLS_ID] \
        + dictionary.convert_tokens_to_ids(dictionary.tokenize(x)) \
        + [data_vocab.BERT_SEP_ID]
    input_processor = InteractiveInputProcessor(tokenize_fn, 5)

    # Compile graph
    # The test step is executed five times on a dummy input before the
    # interactive loop starts.
    colorlog.info("Compile model")
    dummy_input = input_processor.get_dummy_input()
    for _ in trange(5, ncols=70):
        trainer.test_step(dummy_input)

    # Module for interactive mode
    wiki_tfidf_retriever = WikiTfidfRetriever(hparams.cache_dir)
    topics_generator = TopicsGenerator(hparams.cache_dir)
    interactive_world = InteractiveWorld(responder=trainer,
                                         input_processor=input_processor,
                                         wiki_retriever=wiki_tfidf_retriever,
                                         topics_generator=topics_generator)

    # Loop!
    # Runs until the process is interrupted; each pass is one session
    # followed by a reset.
    while True:
        interactive_world.run()
        interactive_world.reset()
示例#14
0
    """ load model """
    model = prepare_model(args)
    model.cuda()

    """ define loss """
    criterion = nn.CrossEntropyLoss()

    """ setup metrics """
    metric = MeanIOUScore(9)

    """ setup optimizer """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    """ setup tensorboard """
    writer = SummaryWriter(os.path.join(args.save_dir, "train_info"))

    """ setup trainer """
    trainer = Trainer(
        model,
        optimizer,
        criterion,
        args.accumulate_gradient,
        train_loader,
        val_loader,
        writer,
        metric,
        args.save_dir,
    )

    trainer.fit(args.epochs)
示例#15
0
    def evaluate_mdrnn(test_loader, multi_dimensional_rnn, device,
                       vocab_list: list, blank_symbol: str, horizontal_reduction_factor: int,
                       image_input_is_unsigned_int: bool, input_is_list: bool,
                       language_model_parameters: LanguageModelParameters,
                       save_score_table_file_path: str, epoch_number: int, epoch_statistics: EpochStatistics):
        """Evaluate a network with beam-search decoding and report scores.

        For every batch the network outputs are soft-maxed over dimension 2
        and decoded with a decoder from ``Evaluator.create_decoder``. The top
        beam of each example is converted to a string and compared with the
        reference label string; exact matches are counted as correct.
        Accuracy, character error rate (with and without word separators)
        and word error rate are printed and optionally appended to a score
        table file.

        Args:
            test_loader: Iterable yielding ``(inputs, labels)`` pairs;
                ``len(test_loader.dataset)`` is used as the example count.
            multi_dimensional_rnn: Network, called as
                ``multi_dimensional_rnn(inputs, max_input_width)``.
            device: Device the inputs and labels are moved to when
                ``Utils.use_cuda()`` is true.
            vocab_list: Vocabulary passed to the decoder and the string
                conversions; its length is used as ``cutoff_top_n``.
            blank_symbol: Passed to the decoder and to the label-to-string
                conversion.
            horizontal_reduction_factor: Passed to the computation of the
                probabilities sequence lengths.
            image_input_is_unsigned_int: If true, the inputs are type-checked
                and converted to float image tensors.
            input_is_list: Whether ``inputs`` is a list of tensors rather
                than a single tensor.
            language_model_parameters: Forwarded to the decoder; ``None``
                means the string conversion runs without a language model.
            save_score_table_file_path: If not None, a score line is appended
                to this file (preceded by a header if the file is new).
            epoch_number: Epoch number written to the score table line.
            epoch_statistics: Statistics written to the score table.

        Returns:
            A ``ValidationStats`` built from the total example count, the
            number of exact matches, the CER excluding word separators and
            the WER.
        """

        correct = 0
        # NOTE(review): ``total`` is accumulated below but never read; the
        # reported example count comes from ``len(test_loader.dataset)``.
        total = 0

        output_strings = list([])
        reference_labels_strings = list([])

        for data in test_loader:
            inputs, labels = data

            if Utils.use_cuda():
                labels = labels.to(device)

                if input_is_list:
                    inputs = Utils.move_tensor_list_to_device(inputs, device)
                else:
                    inputs = inputs.to(device)

            # If the image input comes in the form of unsigned ints, they need to
            # be converted to floats (after moving to GPU, i.e. directly on GPU
            # which is faster)
            if image_input_is_unsigned_int:
                Trainer.check_inputs_is_right_type(inputs, input_is_list)
                inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(inputs)

            # https://github.com/pytorch/pytorch/issues/235
            # Running the evaluation without computing gradients is the recommended way
            # since this saves time, and more importantly, memory
            with torch.no_grad():

                # outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
                max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(inputs)
                outputs = multi_dimensional_rnn(inputs, max_input_width)

                probabilities_sum_to_one_dimension = 2
                # Outputs is the output of the linear layer which is the input to warp_ctc
                # But to get probabilities for the decoder, the softmax function needs to
                # be applied to the outputs
                probabilities = torch.nn.functional. \
                    softmax(outputs, probabilities_sum_to_one_dimension)

                # No longer necessary with fixed word separator specification in decoder
                # and normal language model
                # probabilities = Evaluator.append_preceding_word_separator_to_probabilities(
                #    probabilities, vocab_list, Evaluator.WORD_SEPARATOR_SYMBOL)

                print(">>> evaluate_mdrnn  - outputs.size: " + str(outputs.size()))
                print(">>> evaluate_mdrnn  - probabilities.size: " + str(probabilities.size()))

                # beam_size = 20   # This is the problem perhaps...
                # beam_size = 100  # The normal default is 100
                beam_size = Evaluator.BEAM_SIZE  # Larger value to see if it further improves results
                # This value specifies the number of (character) probabilities kept in the
                # decoder. If it is set equal or larger to the number of characters in the
                # vocabulary, no pruning is done for it
                cutoff_top_n = len(vocab_list)  # No pruning for this parameter
                print(">>> evaluate_mdrnn  - len(vocab_list): " + str(len(vocab_list)))
                # NOTE(review): a new decoder is created for every batch.
                decoder = Evaluator.create_decoder(vocab_list,  cutoff_top_n, beam_size,
                                                   blank_symbol,
                                                   language_model_parameters)
                # Per-example label lengths, used below to strip the padding
                # from the reference labels.
                label_sizes = WarpCTCLossInterface. \
                    create_sequence_lengths_specification_tensor_different_lengths(labels)

                sequence_lengths = WarpCTCLossInterface.\
                    create_probabilities_lengths_specification_tensor_different_lengths(
                        labels, horizontal_reduction_factor, probabilities)
                sequence_lengths = Evaluator.increase_sequence_lengths_by_one(sequence_lengths)
                # print(">>> evaluate_mdrnn  -  sequence lengths: " + str(sequence_lengths))
                # print("probabilities.data.size(): " + str(probabilities.data.size()))
                beam_results, beam_scores, timesteps, out_seq_len = \
                    decoder.decode(probabilities.data, sequence_lengths)

                # print(">>> evaluate_mdrnn  - beam_results: " + str(beam_results))

                total += labels.size(0)

                for example_index in range(0, beam_results.size(0)):
                    # Index 0 selects the first beam of this example.
                    beam_results_sequence = beam_results[example_index][0]
                    # print("beam_results_sequence: \"" + str(beam_results_sequence) + "\"")
                    use_language_model_in_decoder = language_model_parameters is not None
                    output_string = Evaluator.convert_to_string(
                        beam_results_sequence, vocab_list, out_seq_len[example_index][0],
                        use_language_model_in_decoder)
                    example_labels_with_padding = labels[example_index]
                    # Extract the real example labels, removing the padding labels
                    reference_labels = example_labels_with_padding[0:label_sizes[example_index]]

                    # print(">>> evaluate_mdrnn  - reference_labels: " + str(reference_labels))
                    reference_labels_string = Evaluator.convert_labels_tensor_to_string(
                        reference_labels, vocab_list, blank_symbol)

                    # An example only counts as correct on an exact string match.
                    if reference_labels_string == output_string:
                        # print("Yaaaaah, got one correct!!!")
                        correct += 1
                        correct_string = "correct"
                    else:
                        correct_string = "wrong"

                    print(">>> evaluate_mdrnn  - output: \"" + output_string + "\" " +
                          "\nreference: \"" + reference_labels_string + "\" --- "
                          + correct_string)

                    output_strings.append(output_string)
                    reference_labels_strings.append(reference_labels_string)

            # correct += (predicted == labels).sum()

        # Error rates are computed once over all collected output/reference pairs.
        cer_including_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, True)

        cer_excluding_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, False)

        wer = evaluation_metrics.word_error_rate. \
            compute_word_error_rate_for_list_of_output_reference_pairs(
                output_strings, reference_labels_strings)

        total_examples = len(test_loader.dataset)
        validation_stats = ValidationStats(total_examples, correct, cer_excluding_word_separators, wer)
        # https://stackoverflow.com/questions/3395138/using-multiple-arguments-for-string-formatting-in-python-e-g-s-s
        print("Accuracy of the network on the {} test inputs: {:.2f} % accuracy".format(
            total_examples, validation_stats.get_accuracy()))

        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "including word separators: {:.3f}  CER".format(
                total_examples, cer_including_word_separators))
        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "excluding word separators: {:.3f}  CER".format(
                total_examples, cer_excluding_word_separators))
        print("Word Error Rate (WER)[%] of the network on the {} test inputs: {:.3f}  WER".format(
            total_examples, wer))

        if save_score_table_file_path is not None:
            # Checked before opening, because opening in append mode creates the file.
            score_file_existed = os.path.exists(save_score_table_file_path)
            # Opens the file in append-mode, create if it doesn't exists
            with open(save_score_table_file_path, "a") as scores_table_file:
                if not score_file_existed:
                    scores_table_file.write(Evaluator.score_table_header(total_examples, epoch_statistics))
                scores_table_file.write(Evaluator.score_table_line(epoch_number, correct,
                                                                   validation_stats.get_accuracy(),
                                                                   cer_including_word_separators,
                                                                   cer_excluding_word_separators,
                                                                   wer,
                                                                   epoch_statistics) + "\n")

        return validation_stats
示例#16
0
    joint_num = 24
    model = get_pose_net(RESNET_TYPE, OUTPUT_SHAPE, True, joint_num).to(device)
    model = nn.DataParallel(model)

    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     250,
                                                     eta_min=1e-3)
    metric_fn = get_metric_fn

    trainer = Trainer(model,
                      device,
                      metric_fn,
                      optimizer,
                      scheduler,
                      logger=system_logger)
    early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE,
                                     verbose=True,
                                     logger=system_logger)

    key_column_value_list = [
        TRAIN_SERIAL, TRAIN_TIMESTAMP, MODEL, OPTIMIZER, LOSS_FN, METRIC_FN,
        EARLY_STOPPING_PATIENCE, BATCH_SIZE, EPOCHS, LEARNING_RATE,
        WEIGHT_DECAY, RANDOM_SEED
    ]

    performance_recorder = PerformanceRecorder(
        column_name_list=PERFORMANCE_RECORD_COLUMN_NAME_LIST,
        record_dir=PERFORMANCE_RECORD_DIR,
示例#17
0
def main():
    """Train a text-planning model configured from the command line.

    Parses the training arguments, seeds torch, sets up the task and its
    validation data, builds the model, criterion, optimizer and learning-rate
    schedule, and wraps them in a ``Trainer``.  After trying to resume from
    the latest checkpoint, it trains epoch by epoch until ``args.max_epoch``,
    validating every ``args.validate_interval`` epochs and saving a
    checkpoint every ``args.save_interval`` epochs.
    """
    args = get_train_parser().parse_args()

    if torch.cuda.is_available():
        torch.cuda.set_device(0)
    torch.manual_seed(args.seed)

    task = TextPlanningTask.setup_task(args)
    task.load_dataset(args.valid_set)

    # Both the model and the criterion are moved to the GPU unconditionally.
    model = task.build_model(args).cuda()
    criterion = task.build_criterion(args).cuda()

    criterion_name = criterion.__class__.__name__
    print('| model {}, criterion {}'.format(args.model_name, criterion_name))
    n_params = sum(p.numel() for p in model.parameters())
    n_trained = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('| num. model params: {} (num. trained: {})'.format(
        n_params, n_trained))

    logdir = args.tensorboard_logdir
    if logdir:
        # Drop one trailing slash so the timestamp suffix extends the last
        # path component instead of creating a nested directory.
        if logdir.endswith('/'):
            logdir = logdir[:-1]
        args.tensorboard_logdir = f"{logdir}_{time.strftime('%Y%m%d_%H%M%S')}"
        print('Tensorboard path {}'.format(args.tensorboard_logdir))

    args.ckpt_dir = f'../checkpoints/planning/{args.domain}/{args.exp_name}/'

    # Only parameters that require gradients are handed to the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = FairseqAdam(args, trainable_params)

    lr_scheduler = InverseSquareRootSchedule(args, optimizer)
    lr_scheduler.step_update(0)

    trainer = Trainer(args, task, model, criterion, optimizer, lr_scheduler)

    print('| max tokens per GPU = {} and max samples per GPU = {}'.format(
        args.max_tokens, args.max_samples))

    # Resume from the latest checkpoint if one is available and restore the
    # matching train iterator.
    _extra_state, epoch_itr = checkpoint_utils.load_checkpoint(args, trainer)

    # Train until the configured number of epochs is reached.
    max_epoch = args.max_epoch
    # Placeholder used for LR stepping/checkpointing until the first
    # validation pass has produced real losses.
    valid_losses = [None]

    while epoch_itr.epoch < max_epoch:
        train_epoch(args, trainer, epoch_itr)

        if epoch_itr.epoch % args.validate_interval == 0:
            valid_losses = validate(args, trainer, task, epoch_itr)

        # Only the first validation loss drives the learning-rate update.
        first_valid_loss = valid_losses[0]
        trainer.lr_step(epoch_itr.epoch, first_valid_loss)

        if epoch_itr.epoch % args.save_interval == 0:
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr,
                                             valid_losses[0])
示例#18
0
    # Experiment setup fragment: seeds the RNGs, builds datasets, model,
    # metrics and callbacks from the `_env`, `_model` and `_training` config
    # mappings, then fits the model.
    set_random_seed(_env['seed'])

    # Last path component of the data root.
    # NOTE(review): `project_name` and `now` are not used in the visible part
    # of this function -- confirm whether they are needed.
    project_name = _root.split("/")[-1]
    # Run identifier combining model name/size, learning rate, batch size and
    # seed; it is reused below as the checkpoint file name.
    run_name = (f"{_model['name']}_{_model['size']}-"
                f"lr_{_training['lr']}-bsz_{_training['batch_size']}-"
                f"seed_{_env['seed']}")
    now = datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')

    tokenizer = get_tokenizer(_model['name'], _model['size'])

    # Train and dev splits share the tokenizer and maximum length.
    train_dataset = CustomDataset(_root, 'train', tokenizer, _training["max_len"])
    dev_dataset = CustomDataset(_root, 'dev', tokenizer, _training["max_len"])

    # Look up the model, optimizer and loss classes by their configured names.
    Model = get_model_class(_model['name'])
    Opt = get_optim_class(_model['opt'])
    Loss_fn = get_loss_fn_class(_model['loss'])
    # The number of outputs is taken from the training dataset.
    # NOTE(review): `pretrained_model_path` is passed through str2bool, so the
    # model presumably receives a boolean flag rather than a path -- confirm.
    model = Model(n_outputs=train_dataset.n_outputs, size=_model['size'],
                  pretrained_model_path=str2bool(_model['pretrained_model_path']))

    metric_dic = {
        "acc": Accuracy(),
        "precision": Precision()
    }
    # Checkpoint callback monitoring 'dev_loss' in "min" mode, writing to
    # <_save_model_root>/<run_name>.pth.
    callbacks = [
        ModelCheckpoint(f"{_save_model_root}/{run_name}.pth", monitor='dev_loss', mode="min")
    ]

    # The trainer receives the loss and optimizer classes, not instances.
    trainer = Trainer(model=model, loss_fn_class=Loss_fn, optimizer_class=Opt, metrics=metric_dic)
    trainer.fit(train_dataset, dev_dataset, lr=_training['lr'], epochs=_training['epochs'],
                batch_size=_training['batch_size'], callbacks=callbacks)
示例#19
0
def main(argv):
    """Build a ``Trainer`` with its default arguments and run it.

    ``argv`` is accepted for the caller's benefit but is not used here.
    """
    Trainer().run()
示例#20
0
# Fine-tuning script: trains a pretrained VGG-19 on the images under
# '../images/' and saves the resulting weights.

# Hyper-parameters
lr = 0.001
momentum = 0.9
batch_size = 5
start_epoch = 1
end_epoch = 1
data_root = ''

# Preprocessing: random 224x224 crop, tensor conversion, then per-channel
# normalisation with the given mean/std.  The pipeline is bound to its own
# name so that the `transforms` module is not shadowed.
preprocess = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
datasets = dset.ImageFolder('../images/', transform=preprocess)
train_loader = torch.utils.data.DataLoader(datasets,
                                           batch_size=batch_size,
                                           shuffle=True)

# Model Setting
model = models.vgg19(pretrained=True)
# VGG has no `fc` attribute: its head is the last layer of the `classifier`
# Sequential.  Assigning `model.fc` would only attach an unused module and the
# network would keep emitting the original 1000 logits, so replace the final
# classifier layer instead.
final_layer = model.classifier[-1]
model.classifier[-1] = nn.Linear(final_layer.in_features, num_classes)
if args.use_cuda:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# NOTE(review): the literal 10 is passed positionally to Trainer; its meaning
# is not visible here -- confirm against Trainer's signature.
trainer = Trainer(optimizer, criterion, model, 10, train_loader, args.use_cuda)
trained_model = trainer.run()

torch.save(trained_model.state_dict(), '../weights/vgg_weight.pth')
示例#21
0
def main():
    """Train a dialog model, evaluating and checkpointing it periodically.

    In order: parse arguments and hyper-parameters, configure GPUs and random
    seeds, download the pretrained BERT archive when ``hparams.bert_dir`` does
    not exist, build the dataset reader, model, optimizer and ``Trainer``, and
    run the training loop.  Training summaries are written every
    ``hparams.logging_step`` global steps.  Every
    ``int(iters_in_train * hparams.evaluation_epoch)`` global steps (at least
    every step) a checkpoint is saved and the test split(s) are evaluated.

    Raises:
        ValueError: if ``hparams.data_name`` is neither
            ``'wizard_of_wikipedia'`` nor ``'holle'``.
    """
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    tf.random.set_seed(hparams.random_seed)
    np.random.seed(hparams.random_seed)
    random.seed(hparams.random_seed)

    # For multi-gpu
    if hparams.num_gpus > 1:
        mirrored_strategy = tf.distribute.MirroredStrategy()  # NCCL will be used as default
    else:
        mirrored_strategy = None

    # Download BERT pretrained model
    if not os.path.exists(hparams.bert_dir):
        os.makedirs(hparams.bert_dir)
        fname = 'uncased_L-12_H-768_A-12.zip'
        gd_id = '17rfV9CleFBwwfS7m5Yd72vvxdPLWBHl6'
        download_from_google_drive(gd_id, os.path.join(hparams.bert_dir, fname))
        unzip(hparams.bert_dir, fname)

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    if hparams.data_name == "wizard_of_wikipedia":
        reader_cls = WowDatasetReader
    elif hparams.data_name == "holle":
        reader_cls = HolleDatasetReader
    else:
        raise ValueError("data_name must be one of 'wizard_of_wikipedia' and 'holle'")
    # Only the wizard_of_wikipedia data has an additional 'test_unseen' split.
    has_unseen = hparams.data_name == 'wizard_of_wikipedia'
    reader = reader_cls(
        hparams.batch_size, hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    if has_unseen:
        unseen_dataset, iters_in_unseen = reader.read('test_unseen', mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    def build_model_and_optimizer():
        # Shared by both branches below so the two constructions cannot drift.
        built_model = MODELS[hparams.model](hparams, vocabulary)
        built_optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                                   clipnorm=hparams.clipnorm)
        return built_model, built_optimizer

    if mirrored_strategy:
        # Model and optimizer are created inside the strategy scope.
        with mirrored_strategy.scope():
            model, optimizer = build_model_and_optimizer()
    else:
        model, optimizer = build_model_and_optimizer()
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function,
                      WowDatasetReader.remove_pad)

    # misc (tensorboard, checkpoints)
    file_writer = tf.summary.create_file_writer(hparams.checkpoint_dir)
    file_writer.set_as_default()
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     optimizer_step=global_step)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    directory=hparams.checkpoint_dir,
                                                    max_to_keep=hparams.max_to_keep)
    checkpoint_tracker = CheckpointTracker(
        hparams.checkpoint_dir, max_to_keep=BEST_N_CHECKPOINTS)

    # Loop-invariant intervals, computed once instead of on every step.
    logging_interval = int(hparams.logging_step)
    # int() truncates, so a short epoch or a small evaluation_epoch fraction
    # would yield an interval of 0 and a ZeroDivisionError in the modulo
    # below; clamp so that evaluation then runs on every step instead.
    eval_interval = max(1, int(iters_in_train * hparams.evaluation_epoch))

    # Main loop!
    # A single iterator is shared across epochs; each epoch pulls
    # `iters_in_train` batches from it.
    train_dataset_iter = iter(train_dataset)
    for epoch in range(hparams.num_epochs):
        print(hparams.checkpoint_dir)
        base_description = f"(Train) Epoch {epoch}, GPU {hparams.gpus}"
        train_tqdm = trange(iters_in_train, ncols=120, desc=base_description)
        for _ in train_tqdm:
            example = next(train_dataset_iter)
            global_step.assign_add(1)
            _global_step = int(global_step)

            # Train
            output_dict = trainer.train_step(example)

            # Print model
            if _global_step == 1:
                model.print_model()

            loss_str = str(output_dict['loss'].numpy())
            train_tqdm.set_description(f"{base_description}, Loss {loss_str}")
            with file_writer.as_default():
                if _global_step % logging_interval == 0:
                    tf.summary.histogram('train/vocab', output_dict['sample_ids'], step=_global_step)
                    tf.summary.scalar('train/loss', output_dict['loss'], step=_global_step)
                    tf.summary.scalar('train/gen_loss', output_dict['gen_loss'], step=_global_step)
                    tf.summary.scalar('train/knowledge_loss', output_dict['knowledge_loss'], step=_global_step)
                    tf.summary.scalar('train/kl_loss', output_dict['kl_loss'], step=_global_step)

            # Test
            if _global_step % eval_interval == 0:
                checkpoint_manager.save(global_step)

                test_loop_outputs = trainer.test_loop(test_dataset, iters_in_test, epoch, 'seen')
                if has_unseen:
                    unseen_loop_outputs = trainer.test_loop(unseen_dataset, iters_in_unseen, epoch, 'unseen')

                test_summaries, log_dict = run_wow_evaluation(
                    test_loop_outputs, hparams.checkpoint_dir, 'seen')
                if has_unseen:
                    unseen_summaries, unseen_log_dict = run_wow_evaluation(
                        unseen_loop_outputs, hparams.checkpoint_dir, 'unseen')

                # Logging
                tqdm.write(colorful.bold_green("seen").styled_string)
                tqdm.write(colorful.bold_red(pformat(log_dict)).styled_string)
                if has_unseen:
                    tqdm.write(colorful.bold_green("unseen").styled_string)
                    tqdm.write(colorful.bold_red(pformat(unseen_log_dict)).styled_string)

                with file_writer.as_default():
                    for family, test_summary in test_summaries.items():
                        for key, value in test_summary.items():
                            tf.summary.scalar(f'{family}/{key}', value, step=_global_step)
                    if has_unseen:
                        for family, unseen_summary in unseen_summaries.items():
                            for key, value in unseen_summary.items():
                                tf.summary.scalar(f'{family}/{key}', value, step=_global_step)

                # Best-checkpoint tracking is driven by the "seen" rouge1 score.
                if hparams.keep_best_checkpoint:
                    current_score = log_dict["rouge1"]
                    checkpoint_tracker.update(current_score, _global_step)
示例#22
0
def main():
    """Evaluate a pretrained dialog model on the test split(s).

    Selects the pretrained checkpoint from ``hparams.test_mode`` (downloading
    and unzipping it under ``tmp/`` when its directory is missing), configures
    GPUs and random seeds, downloads the pretrained BERT archive when
    ``hparams.bert_dir`` does not exist, builds the dataset reader, model,
    optimizer and ``Trainer``, restores the checkpoint and prints the
    evaluation results.

    Raises:
        ValueError: if ``hparams.test_mode`` is not one of ``'wow'``,
            ``'holle_1'`` or ``'holle_2'``, or if ``hparams.data_name`` is
            neither ``'wizard_of_wikipedia'`` nor ``'holle'``.
    """
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    # test_mode -> (extracted directory name, Google Drive id, checkpoint name)
    pretrained_checkpoints = {
        'wow': ('wow_pretrained',
                '1lkF1QENr45j0vl-Oja3wEiqkxoNTxkXT',
                'ckpt-46070'),
        'holle_1': ('holle_pretrained_1',
                    '1o1-Gv5PScxlSzxW6DyZnSp3gDI5zXOhh',
                    'ckpt-1th-best'),
        'holle_2': ('holle_pretrained_2',
                    '13FkCjuC0aBEenlSf-NAAgOfoWVPhqFSc',
                    'ckpt-1th-best'),
    }
    if hparams.test_mode not in pretrained_checkpoints:
        raise ValueError(
            "test_mode must be one of 'wow', 'holle_1' and 'holle_2'")
    pretrained_name, gd_id, ckpt_name = pretrained_checkpoints[hparams.test_mode]
    pretrained_dir = f'tmp/{pretrained_name}'

    os.makedirs('./tmp', exist_ok=True)
    # Download and extract the archive only when its directory is missing.
    if not os.path.exists(pretrained_dir):
        fname = f'{pretrained_name}.zip'
        colorlog.info(f"Download pretrained checkpoint {fname}")
        download_from_google_drive(gd_id, os.path.join('tmp', fname))
        unzip('tmp', fname)
    ckpt_fname = os.path.join(pretrained_dir, ckpt_name)

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    tf.random.set_seed(hparams.random_seed)
    np.random.seed(hparams.random_seed)
    random.seed(hparams.random_seed)

    # For multi-gpu
    if hparams.num_gpus > 1:
        mirrored_strategy = tf.distribute.MirroredStrategy(
        )  # NCCL will be used as default
    else:
        mirrored_strategy = None

    # Download BERT pretrained model
    if not os.path.exists(hparams.bert_dir):
        os.makedirs(hparams.bert_dir)
        fname = 'uncased_L-12_H-768_A-12.zip'
        gd_id = '17rfV9CleFBwwfS7m5Yd72vvxdPLWBHl6'
        download_from_google_drive(gd_id, os.path.join(hparams.bert_dir,
                                                       fname))
        unzip(hparams.bert_dir, fname)

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    if hparams.data_name == 'wizard_of_wikipedia':
        reader_cls = WowDatasetReader
    elif hparams.data_name == 'holle':
        reader_cls = HolleDatasetReader
    else:
        raise ValueError(
            "data_name must be one of 'wizard_of_wikipedia' and 'holle'")
    reader = reader_cls(
        hparams.batch_size,
        hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    # Only the wizard_of_wikipedia data has an additional 'test_unseen' split.
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_dataset, iters_in_unseen = reader.read('test_unseen',
                                                      mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    if mirrored_strategy:
        # Model and optimizer are created inside the strategy scope.
        with mirrored_strategy.scope():
            model = MODELS[hparams.model](hparams, vocabulary)
            optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                                 clipnorm=hparams.clipnorm)
    else:
        model = MODELS[hparams.model](hparams, vocabulary)
        optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                             clipnorm=hparams.clipnorm)
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function, WowDatasetReader.remove_pad)

    # Setup checkpoint
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     optimizer_step=global_step)

    # Load
    # One training step is run on a single batch before restoring, presumably
    # so that model and optimizer variables exist when the checkpoint is
    # loaded -- TODO confirm.
    train_example = next(iter(train_dataset))
    _ = trainer.train_step(train_example)
    # The status object returned by restore() is not checked: neither
    # assert_consumed() nor expect_partial() is called on it.
    checkpoint.restore(ckpt_fname)

    # Test
    test_loop_outputs = trainer.test_loop(test_dataset, iters_in_test, 0,
                                          'seen')
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_loop_outputs = trainer.test_loop(unseen_dataset,
                                                iters_in_unseen, 0, 'unseen')

    test_summaries, log_dict = run_wow_evaluation(test_loop_outputs,
                                                  hparams.checkpoint_dir,
                                                  'seen')
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_summaries, unseen_log_dict = run_wow_evaluation(
            unseen_loop_outputs, hparams.checkpoint_dir, 'unseen')

    # Logging
    tqdm.write(colorful.bold_green("seen").styled_string)
    tqdm.write(colorful.bold_red(pformat(log_dict)).styled_string)
    if hparams.data_name == 'wizard_of_wikipedia':
        tqdm.write(colorful.bold_green("unseen").styled_string)
        tqdm.write(colorful.bold_red(pformat(unseen_log_dict)).styled_string)