Exemplo n.º 1
0
def run(args):
    """Evaluate a saved Transformer checkpoint on the Multi30k test split.

    Args:
        args: Options object. This function reads ``src_language``,
            ``tgt_language``, ``max_seq_length``, ``batch_size``, ``device``,
            ``dropout`` and ``model`` (path of the saved state dict). The
            whole object is also forwarded to ``build_dataset`` and ``test``.
    """
    writer = SummaryWriter()
    src, tgt, _, _ = build_dataset(args)

    def within_length_limit(example):
        # Keep an example only when both sides fit in max_seq_length.
        fields = vars(example)
        return (len(fields['src']) <= args.max_seq_length
                and len(fields['tgt']) <= args.max_seq_length)

    print('Loading test data split.')
    train_gen, valid_gen, test_gen = datasets.Multi30k.splits(
        exts=(build_file_extension(args.src_language),
              build_file_extension(args.tgt_language)),
        fields=(('src', src), ('tgt', tgt)),
        filter_pred=within_length_limit)
    print('Finished loading test data split.')

    src_vocab_size = len(src.vocab.itos)
    tgt_vocab_size = len(tgt.vocab.itos)

    # Only the test iterator is used; the other two splits are passed with
    # real names instead of re-using the throwaway "_" as an argument.
    _, _, test_iterator = data.Iterator.splits(
        (train_gen, valid_gen, test_gen),
        sort_key=lambda x: len(x.src),
        batch_sizes=(args.batch_size, args.batch_size, args.batch_size))

    print('Instantiating model...')
    device = args.device
    model = Transformer(src_vocab_size,
                        tgt_vocab_size,
                        device,
                        p_dropout=args.dropout)
    model = model.to(device)
    # Deserialize onto the CPU so a checkpoint written on a GPU machine can
    # still be opened on a host without that device; load_state_dict then
    # copies the values into the parameters already placed on `device`.
    model.load_state_dict(torch.load(args.model, map_location='cpu'))
    print('Model instantiated!')

    print('Starting testing...')
    test(model, test_iterator, src.vocab, tgt.vocab, args, writer)
    print('Finished testing.')
Exemplo n.º 2
0
 def __init__(self, vocabulary_size_in, vocabulary_size_out, constants,
              hyperparams):
     """Assemble the network with its loss, optimizer and LR scheduler.

     Args:
         vocabulary_size_in: passed to ``Transformer`` as its first argument.
         vocabulary_size_out: passed to ``Transformer`` as its second argument.
         constants: passed to ``Transformer`` and kept on the instance.
         hyperparams: passed to ``Transformer`` and kept on the instance;
             ``D_MODEL`` and ``WARMUP_STEPS`` are read for the scheduler.
     """
     super(Translator, self).__init__()

     network = Transformer(vocabulary_size_in, vocabulary_size_out,
                           constants, hyperparams)
     self.Transformer = network
     self.criterion = nn.CrossEntropyLoss()

     # Adam settings are fixed here; no explicit learning rate is supplied.
     adam_settings = {'betas': (0.9, 0.98), 'eps': 1e-9}
     self.optimizer = optim.Adam(network.parameters(), **adam_settings)

     model_width = hyperparams.D_MODEL
     warmup = hyperparams.WARMUP_STEPS
     self.scheduler = Scheduler(d_model=model_width, warmup_steps=warmup)

     self.constants = constants
     self.hyperparams = hyperparams
    def __init__(self, src_vocab, tgt_vocab, src_vocab_size, tgt_vocab_size,
                 args):
        """Load a trained Transformer and keep the settings used for decoding.

        Args:
            src_vocab: source vocabulary, stored as ``self.src_vocab``.
            tgt_vocab: target vocabulary, stored as ``self.tgt_vocab``.
            src_vocab_size: passed to ``Transformer`` as its first argument.
            tgt_vocab_size: passed to ``Transformer`` as its second argument.
            args: options object; ``max_seq_length``, ``device``,
                ``beam_size`` and ``model`` (state-dict path) are read.
        """
        self.max_seq_length = args.max_seq_length
        self.device = args.device
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
        self.beam_size = args.beam_size

        model = Transformer(src_vocab_size, tgt_vocab_size, args.device)
        # The model is still on the CPU at this point, so deserialize the
        # checkpoint there as well. Without map_location, a checkpoint saved
        # on a GPU cannot be opened on a machine that lacks that device.
        state_dict = torch.load(args.model, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(args.device)
        self.model = model
        # Inference only: switch to evaluation mode.
        self.model.eval()
Exemplo n.º 4
0
def do_predict():
    """Translate the test split with a saved checkpoint and dump the results.

    Loads ``model_9.pt`` from ``./checkpoint/transformer``, translates every
    test sentence with ``translate_tokens`` and writes two JSON files
    (``pre.json`` with predictions, ``gth.json`` with references) into the
    same directory. Requires CUDA because the model is moved with ``.cuda()``.
    """
    _, _, test_iterator, SRC, TGT = prepare_data_multi30k()
    src_pad_idx = SRC.vocab.stoi[SRC.pad_token]
    tgt_pad_idx = TGT.vocab.stoi[TGT.pad_token]
    src_vocab_size = len(SRC.vocab)
    tgt_vocab_size = len(TGT.vocab)

    model = Transformer(n_src_vocab=src_vocab_size,
                        n_trg_vocab=tgt_vocab_size,
                        src_pad_idx=src_pad_idx,
                        trg_pad_idx=tgt_pad_idx,
                        d_word_vec=256,
                        d_model=256,
                        d_inner=512,
                        n_layer=3,
                        n_head=8,
                        dropout=0.1,
                        n_position=200)
    model.cuda()

    model_dir = "./checkpoint/transformer"
    model_path = os.path.join(model_dir, "model_9.pt")
    state_dict = torch.load(model_path)
    model.load_state_dict(state_dict)

    model.eval()
    pre_sents = []
    gth_sents = []
    for batch_idx, batch in enumerate(test_iterator):
        if batch_idx % 10 == 0:
            print("[TIME] --- time: {} --- [TIME]".format(time.ctime(time.time())))
        # The batch size is read from dim 0 and rows are indexed along dim 0,
        # so the tensors are assumed to be batch-first
        # ([batch_size, seq_len]) -- TODO confirm against prepare_data_multi30k.
        src_seq, _ = batch.src
        tgt_seq, _ = batch.trg

        batch_size = src_seq.size(0)
        with torch.no_grad():
            # A distinct loop name keeps the outer batch counter from being
            # overwritten inside the loop.
            pre_tokens = [
                translate_tokens(src_seq[row], SRC, TGT, model, max_len=32)
                for row in range(batch_size)
            ]

        # Reference ids are mapped back to tokens one sentence at a time.
        gth_tokens = tgt_seq.cpu().detach().numpy().tolist()
        for tokens, gth_ids in zip(pre_tokens, gth_tokens):
            gth = [TGT.vocab.itos[token_id] for token_id in gth_ids]
            pre_sents.append(" ".join(tokens))
            gth_sents.append(" ".join(gth))

    pre_path = os.path.join(model_dir, "pre.json")
    gth_path = os.path.join(model_dir, "gth.json")
    with open(pre_path, "w", encoding="utf-8") as writer:
        json.dump(pre_sents, writer, ensure_ascii=False, indent=4)
    with open(gth_path, "w", encoding="utf-8") as writer:
        json.dump(gth_sents, writer, ensure_ascii=False, indent=4)
Exemplo n.º 5
0
def main():
    """Build data loaders, the Transformer and its optimizer, then train.

    All settings come from the module-level ``arg`` mapping.
    """
    pprint(arg)

    # Data.
    train_loader, valid_loader, test_loader = prepare_dataloaders(arg)
    summary = "Data loaded. Instances: {} train / {} dev / {} test"
    print(summary.format(len(train_loader), len(valid_loader),
                         len(test_loader)))

    # Device: CUDA only when the flag compares equal to True.
    use_cuda = arg["cuda"] == True
    device = torch.device('cuda' if use_cuda else 'cpu')

    print()
    # Source and target share one vocabulary, taken from the training set.
    dataset = train_loader.dataset
    vocab_size = len(dataset.w2i)
    max_token_seq_len = dataset.conf["max_sequence_len"]
    model_options = dict(
        tgt_emb_prj_weight_sharing=True,
        emb_src_tgt_weight_sharing=True,
        d_k=arg["d_k"],
        d_v=arg["d_v"],
        d_model=arg["d_model"],
        d_word_vec=arg["d_model"],
        d_inner=arg["d_inner_hid"],
        n_layers=arg["n_layers"],
        n_head=arg["n_head"],
        dropout=arg["dropout"],
    )
    transformer_network = Transformer(vocab_size, vocab_size,
                                      max_token_seq_len, **model_options)
    transformer_network = transformer_network.to(device)

    print("Transformer model initialized.")
    print()

    # Optimizer: Adam over the parameters that require gradients, wrapped in
    # the scheduled optimizer.
    trainable = (p for p in transformer_network.parameters()
                 if p.requires_grad)
    adam = optim.Adam(trainable, betas=(0.9, 0.98), eps=1e-09)
    optimizer = transformer.optimizers.ScheduledOptim(
        adam, arg["d_model"], arg["n_warmup_steps"])

    train(transformer_network, train_loader, valid_loader, test_loader,
          optimizer, device, arg)
Exemplo n.º 6
0
    def __init__(self, iterator, params, mode):
        """Build the graph for the requested estimator mode.

        Args:
          iterator: input holder; ``src`` is read in every mode, ``tgt`` and
            ``label`` only outside predict mode.
          params: parameters handed to ``Transformer``.
          mode: train | eval | predict, as defined by tf.estimator.ModeKeys.
        """
        tf.logging.info("Initializing model, building graph...")

        predicting = mode == tf.estimator.ModeKeys.PREDICT
        if not predicting:
            # Train / eval: score the (src, tgt) pair, then attach the loss
            # and accuracy ops.
            network = Transformer(params, True)
            logits = network(iterator.src, iterator.tgt)
            with tf.name_scope("loss"):
                self.loss = contrastive_loss(iterator.label, logits)
            with tf.name_scope("accuracy"):
                self.accuracy = compute_accuracy(iterator.label, logits)
        else:
            # Predict: only the source side is passed through the network.
            network = Transformer(params, False)
            self.encode = network(iterator.src)

        self.model_stats()
Exemplo n.º 7
0
def test_bert_trans():
    """Run one forward pass through a small (BERT-)Transformer and log its shape.

    The variant is chosen by the module-level ``args.bert`` flag.
    """
    use_bert = args.bert is True

    # Hyper-parameters common to both model variants.
    common_config = dict(num_layers=2,
                         d_model=512,
                         num_heads=8,
                         dff=2048,
                         input_vocab_size=8500,
                         target_vocab_size=8000,
                         pe_input=10000,
                         pe_target=6000)
    if use_bert:
        sample_transformer = TransformerBert(model_dir=args.bert_model_dir,
                                             **common_config)
    else:
        sample_transformer = Transformer(**common_config)

    def random_ids(seq_len):
        # Batch of 64 random int64 ids drawn from [0, 200).
        return tf.random.uniform((64, seq_len),
                                 dtype=tf.int64,
                                 minval=0,
                                 maxval=200)

    temp_input = random_ids(38)
    temp_seg = tf.ones((64, 38), dtype=tf.int64)
    temp_target = random_ids(36)
    masks = create_masks(temp_input, temp_target)
    enc_padding_mask, combined_mask, dec_padding_mask = masks

    if use_bert:
        outputs = sample_transformer(temp_input,
                                     temp_seg,
                                     temp_target,
                                     training=True,
                                     enc_padding_mask=enc_padding_mask,
                                     look_ahead_mask=combined_mask,
                                     dec_padding_mask=dec_padding_mask)
    else:
        # The plain Transformer is called in inference mode without masks.
        outputs = sample_transformer(temp_input,
                                     temp_target,
                                     training=False,
                                     enc_padding_mask=None,
                                     look_ahead_mask=None,
                                     dec_padding_mask=None)
    fn_out, _ = outputs

    # Expected: (batch_size, tar_seq_len, target_vocab_size)
    tf.compat.v1.logging.info(fn_out.shape)
def run(args):
    """Train a Transformer, saving a checkpoint and validating every epoch.

    Args:
        args: Options object. This function reads ``device``, ``dropout``,
            ``checkpoint`` (optional state-dict path to start from), ``lr``
            and ``epochs``. The whole object is also forwarded to
            ``build_dataset``, ``train`` and ``validate``.
    """
    import os  # local import: only needed to create the checkpoint directory

    writer = SummaryWriter()
    src, tgt, train_iterator, val_iterator = build_dataset(args)

    src_vocab_size = len(src.vocab.itos)
    tgt_vocab_size = len(tgt.vocab.itos)

    print('Instantiating model...')
    device = args.device
    model = Transformer(src_vocab_size,
                        tgt_vocab_size,
                        device,
                        p_dropout=args.dropout)
    model = model.to(device)

    if args.checkpoint is not None:
        # Deserialize onto the CPU so the checkpoint opens regardless of the
        # device it was saved from; load_state_dict copies the values into
        # the parameters that already live on `device`.
        model.load_state_dict(
            torch.load(args.checkpoint, map_location='cpu'))
    else:
        # Fresh start: Xavier-initialize every parameter with more than one
        # dimension.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    print('Model instantiated!')

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=(0.9, 0.98),
                           eps=1e-9)

    # torch.save does not create missing directories, so make sure the
    # target directory exists before the first checkpoint is written.
    os.makedirs('models', exist_ok=True)

    print('Starting training...')
    for epoch in range(args.epochs):
        acc = train(model, epoch + 1, train_iterator, optimizer, src.vocab,
                    tgt.vocab, args, writer)
        # File names use the 0-based epoch index, while train/validate
        # receive the 1-based epoch number.
        model_file = 'models/model_' + str(epoch) + '_' + str(acc) + '.pth'
        torch.save(model.state_dict(), model_file)
        print('Saved model to ' + model_file)
        validate(model, epoch + 1, val_iterator, src.vocab, tgt.vocab, args,
                 writer)
    print('Finished training.')
Exemplo n.º 9
0
    def __init__(self,
                 tokenizer: Tokenizer,
                 maximum_position_encoding=1000,
                 num_layers=6,
                 d_model=512,
                 num_heads=8,
                 dff=2048,
                 dropout_rate=0.1):
        """Create the Transformer, optimizer, metrics and checkpoint manager.

        The most recent checkpoint under ``./checkpoints/train`` is restored
        when the manager reports one.

        Args:
            tokenizer: supplies the vocabulary size via ``get_num_tokens()``.
            maximum_position_encoding: forwarded to ``Transformer``.
            num_layers: forwarded to ``Transformer``.
            d_model: forwarded to ``Transformer`` and ``CustomSchedule``.
            num_heads: forwarded to ``Transformer``.
            dff: forwarded to ``Transformer``.
            dropout_rate: forwarded to ``Transformer``.
        """
        self.tokenizer = tokenizer
        self.d_model = d_model
        vocab_size = tokenizer.get_num_tokens()

        # Adam driven by the custom learning-rate schedule.
        schedule = CustomSchedule(d_model)
        self.learning_rate = schedule
        self.optimizer = tf.keras.optimizers.Adam(schedule,
                                                  beta_1=0.9,
                                                  beta_2=0.98,
                                                  epsilon=1e-9)

        # Un-reduced loss: one value per element is returned to the caller.
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')

        # Running means for the training metrics.
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

        model_args = (num_layers, d_model, num_heads, dff, vocab_size,
                      maximum_position_encoding, dropout_rate)
        self.transformer = Transformer(*model_args)

        # Checkpointing: track model and optimizer, keep the 5 newest files.
        self.checkpoint_path = './checkpoints/train'
        self.ckpt = tf.train.Checkpoint(transformer=self.transformer,
                                        optimizer=self.optimizer)
        self.ckpt_manager = tf.train.CheckpointManager(self.ckpt,
                                                       self.checkpoint_path,
                                                       max_to_keep=5)

        newest = self.ckpt_manager.latest_checkpoint
        if newest:
            self.ckpt.restore(newest)
Exemplo n.º 10
0
def get_model_gec():
    """Construct the (BERT-)Transformer and its Adam optimizer.

    Reads the module-level ``args`` and rebinds the module-level
    ``transformer``.

    Returns:
        A ``(transformer, optimizer)`` tuple.
    """
    global args, transformer, tokenizer_ro

    vocab_size = args.dict_size + 2

    schedule = CustomSchedule(args.d_model)
    optimizer = tf.keras.optimizers.Adam(schedule,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    # Arguments that both model variants take in the same way.
    core_args = (args.num_layers, args.d_model, args.num_heads, args.dff,
                 vocab_size, vocab_size)
    shared_kwargs = dict(pe_input=vocab_size,
                         pe_target=vocab_size,
                         rate=args.dropout)

    if args.bert is True:
        transformer = TransformerBert(*core_args,
                                      model_dir=args.bert_model_dir,
                                      args=args,
                                      **shared_kwargs)
        tf.compat.v1.logging.info('transformer bert loaded')
    else:
        transformer = Transformer(*core_args, **shared_kwargs)
    # Logged for both variants.
    tf.compat.v1.logging.info('transformer model constructed')

    return transformer, optimizer
Exemplo n.º 11
0
def do_train():
    """Train the Transformer for ten epochs and record the losses.

    A state dict is written to ``./checkpoint/transformer`` after every
    epoch, and the per-epoch train/eval losses are dumped to ``result.json``
    in the same directory. Requires CUDA because the model is moved with
    ``.cuda()``.
    """
    loaded = prepare_data_multi30k()
    train_iterator, valid_iterator, test_iterator, SRC, TGT = loaded

    src_pad_idx = SRC.vocab.stoi[SRC.pad_token]
    tgt_pad_idx = TGT.vocab.stoi[TGT.pad_token]
    model_config = dict(n_src_vocab=len(SRC.vocab),
                        n_trg_vocab=len(TGT.vocab),
                        src_pad_idx=src_pad_idx,
                        trg_pad_idx=tgt_pad_idx,
                        d_word_vec=256,
                        d_model=256,
                        d_inner=512,
                        n_layer=3,
                        n_head=8,
                        dropout=0.1,
                        n_position=200)
    model = Transformer(**model_config)

    model.cuda()
    optimizer = Adam(model.parameters(), lr=5e-4)

    num_epoch = 10
    results = []
    model_dir = "./checkpoint/transformer"
    for epoch in range(num_epoch):
        train_loss, train_accuracy = train_epoch(
            model, optimizer, train_iterator, tgt_pad_idx, smoothing=False)
        eval_loss, eval_accuracy = eval_epoch(
            model, valid_iterator, tgt_pad_idx, smoothing=False)

        # Save this epoch's weights.
        os.makedirs(model_dir, exist_ok=True)
        model_path = os.path.join(model_dir, "model_{}.pt".format(epoch))
        torch.save(model.state_dict(), model_path)

        # Only the losses are recorded; accuracies are just printed.
        results.append(dict(epoch=epoch,
                            train_loss=train_loss,
                            eval_loss=eval_loss))
        print("[TIME] --- {} --- [TIME]".format(time.ctime(time.time())))
        print("epoch: {}, train_loss: {}, eval_loss: {}".format(epoch, train_loss, eval_loss))
        print("epoch: {}, train_accuracy: {}, eval_accuracy: {}".format(epoch, train_accuracy, eval_accuracy))

    result_path = os.path.join(model_dir, "result.json")
    with open(result_path, "w", encoding="utf-8") as writer:
        json.dump(results, writer, ensure_ascii=False, indent=4)
Exemplo n.º 12
0
import pickle
import random
import time

import numpy as np
import torch

from config import device, logger, data_file, vocab_file
from transformer.transformer import Transformer

if __name__ == '__main__':
    # Script entry point: restore a saved Transformer, then load the
    # validation samples and the vocabulary.
    # filename = 'transformer.pt'
    filename = 'BEST'
    print('loading {}...'.format(filename))
    start = time.time()
    # The file is expected to hold a state dict that matches a
    # default-constructed Transformer.
    model = Transformer()
    model.load_state_dict(torch.load(filename))
    print('elapsed {} sec'.format(time.time() - start))
    model = model.to(device)
    model.eval()
    # NOTE(review): this assertion always fails, so execution stops here and
    # nothing below runs (unless Python is started with -O, which strips
    # asserts). It looks like a deliberate debugging stop -- confirm before
    # removing.
    assert (1 == 0)
    logger.info('loading samples...')
    start = time.time()
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use it on data files from a trusted source.
    with open(data_file, 'rb') as file:
        data = pickle.load(file)
        samples = data['valid']
    elapsed = time.time() - start
    logger.info('elapsed: {:.4f} seconds'.format(elapsed))

    logger.info('loading vocab...')
    start = time.time()
Exemplo n.º 13
0
def train_net(args):
    """Train the encoder-decoder Transformer, optionally resuming a run.

    When ``args.checkpoint`` is ``None`` a new model and optimizer are
    built. Otherwise the epoch counter, the epochs-since-improvement
    counter, the model and the optimizer are taken from the loaded
    checkpoint. A checkpoint is saved after every epoch.

    Args:
        args: Options object; reads ``checkpoint``, the model
            hyper-parameters, ``batch_size``, ``num_workers`` and ``epochs``.
    """
    # Fixed seeds for torch and numpy.
    torch.manual_seed(7)
    np.random.seed(7)

    checkpoint = args.checkpoint
    start_epoch = 0
    # NOTE(review): best_loss is not restored when resuming, so the first
    # epoch after a resume is always treated as the best -- confirm whether
    # that is intended.
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    if checkpoint is not None:
        # Resume: the checkpoint stores the model and optimizer objects.
        state = torch.load(checkpoint)
        start_epoch = state['epoch'] + 1
        epochs_since_improvement = state['epochs_since_improvement']
        model = state['model']
        optimizer = state['optimizer']
    else:
        # Fresh run: build encoder, decoder, model and optimizer.
        encoder = Encoder(n_src_vocab, args.n_layers_enc, args.n_head,
                          args.d_k, args.d_v, args.d_model, args.d_inner,
                          dropout=args.dropout,
                          pe_maxlen=args.pe_maxlen)
        decoder = Decoder(
            sos_id, eos_id, n_tgt_vocab, args.d_word_vec, args.n_layers_dec,
            args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner,
            dropout=args.dropout,
            tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
            pe_maxlen=args.pe_maxlen)
        model = Transformer(encoder, decoder)

        adam = torch.optim.Adam(model.parameters(),
                                betas=(0.9, 0.98),
                                eps=1e-09)
        optimizer = TransformerOptimizer(adam)

    # Move the model to the configured device.
    model = model.to(device)

    def build_loader(split, shuffle):
        # One DataLoader per split; only the shuffling differs.
        return torch.utils.data.DataLoader(AiChallenger2017Dataset(split),
                                           batch_size=args.batch_size,
                                           collate_fn=pad_collate,
                                           shuffle=shuffle,
                                           num_workers=args.num_workers)

    train_loader = build_loader('train', True)
    valid_loader = build_loader('valid', False)

    for epoch in range(start_epoch, args.epochs):
        # Training pass.
        train_loss = train(train_loader=train_loader,
                           model=model,
                           optimizer=optimizer,
                           epoch=epoch,
                           logger=logger,
                           writer=writer)

        writer.add_scalar('epoch/train_loss', train_loss, epoch)
        writer.add_scalar('epoch/learning_rate', optimizer.lr, epoch)

        print('\nLearning rate: {}'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        # Validation pass.
        valid_loss = valid(valid_loader=valid_loader,
                           model=model,
                           logger=logger)
        writer.add_scalar('epoch/valid_loss', valid_loss, epoch)

        # Track the best validation loss and how long it has been stale.
        improved = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if improved:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        best_loss, improved)
Exemplo n.º 14
0
def main():
    """Parse options, build the speech Transformer and run the training loop.

    Steps, in order: parse the command line, load the label maps and publish
    them through module-level globals, seed the RNGs, build the model and
    optimizer, bind them to nsml, and (in train mode only) read the data
    list and train for ``--max_epochs`` epochs with occasional evaluation.
    """
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')

    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing',
                        type=float,
                        default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len',
                        type=int,
                        default=WORD_MAXLEN,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument(
        '--word',
        action='store_true',
        help='Train/Predict model using word based label (default: False)')
    parser.add_argument('--gen_label_index',
                        action='store_true',
                        help='Generate word label index map(default: False)')
    parser.add_argument('--iteration', type=str, help='Iteratiom')
    parser.add_argument('--premodel_session',
                        type=str,
                        help='Session name of premodel')

    # transformer model parameter
    # NOTE(review): the Encoder/Decoder below are built from hard-coded
    # values, so these options are parsed but not used by the active code.
    parser.add_argument('--d_model',
                        type=int,
                        default=128,
                        help='transformer_d_model')
    parser.add_argument('--n_head',
                        type=int,
                        default=8,
                        help='transformer_n_head')
    parser.add_argument('--num_encoder_layers',
                        type=int,
                        default=4,
                        help='num_encoder_layers')
    parser.add_argument('--num_decoder_layers',
                        type=int,
                        default=4,
                        help='transformer_num_decoder_layers')
    parser.add_argument('--dim_feedforward',
                        type=int,
                        default=2048,
                        help='transformer_d_model')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.1,
                        help='transformer_dropout')

    # transformer warmup parameter
    # NOTE(review): only referenced inside the disabled string block further
    # down, so these options currently have no effect.
    parser.add_argument('--warmup_multiplier',
                        type=int,
                        default=3,
                        help='transformer_warmup_multiplier')
    parser.add_argument('--warmup_epoch',
                        type=int,
                        default=10,
                        help='transformer_warmup_epoch')

    args = parser.parse_args()
    # The character-level label map is always loaded; with --word the
    # word-level map loaded below replaces it as the active label_loader.
    char_loader = CharLabelLoader()
    char_loader.load_char2index('./hackathon.labels')
    label_loader = char_loader
    if args.word:
        if args.gen_label_index:
            generate_word_label_index_file(char_loader, TRAIN_LABEL_CHAR_PATH)
            from subprocess import call
            # Prints the label file via the shell (requires a `cat` command).
            call(f'cat {TRAIN_LABEL_CHAR_PATH}', shell=True)
        # Load the label map from ./hackathon.pos.labels (original comment
        # was unreadable; presumably the word/POS-level labels).
        word_loader = CharLabelLoader()
        word_loader.load_char2index('./hackathon.pos.labels')
        label_loader = word_loader
        if os.path.exists(TRAIN_LABEL_CHAR_PATH):
            generate_word_label_file(char_loader, word_loader,
                                     TRAIN_LABEL_POS_PATH,
                                     TRAIN_LABEL_CHAR_PATH)
    # Publish the active label map and special-token ids as module globals.
    char2index = label_loader.char2index
    index2char = label_loader.index2char
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']
    # Seed Python's and torch's (CPU and all-GPU) random number generators.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # CUDA is used only when it is available and not disabled by --no_cuda.
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    ############ model
    print("model: transformer")
    # model = Transformer(d_model= args.d_model, n_head= args.n_head, num_encoder_layers= args.num_encoder_layers, num_decoder_layers= args.num_decoder_layers,
    #                     dim_feedforward= args.dim_feedforward, dropout= args.dropout, vocab_size= len(char2index), sound_maxlen= SOUND_MAXLEN, word_maxlen= WORD_MAXLEN)

    encoder = Encoder(d_input=128,
                      n_layers=6,
                      n_head=4,
                      d_k=128,
                      d_v=128,
                      d_model=128,
                      d_inner=2048,
                      dropout=0.1,
                      pe_maxlen=SOUND_MAXLEN)
    decoder = Decoder(sos_id=SOS_token,
                      eos_id=EOS_token,
                      n_tgt_vocab=len(char2index),
                      d_word_vec=128,
                      n_layers=6,
                      n_head=4,
                      d_k=128,
                      d_v=128,
                      d_model=128,
                      d_inner=2048,
                      dropout=0.1,
                      tgt_emb_prj_weight_sharing=True,
                      pe_maxlen=SOUND_MAXLEN)
    model = Transformer(encoder, decoder)

    # The learning rate passed here is the literal 0.0004, not args.lr.
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(),
                         lr=0.0004,
                         betas=(0.9, 0.98),
                         eps=1e-09))

    ############/

    # Overwrite every parameter with uniform values in [-0.08, 0.08].
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)
    # The bare string below is disabled code (an alternative optimizer,
    # scheduler and criterion set-up); it is evaluated and discarded.
    """
    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)

    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.max_epochs)
    scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=args.warmup_multiplier, total_epoch=args.warmup_epoch, after_scheduler=scheduler_cosine)
    
    
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)
    """

    bind_model(model, optimizer)

    # locals() is handed to nsml here, so the local variable names in this
    # function are visible to it -- do not rename them casually.
    if args.pause == 1:
        nsml.paused(scope=locals())

    # Everything below is training-only.
    if args.mode != "train":
        return

    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"

            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data',
                                          wav_path))
            script_paths.append(
                os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    # target_path = os.path.join(DATASET_PATH, 'train_label')
    target_path = TRAIN_LABEL_CHAR_PATH
    if args.word:
        target_path = TRAIN_LABEL_POS_PATH
    load_targets(target_path)

    # 5% of the data is requested as the validation share.
    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)

    # Optionally load a saved iteration, from another session if one is named.
    if args.iteration:
        if args.premodel_session:
            nsml.load(args.iteration, session=args.premodel_session)
            logger.info(f'Load {args.premodel_session} {args.iteration}')
        else:
            nsml.load(args.iteration)
            logger.info(f'Load {args.iteration}')
    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        # learning rate scheduler

        # A fresh queue and loader are created for every epoch.
        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_cer = train(model, train_batch_num, train_queue,
                                      optimizer, device, train_begin,
                                      args.workers, 10, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' %
                    (epoch, train_loss, train_cer))

        train_loader.join()

        print("~~~~~~~~~~~~")

        # Evaluate, report and save only at epoch 10 and at epochs 49, 59,
        # 69, ... NOTE(review): with the default --max_epochs of 10 the loop
        # covers epochs 0..9, so this branch never runs and nothing is saved.
        if epoch == 10 or (epoch > 48 and epoch % 10 == 9):
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                          args.batch_size, 0)
            valid_loader.start()

            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue,
                                           device, args.max_len,
                                           args.batch_size)
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' %
                        (epoch, eval_loss, eval_cer))

            valid_loader.join()

            nsml.report(False,
                        step=epoch,
                        train_epoch__loss=train_loss,
                        train_epoch__cer=train_cer,
                        eval__loss=eval_loss,
                        eval__cer=eval_cer)

            # Always save under --save_name; additionally save as 'best'
            # when the evaluation loss improved.
            best_model = (eval_loss < best_loss)
            nsml.save(args.save_name)

            if best_model:
                nsml.save('best')
                best_loss = eval_loss
Exemplo n.º 15
0
def train_net(args):
    """Train the Transformer, optionally resuming from a saved checkpoint.

    When ``args.checkpoint`` is ``None`` a fresh encoder/decoder pair and
    optimizer are built from the hyper-parameters on ``args``; otherwise the
    model, optimizer and last finished epoch are restored from the checkpoint
    file. After every epoch the training loss is appended to
    ``loss_epoch.txt``, logged to the summary writer, and a checkpoint is
    saved. Once all epochs are done, the full loss history is written to
    ``loss.txt``.

    Args:
        args: Parsed options. Reads ``checkpoint``, ``epochs``,
            ``batch_size``, ``num_workers`` and, when no checkpoint is given,
            the model hyper-parameters (``n_layers_enc``, ``n_layers_dec``,
            ``n_head``, ``d_k``, ``d_v``, ``d_model``, ``d_inner``,
            ``d_word_vec``, ``dropout``, ``pe_maxlen``,
            ``tgt_emb_prj_weight_sharing``).
    """
    # Seed the random number generators so that runs are reproducible.
    torch.manual_seed(7)
    np.random.seed(7)

    checkpoint_path = args.checkpoint
    writer = SummaryWriter()

    if checkpoint_path is not None:
        # Resume: continue with the epoch after the one stored in the file.
        state = torch.load(checkpoint_path)
        start_epoch = state['epoch'] + 1
        model = state['model']
        optimizer = state['optimizer']
    else:
        start_epoch = 0

        # Build a fresh model.
        encoder = Encoder(
            Config.vocab_size, args.n_layers_enc, args.n_head,
            args.d_k, args.d_v, args.d_model, args.d_inner,
            dropout=args.dropout, pe_maxlen=args.pe_maxlen)
        decoder = Decoder(
            Config.sos_id, Config.eos_id, Config.vocab_size,
            args.d_word_vec, args.n_layers_dec, args.n_head,
            args.d_k, args.d_v, args.d_model, args.d_inner,
            dropout=args.dropout,
            tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
            pe_maxlen=args.pe_maxlen)
        model = Transformer(encoder, decoder)

        # Wrap Adam in the project's optimizer wrapper.
        adam = torch.optim.Adam(
            model.parameters(), betas=(0.9, 0.98), eps=1e-09)
        optimizer = TransformerOptimizer(adam)

    # Move the model to the configured device.
    model = model.to(Config.device)

    # Data loading; `pad_collate` is passed as collate_fn because the
    # batches need padding.
    train_dataset = TranslateDataset()
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        collate_fn=pad_collate,
        shuffle=True,
        num_workers=args.num_workers)

    loss_history = []
    for epoch in range(start_epoch, args.epochs):
        # Train for one epoch.
        train_loss = train(train_loader=train_loader,
                           model=model,
                           optimizer=optimizer,
                           epoch=epoch,
                           logger=logger,
                           writer=writer)

        loss_text = str(train_loss)
        loss_history.append(loss_text)

        # Append immediately so the per-epoch log survives an aborted run.
        with open('loss_epoch.txt', 'a+') as epoch_log:
            epoch_log.write(loss_text + '\n')

        writer.add_scalar('epoch/train_loss', train_loss, epoch)
        writer.add_scalar('epoch/learning_rate', optimizer.lr, epoch)

        print(f'\nLearning rate: {optimizer.lr}')
        print(f'Step num: {optimizer.step_num}\n')

        # Save a checkpoint for this epoch.
        save_checkpoint(epoch, model, optimizer, train_loss)

    with open('loss.txt', 'w') as summary_file:
        summary_file.write('\n'.join(loss_history))
Exemplo n.º 16
0
def main() -> None:
    """Validate TZ zone files with ZoneSpecifier.

    Parses the command line flags, resolves the effective granularities,
    extracts the TZ data from ``--input_dir``, runs the ``Transformer`` over
    the extracted zones and rules, generates the inlined ``zone_infos`` and
    ``zone_policies``, and finally hands them to ``validate()`` together
    with the validator and ZoneSpecifier flags.
    """
    # Configure command line flags.
    parser = argparse.ArgumentParser(
        description='Validate TZ zone files with ZoneSpecifier.')

    # Extractor flags.
    parser.add_argument('--input_dir',
                        help='Location of the input directory',
                        required=True)

    # Transformer flags.
    parser.add_argument(
        '--scope',
        # basic: 241 of the simpler time zones for BasicZoneSpecifier
        # extended: all 348 time zones for ExtendedZoneSpecifier
        choices=['basic', 'extended'],
        help='Size of the generated database (basic|extended)',
        required=True,
    )
    parser.add_argument(
        '--start_year',
        help='Start year of Zone Eras (default: 2000)',
        type=int,
        default=2000,
    )
    parser.add_argument(
        '--until_year',
        help='Until year of Zone Eras (default: 2038)',
        type=int,
        default=2038,
    )

    parser.add_argument(
        '--granularity',
        help=(
            'If given, overrides the other granularity flags to '
            'truncate UNTIL, AT, STDOFF (offset), SAVE (delta) and '
            'RULES (rulesDelta) fields to this many seconds (default: None)'),
        type=int,
    )
    parser.add_argument(
        '--until_at_granularity',
        help=(
            'Truncate UNTIL and AT fields to this many seconds (default: 60)'),
        type=int,
    )
    # NOTE: adjacent string literals are concatenated verbatim, so each
    # fragment below must carry its own separating space.
    parser.add_argument(
        '--offset_granularity',
        help=('Truncate STDOFF (offset) fields to this many seconds '
              '(default: 900 (basic), 60 (extended))'),
        type=int,
    )
    parser.add_argument(
        '--delta_granularity',
        help=('Truncate SAVE (delta) and RULES (rulesDelta) field to this '
              'many seconds (default: 900)'),
        type=int,
    )

    # --strict is the default; --nostrict clears the same destination.
    parser.add_argument(
        '--strict',
        help='Remove zones and rules not aligned at granularity time boundary',
        action='store_true',
        default=True,
    )
    parser.add_argument(
        '--nostrict',
        help='Retain zones and rules not aligned at granularity time boundary',
        action='store_false',
        dest='strict',
    )

    # Validator flags.
    parser.add_argument(
        '--zone',
        help='Name of time zone to validate (default: all zones)',
    )
    parser.add_argument(
        '--year',
        help='Year to validate (default: start_year, until_year)',
        type=int,
    )
    parser.add_argument('--validate_buffer_size',
                        help='Validate the transition buffer size',
                        action="store_true")
    parser.add_argument('--validate_test_data',
                        help='Validate the TestDataGenerator with pytz',
                        action="store_true")
    parser.add_argument(
        '--validate_dst_offset',
        # Not enabled by default because pytz DST seems to be buggy.
        help='Validate the DST offset as well as the total UTC offset',
        action="store_true")
    parser.add_argument('--debug_validator',
                        help='Enable debug output from Validator',
                        action="store_true")

    # ZoneSpecifier flags
    parser.add_argument(
        '--viewing_months',
        help='Number of months to use for calculations (13, 14, 36)',
        type=int,
        default=14)
    parser.add_argument('--debug_specifier',
                        help='Enable debug output from ZoneSpecifier',
                        action="store_true")
    parser.add_argument(
        '--in_place_transitions',
        help='Use in-place Transition array to determine Active Transitions',
        action="store_true")
    parser.add_argument('--optimize_candidates',
                        help='Optimize the candidate transitions',
                        action='store_true')

    # TestDataGenerator flag.
    #
    # pytz cannot handle dates after the end of 32-bit Unix time_t type
    # (2038-01-19T03:14:07Z), see
    # https://answers.launchpad.net/pytz/+question/262216, so the
    # validation_until_year cannot be greater than 2038.
    #
    # A value of 0 means "not given"; the fallback to start_year/until_year
    # is applied further below.
    parser.add_argument(
        '--validation_start_year',
        help='Start year of ZoneSpecifier validation (default: start_year)',
        type=int,
        default=0)
    parser.add_argument(
        '--validation_until_year',
        help='Until year of ZoneSpecifier validation (default: until_year)',
        type=int,
        default=0)

    # Parse the command line arguments
    args = parser.parse_args()

    # Configure logging. This should normally be executed after the
    # parser.parse_args() because it allows us set the logging.level using a
    # flag.
    logging.basicConfig(level=logging.INFO)

    # Define scope-dependent granularity if not overridden by flag. A flag
    # that is absent (None) or 0 falls back to its default.
    if args.granularity:
        until_at_granularity = args.granularity
        offset_granularity = args.granularity
        delta_granularity = args.granularity
    else:
        until_at_granularity = args.until_at_granularity or 60
        offset_granularity = args.offset_granularity or (
            900 if args.scope == 'basic' else 60)
        delta_granularity = args.delta_granularity or 900

    logging.info('Granularity for UNTIL/AT: %d', until_at_granularity)
    logging.info('Granularity for STDOFF (offset): %d', offset_granularity)
    logging.info(
        'Granularity for RULES (rulesDelta) and SAVE (delta): %d',
        delta_granularity,
    )

    # Extract the TZ files
    logging.info('======== Extracting TZ Data files')
    extractor = Extractor(args.input_dir)
    extractor.parse()
    extractor.print_summary()
    policies_map, zones_map, links_map = extractor.get_data()

    # Create initial TransformerResult
    tresult = TransformerResult(
        zones_map=zones_map,
        policies_map=policies_map,
        links_map=links_map,
        removed_zones={},
        removed_policies={},
        removed_links={},
        notable_zones={},
        notable_policies={},
        notable_links={},
        zone_ids={},
        letters_per_policy={},
        letters_map={},
        formats_map={},
    )

    # Transform the TZ zones and rules
    logging.info('======== Transforming Zones and Rules')
    logging.info('Extracting years [%d, %d)', args.start_year, args.until_year)
    transformer = Transformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
    )
    transformer.transform()
    transformer.print_summary()
    tresult = transformer.get_data()

    # Generate internal versions of zone_infos and zone_policies
    # so that ZoneSpecifier can be created.
    logging.info('======== Generating inlined zone_infos and zone_policies')
    inline_zone_info = InlineZoneInfo(tresult.zones_map, tresult.policies_map)
    zone_infos, zone_policies = inline_zone_info.generate_zonedb()
    logging.info('Inlined zone_infos=%d; zone_policies=%d', len(zone_infos),
                 len(zone_policies))

    # Set the defaults for validation_start_year and validation_until_year
    # if they were not specified.
    validation_start_year = (args.start_year if args.validation_start_year == 0
                             else args.validation_start_year)
    validation_until_year = (args.until_year if args.validation_until_year == 0
                             else args.validation_until_year)

    validate(
        zone_infos=zone_infos,
        zone_policies=zone_policies,
        zone=args.zone,
        year=args.year,
        start_year=validation_start_year,
        until_year=validation_until_year,
        validate_buffer_size=args.validate_buffer_size,
        validate_test_data=args.validate_test_data,
        viewing_months=args.viewing_months,
        validate_dst_offset=args.validate_dst_offset,
        debug_validator=args.debug_validator,
        debug_specifier=args.debug_specifier,
        in_place_transitions=args.in_place_transitions,
        optimize_candidates=args.optimize_candidates,
    )

    logging.info('======== Finished processing TZ Data files.')
Exemplo n.º 17
0
def sample_output(
        model: transformer.Transformer,
        input_seq: torch.LongTensor,
        eos_index: int,
        pad_index: int,
        max_len: int
) -> torch.LongTensor:
    """Samples an output sequence based on the provided input.

    The model is temporarily switched to evaluation mode and its original
    mode (train/eval) is restored before returning, even if sampling fails.
    Sampling is run with gradient tracking disabled.

    Args:
        model (:class:`transformer.Transformer`): The model to use.
        input_seq (torch.LongTensor): The input sequence to be provided to the model. This has to be a
            (batch-size x input-seq-len)-tensor.
        eos_index (int): The index that indicates the end of a sequence.
        pad_index (int): The index that indicates a padding token in a sequence.
        max_len (int): The maximum length of the generated output.

    Returns:
        torch.LongTensor: The generated output sequence as (batch-size x output-seq-len)-tensor.

    Raises:
        TypeError: If any argument is of the wrong type (including ``max_len`` being ``None``).
        ValueError: If ``input_seq`` is not 2-dimensional, if ``eos_index`` or ``pad_index`` lie outside of
            ``[0, model.output_size)``, or if ``max_len`` is smaller than 1.
    """
    # sanitize args
    if not isinstance(model, transformer.Transformer):
        raise TypeError("The <model> has to be a transformer.Transformer!")
    if not isinstance(input_seq, torch.LongTensor) and not isinstance(input_seq, torch.cuda.LongTensor):
        raise TypeError("The <input_seq> has to be a LongTensor!")
    if input_seq.dim() != 2:
        raise ValueError("<input_seq> has to be a matrix!")
    if not isinstance(eos_index, int):
        raise TypeError("The <eos_index> has to be an integer!")
    if eos_index < 0 or eos_index >= model.output_size:
        raise ValueError("The <eos_index> is not a legal index in the vocabulary used by <model>!")
    if not isinstance(pad_index, int):
        raise TypeError("The <pad_index> has to be an integer!")
    if pad_index < 0 or pad_index >= model.output_size:
        raise ValueError("The <pad_index> is not a legal index in the vocabulary used by <model>!")
    # <max_len> bounds the sampling loop below, so None is rejected here with a clear message instead of failing
    # later inside range() (which raised a TypeError as well, but only after the model's mode had been changed)
    if not isinstance(max_len, int):
        raise TypeError("<max_len> has to be an integer!")
    if max_len < 1:
        raise ValueError("<max_len> has to be > 0!")

    original_mode = model.training  # the original mode (train/eval) of the provided model
    batch_size = input_seq.size(0)  # number of samples in the provided input sequence

    output_seq = []  # used to store the generated outputs for each position
    finished = [False] * batch_size  # whether an EOS token has been sampled for each of the samples already

    # put model in evaluation mode
    model.eval()
    try:
        # the sampled indices are discrete, so tracking gradients would only cost memory
        with torch.no_grad():
            for _ in range(max_len):

                # prepare the target to provide to the model
                # this is the current output with an additional final entry that is supposed to be predicted next
                # (which is why the concrete value does not matter)
                current_target = torch.cat(output_seq + [input_seq.new(batch_size, 1).zero_()], dim=1)

                # run the model, and keep the predictions for the final position only
                probs = model(input_seq, current_target)[:, -1, :]

                # sample next output from the computed probabilities
                output = torch.multinomial(probs, 1)

                # determine which samples have been finished, and replace sampled output with padding for those
                # that were finished in an earlier step already
                for sample_idx in range(batch_size):
                    if finished[sample_idx]:
                        output[sample_idx, 0] = pad_index
                    elif output[sample_idx, 0].item() == eos_index:
                        finished[sample_idx] = True

                # store created output
                output_seq.append(output)

                # check whether generation has been finished
                if all(finished):
                    break
    finally:
        # restore original mode of the model, no matter whether sampling succeeded
        model.train(mode=original_mode)

    return torch.cat(output_seq, dim=1)
Exemplo n.º 18
0
if __name__ == '__main__':

    MAX_SEQUENCE_LENGTH = 10  # [number of tokens]

    # for reproducible results:
    make_results_reproducible()

    print("\nA Toy Source-to-Target Copy Task")

    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # initializing the model:
    model = Transformer(src_vocabulary_dimension=11,
                        tgt_vocabulary_dimension=11,
                        n_encoder_blocks=6,
                        n_decoder_blocks=6,
                        representation_dimension=512,
                        feedforward_dimension=2048,
                        n_attention_heads=8,
                        max_sequence_length=MAX_SEQUENCE_LENGTH,
                        dropout_prob=0.1)

    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # evaluating a single prediction before training:
    print("\nEvaluating a single prediction before training:")
    src_sequence = torch_unsqueeze(
        input=tensor(list(range(1, MAX_SEQUENCE_LENGTH + 1))),  # noqa: E501 pylint: disable=not-callable
        # input=tensor([2] * MAX_SEQUENCE_LENGTH),  # TODO  # noqa: E501 pylint: disable=not-callable
        dim=0)
    src_sequence_mask = torch_ones((1, 1, MAX_SEQUENCE_LENGTH))
    tgt_sequence_prediction = model.predict(src_sequences=src_sequence,
                                            src_masks=src_sequence_mask,
Exemplo n.º 19
0
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)

    for i in range(3):
        print("\n***** Utt", i + 1)
        Ti = i + 20
        input = torch.randn(Ti, D)
        length = torch.tensor([Ti], dtype=torch.int)
        nbest_hyps = model.recognize(input, length, char_list, args)

    file_path = "./temp.pth"
    optimizer = torch.optim.Adam(model.parameters())
    torch.save(model.serialize(model, optimizer, 1, LFR_m=1, LFR_n=1),
               file_path)
    model, LFR_m, LFR_n = Transformer.load_model(file_path)
    print(model)
Exemplo n.º 20
0
def eval_probability(
        model: transformer.Transformer,
        input_seq: torch.LongTensor,
        target_seq: torch.LongTensor,
        pad_index: int=None
) -> torch.FloatTensor:
    """Computes the probability that the provided model computes a target sequence given an input sequence.

    The model is temporarily switched to evaluation mode and its original mode (train/eval) is restored before
    returning, even if the computation fails. The forward pass is run with gradient tracking disabled.

    Args:
         model (:class:`transformer.Transformer`): The model to use.
         input_seq (torch.LongTensor): The input sequence to be provided to the model. This has to be a
            (batch-size x input-seq-len)-tensor.
         target_seq (torch.LongTensor): The target sequence whose probability is being evaluated. This has to be a
            (batch-size x target-seq-len)-tensor.
         pad_index (int, optional): The index that indicates a padding token in a sequence. If ``target_seq`` is padded,
            then the ``pad_index`` has to be provided in order to allow for computing the probabilities for relevant
            parts of the target sequence only.

    Returns:
        torch.FloatTensor: A 1D-tensor of size (batch-size), which contains one probability for each sample in
            ``input_seq`` and ``target_seq``, respectively.

    Raises:
        TypeError: If any of the arguments is of the wrong type.
        ValueError: If ``input_seq`` or ``target_seq`` is not 2-dimensional, or if they use different batch sizes.
    """
    if not isinstance(model, transformer.Transformer):
        raise TypeError("The <model> has to be a transformer.Transformer!")
    if not isinstance(input_seq, torch.LongTensor) and not isinstance(input_seq, torch.cuda.LongTensor):
        raise TypeError("The <input_seq> has to be a LongTensor!")
    if input_seq.dim() != 2:
        raise ValueError("<input_seq> has to be a 2D-tensor!")
    if input_seq.is_cuda:
        if not isinstance(target_seq, torch.cuda.LongTensor):
            raise TypeError("The <target_seq> has to be of the same type as <input_seq>, i.e., cuda.LongTensor!")
    elif not isinstance(target_seq, torch.LongTensor):
        raise TypeError("The <target_seq> has to be of the same type as <input_seq>, i.e., LongTensor!")
    if target_seq.dim() != 2:
        raise ValueError("<target_seq> has to be a 2D-tensor!")
    if input_seq.size(0) != target_seq.size(0):
        raise ValueError("<input_seq> and <target_seq> use different batch sizes!")
    if pad_index is not None and not isinstance(pad_index, int):
        raise TypeError("The <pad_index>, if provided, has to be an integer!")

    batch_size = input_seq.size(0)
    # the positions iterated over below index into <target_seq>, so the unpadded length is the target's length
    max_seq_len = target_seq.size(1)

    # put model in evaluation mode
    original_mode = model.training  # store original mode (train/eval) to be restored eventually
    model.eval()
    try:
        # run the model to compute the needed probabilities (no gradients are needed for evaluation)
        with torch.no_grad():
            predictions = model(input_seq, target_seq)

        # determine the lengths of the target sequences
        if pad_index is not None:
            # NOTE(review): this assumes that util.create_padding_mask marks non-padding positions with 1, such that
            # the row sums are the sequence lengths -- confirm against the helper
            mask = util.create_padding_mask(target_seq, pad_index)[:, 0, :]
            seq_len = [int(length) for length in mask.sum(dim=1).cpu().numpy().tolist()]
        else:
            seq_len = [max_seq_len] * batch_size

        # compute the probabilities for each of the provided samples
        sample_probs = torch.ones(batch_size)
        for sample_idx in range(batch_size):  # iterate over each sample
            for token_idx in range(seq_len[sample_idx]):  # iterate over each position in the output sequence
                sample_probs[sample_idx] *= predictions[
                    sample_idx, token_idx, target_seq[sample_idx, token_idx]
                ].item()
    finally:
        # restore original mode of the model, no matter whether the evaluation succeeded
        model.train(mode=original_mode)

    return sample_probs
Exemplo n.º 21
0
"""
import time
import torch
from transformer.transformer import Transformer


if __name__ == '__main__':
    # Extract the model from the .tar checkpoint and re-save it as a .pt
    # file, then load that file back into a fresh Transformer.
    archive_path = 'BEST_Model.tar'
    print(f'loading {archive_path}...')
    started_at = time.time()
    archive = torch.load(archive_path)
    print(f'elapsed {time.time() - started_at} sec')
    model = archive['model']
    print(type(model))

    # Save only the state dict.
    weights_path = 'reading_comprehension.pt'
    print(f'saving {weights_path}...')
    started_at = time.time()
    torch.save(model.state_dict(), weights_path)
    print(f'elapsed {time.time() - started_at} sec')

    # Load the exported weights into a newly constructed model.
    print(f'loading {weights_path}...')
    started_at = time.time()
    model = Transformer()
    model.load_state_dict(torch.load(weights_path))
    print(f'elapsed {time.time() - started_at} sec')
Exemplo n.º 22
0
def main() -> None:
    """
    Main driver for TZ Database compiler which parses the IANA TZ Database files
    located at the --input_dir and generates zoneinfo files and validation
    datasets for unit tests at --output_dir.

    The pipeline is: parse flags, resolve the effective granularities,
    extract the TZ data, run Transformer and ArduinoTransformer over it,
    estimate the transition buffer sizes, collect everything into a zone info
    database and generate the output for each requested --language.

    Usage:
        tzcompiler.py [flags...]

    Raises:
        Exception: If the estimated max buffer size exceeds
            EXTENDED_ZONE_PROCESSOR_MAX_TRANSITIONS and
            --ignore_buf_size_too_large was not given.
    """
    # Configure command line flags.
    parser = argparse.ArgumentParser(description='Generate Zone Info.')

    # Extractor flags.
    parser.add_argument('--input_dir',
                        help='Location of the input directory',
                        required=True)

    # Transformer flags.
    parser.add_argument(
        '--scope',
        # basic: 241 of the simpler time zones for BasicZoneSpecifier
        # extended: all 348 time zones for ExtendedZoneSpecifier
        choices=['basic', 'extended'],
        help='Size of the generated database (basic|extended)',
        required=True)
    parser.add_argument('--start_year',
                        help='Start year of Zone Eras (default: 2000)',
                        type=int,
                        default=2000)
    parser.add_argument('--until_year',
                        help='Until year of Zone Eras (default: 2038)',
                        type=int,
                        default=2038)

    parser.add_argument(
        '--granularity',
        help=(
            'If given, overrides the other granularity flags to '
            'truncate UNTIL, AT, STDOFF (offset), SAVE (delta) and '
            'RULES (rulesDelta) fields to this many seconds (default: None)'),
        type=int,
    )
    parser.add_argument(
        '--until_at_granularity',
        help=(
            'Truncate UNTIL and AT fields to this many seconds (default: 60)'),
        type=int,
    )
    # NOTE: adjacent string literals are concatenated verbatim, so each
    # fragment below must carry its own separating space.
    parser.add_argument(
        '--offset_granularity',
        help=('Truncate STDOFF (offset) fields to this many seconds '
              '(default: 900 (basic), 60 (extended))'),
        type=int,
    )
    parser.add_argument(
        '--delta_granularity',
        help=('Truncate SAVE (delta) and RULES (rulesDelta) field to this '
              'many seconds (default: 900)'),
        type=int,
    )

    # Make --strict the default, --nostrict optional.
    parser.add_argument(
        '--strict',
        help='Remove zones and rules not aligned at granularity time boundary',
        action='store_true',
        default=True,
    )
    parser.add_argument(
        '--nostrict',
        help='Retain zones and rules not aligned at granularity time boundary',
        action='store_false',
        dest='strict',
    )

    # Data pipeline selectors. Reduced down to a single 'zonedb' option which
    # is the default.
    parser.add_argument(
        '--action',
        help='Action to perform (zonedb)',
        default='zonedb',
    )

    # Language selector (for --action zonedb).
    parser.add_argument(
        '--language',
        help='Comma-separated list of target languages '
        '(arduino|python|json|zonelist)',
        default='',
    )

    # C++ namespace names for '--language arduino'. If not specified, it will
    # automatically be set to 'zonedb' or 'zonedbx' depending on the 'scope'.
    parser.add_argument(
        '--db_namespace',
        help='C++ namespace for the zonedb files (default: zonedb or zonedbx)',
    )

    # For language=json, specify the output file.
    parser.add_argument(
        '--json_file',
        help='The JSON output file (default: zonedb.json)',
        default='zonedb.json',
    )

    # The tz_version does not affect any data processing. Its value is
    # copied into the various generated files and usually placed in the
    # comments section to describe the source of the data that generated the
    # various files.
    parser.add_argument(
        '--tz_version',
        help='Version string of the TZ files',
        required=True,
    )

    # Target location of the generated files.
    parser.add_argument(
        '--output_dir',
        help='Location of the output directory',
        default='',
    )

    # Flag to ignore max_buf_size check. Needed on ExtendedHinnantDateTest if we
    # want to test the extended year range from 1974 to 2050, because one of the
    # zones requires a buf_size=9, but ExtendedZoneProcessor only supports 8.
    parser.add_argument(
        '--ignore_buf_size_too_large',
        help='Ignore transition buf size too large',
        action='store_true',
    )

    # Parse the command line arguments
    args = parser.parse_args()

    # Manually parse the comma-separated --language. Note that the default
    # '' splits into {''}, which is rejected below, so at least one language
    # has to be given.
    languages = set(args.language.split(','))
    allowed_languages = {'arduino', 'python', 'json', 'zonelist'}
    if not languages.issubset(allowed_languages):
        print(f'Invalid --language: {languages - allowed_languages}')
        sys.exit(1)

    # Configure logging. This should normally be executed after the
    # parser.parse_args() because it allows us set the logging.level using a
    # flag.
    logging.basicConfig(level=logging.INFO)

    # How the script was invoked
    invocation = ' '.join(sys.argv)

    # Define scope-dependent granularity if not overridden by flag. A flag
    # that is absent (None) or 0 falls back to its default.
    if args.granularity:
        until_at_granularity = args.granularity
        offset_granularity = args.granularity
        delta_granularity = args.granularity
    else:
        until_at_granularity = args.until_at_granularity or 60
        offset_granularity = args.offset_granularity or (
            900 if args.scope == 'basic' else 60)
        delta_granularity = args.delta_granularity or 900

    logging.info('======== TZ Compiler settings')
    logging.info(f'Scope: {args.scope}')
    logging.info(
        f'Start year: {args.start_year}; Until year: {args.until_year}')
    logging.info(f'Strict: {args.strict}')
    logging.info(f'TZ Version: {args.tz_version}')
    logging.info('Ignore too large transition buf_size: '
                 f'{args.ignore_buf_size_too_large}')
    logging.info('Granularity for UNTIL/AT: %d', until_at_granularity)
    logging.info('Granularity for STDOFF (offset): %d', offset_granularity)
    logging.info(
        'Granularity for RULES (rulesDelta) and SAVE (delta): %d',
        delta_granularity,
    )

    # Extract the TZ files
    logging.info('======== Extracting TZ Data files')
    extractor = Extractor(args.input_dir)
    extractor.parse()
    extractor.print_summary()
    policies_map, zones_map, links_map = extractor.get_data()

    # Create initial TransformerResult
    tresult = TransformerResult(
        zones_map=zones_map,
        policies_map=policies_map,
        links_map=links_map,
        removed_zones={},
        removed_policies={},
        removed_links={},
        notable_zones={},
        notable_policies={},
        notable_links={},
        zone_ids={},
        letters_per_policy={},
        letters_map={},
        formats_map={},
    )

    # Transform the TZ zones and rules
    logging.info('======== Transforming Zones and Rules')
    logging.info('Extracting years [%d, %d)', args.start_year, args.until_year)
    transformer = Transformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
    )
    transformer.transform()
    transformer.print_summary()
    tresult = transformer.get_data()

    # Generate the fields for the Arduino zoneinfo data.
    logging.info('======== Transforming to Arduino Zones and Rules')
    arduino_transformer = ArduinoTransformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
    )
    arduino_transformer.transform()
    arduino_transformer.print_summary()
    tresult = arduino_transformer.get_data()

    # Estimate the buffer size of ExtendedZoneProcessor.TransitionStorage.
    logging.info('======== Estimating transition buffer sizes')
    logging.info('Checking years in [%d, %d)', args.start_year,
                 args.until_year)
    estimator = BufSizeEstimator(
        zones_map=tresult.zones_map,
        policies_map=tresult.policies_map,
        start_year=args.start_year,
        until_year=args.until_year,
    )
    buf_size_info: BufSizeInfo = estimator.estimate()

    # Check if the estimated buffer size is too big
    if buf_size_info['max_buf_size'] > EXTENDED_ZONE_PROCESSOR_MAX_TRANSITIONS:
        msg = (f"Max buffer size={buf_size_info['max_buf_size']} "
               f"is larger than ExtendedZoneProcessor.kMaxTransitions="
               f"{EXTENDED_ZONE_PROCESSOR_MAX_TRANSITIONS}")
        if args.ignore_buf_size_too_large:
            logging.warning(msg)
        else:
            raise Exception(msg)

    # Collect TZ DB data into a single JSON-serializable object.
    zidb = create_zone_info_database(
        tz_version=args.tz_version,
        tz_files=Extractor.ZONE_FILES,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
        tresult=tresult,
        buf_size_info=buf_size_info,
    )

    if args.action == 'zonedb':
        logging.info('======== Generating zonedb files')
        for language in languages:
            generate_zonedb(
                invocation=invocation,
                db_namespace=args.db_namespace,
                language=language,
                output_dir=args.output_dir,
                zidb=zidb,
                json_file=args.json_file,
            )
    else:
        logging.error(f"Unrecognized action '{args.action}'")
        sys.exit(1)

    logging.info('======== Finished processing TZ Data files.')
Exemplo n.º 23
0
"""
# flat_shape = hp["numcep"] * hp["nb_time"]
d_input = hp["numcep"]
label_shape = len(train_speaker_list)

# model
d_m = hp["d_m"]
encoder = Encoder(d_input=d_input,
                  n_layers=2,
                  d_k=d_m,
                  d_v=d_m,
                  d_m=d_m,
                  d_ff=hp["d_ff"],
                  dropout=0.1).to(device)
pooling = SelfAttentionPooling(d_m, dropout=0.1).to(device)
model = Transformer(encoder, pooling, d_m, label_shape, dropout=0.2).to(device)

opt = torch.optim.Adam(model.parameters(),
                       lr=hp["lr"],
                       weight_decay=hp["weight_decay"])

loss_func = torch.nn.CrossEntropyLoss()

best_eer = 99.
if hp["comet"]:
    with experiment.train():
        for epoch in tqdm(range(epochs)):
            cce_loss = fit(model, loss_func, opt, train_ds_gen, device)
            experiment.log_metric("cce", cce_loss, epoch=epoch)

            val_eer = test(model,
Exemplo n.º 24
0
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)

    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    print(args.k)
    print(args.d_model)
    print(args.warmup_steps)

    lr_list = []
    for step_num in range(1, 50000):
        # print(step_num)
        lr_1 = k * init_lr * min(step_num**(-0.5),
                                 step_num * (warmup_steps**(-1.5)))
        optimizer.step()
Exemplo n.º 25
0
import time

import numpy as np
import torch

from config import Config, logger
from transformer.transformer import Transformer

if __name__ == '__main__':
    # Imported locally: the sample file at the end of this script is read with
    # pickle, which is not among the module-level imports.
    import pickle

    # Run export.py first so that the exported model file exists.
    filename = 'reading_comprehension.pt'  # location of the exported model

    print('loading {}...'.format(filename))
    start = time.time()

    model = Transformer()
    # Load the weights onto the CPU first so a checkpoint saved on a GPU can
    # still be read on a CPU-only host; the model is moved to Config.device
    # right after.
    model.load_state_dict(torch.load(filename, map_location='cpu'))

    print('elapsed {} sec'.format(time.time() - start))
    model = model.to(Config.device)
    model.eval()

    # Load the test set.
    logger.info('loading samples...')
    start = time.time()
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only point Config.data_file at trusted files.
    with open(Config.data_file, 'rb') as file:
        data = pickle.load(file)
        samples = data
    elapsed = time.time() - start
    logger.info('elapsed: {:.4f} seconds'.format(elapsed))
Exemplo n.º 26
0
class Translator(nn.Module):
    def __init__(self, vocabulary_size_in, vocabulary_size_out, constants,
                 hyperparams):
        """Build the Transformer together with its loss, optimizer and scheduler.

        Args:
            vocabulary_size_in: forwarded to ``Transformer`` as its first argument.
            vocabulary_size_out: forwarded to ``Transformer`` as its second argument.
            constants: project constants object; stored and forwarded to
                ``Transformer``.
            hyperparams: hyperparameter object; ``D_MODEL`` and ``WARMUP_STEPS``
                configure the learning-rate scheduler.
        """
        super().__init__()

        # Plain configuration objects, kept for use by the other methods.
        self.constants = constants
        self.hyperparams = hyperparams

        # Sub-modules are registered in the same order as before
        # (Transformer first, then the criterion).
        self.Transformer = Transformer(
            vocabulary_size_in,
            vocabulary_size_out,
            constants,
            hyperparams,
        )
        self.criterion = nn.CrossEntropyLoss()

        # Adam runs with its default learning rate here; fit() overwrites the
        # rate on every step with the scheduler's value.
        adam_settings = {'betas': (0.9, 0.98), 'eps': 1e-9}
        self.optimizer = optim.Adam(self.Transformer.parameters(),
                                    **adam_settings)
        self.scheduler = Scheduler(
            d_model=hyperparams.D_MODEL,
            warmup_steps=hyperparams.WARMUP_STEPS,
        )

    def count_parameters(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def fit(self, training_steps, data_training, data_eval=None):
        '''
        Train the wrapped Transformer for a fixed number of optimisation steps.

        Every ``hyperparams.EVAL_EVERY_TIMESTEPS`` steps the module's state
        dict is saved to ``constants.WEIGHTS_FILE`` and the mean training loss,
        mean gradient norm and current learning rate are written to
        TensorBoard. When ``data_eval`` is truthy its batches are translated as
        well, and a corpus-level BLEU score plus up to five example
        translations are logged at the same training step.

        Arg:
            training_steps: number of batches to draw from ``data_training``.
            data_training: iterator which gives two batches: one of source language and one for target language
            data_eval: optional iterable of ``(X_batch, Y_batch)`` pairs that
                also exposes ``itotok`` to map a sequence of token ids back to
                subword strings.
        '''
        def subwords_to_string(subwords):
            # A trailing "@@" marks a subword that continues into the next
            # one, so it is glued on without a space; padding is dropped.
            pieces = []
            for subword in subwords:
                if subword[-2:] == "@@":
                    pieces.append(subword[:-2])
                elif subword != self.constants.PADDING_WORD:
                    pieces.append(subword + " ")
            return "".join(pieces)

        writer = SummaryWriter()
        training_loss, gradient_norm = [], []

        try:
            # The training index is named `step` and is never reused by the
            # evaluation loops, so every scalar is logged at the right step.
            for step in tqdm(range(training_steps)):
                X, Y = next(data_training)
                batch_size = X.shape[0]

                # Teacher forcing: the decoder input is the target shifted
                # right by one position with BOS in front.
                bos = torch.full((batch_size, 1),
                                 self.constants.BOS_IDX,
                                 dtype=torch.long,
                                 device=self.constants.DEVICE)
                translation = torch.cat((bos, Y[:, :-1]), dim=1)
                output = self.Transformer(X, translation)
                output = output.contiguous().view(-1, output.size(-1))
                target = Y.contiguous().view(-1)

                lr = self.scheduler.step()
                for group in self.optimizer.param_groups:
                    group['lr'] = lr

                self.optimizer.zero_grad()
                loss = self.criterion(output, target)
                training_loss.append(loss.item())
                loss.backward()
                self.optimizer.step()

                # Global L2 norm of the gradients; parameters that received
                # no gradient on this step are skipped.
                squared_norm = 0
                for p in self.Transformer.parameters():
                    if p.grad is not None:
                        squared_norm = squared_norm + torch.sum(p.grad.data**2)
                gradient_norm.append(float(squared_norm)**0.5)

                if ((step + 1) % self.hyperparams.EVAL_EVERY_TIMESTEPS) != 0:
                    continue

                torch.save(self.state_dict(), self.constants.WEIGHTS_FILE)
                writer.add_scalar('0_training_set/loss',
                                  np.mean(training_loss), step)
                writer.add_scalar('0_training_set/gradient_norm',
                                  np.mean(gradient_norm), step)
                writer.add_scalar('2_other/lr', lr, step)
                training_loss, gradient_norm = [], []

                if not data_eval:
                    continue

                # translate() switches the module to eval mode; remember the
                # current mode so training resumes with it afterwards.
                was_training = self.training
                eval_references = []
                eval_hypotheses = []
                for X_batch, Y_batch in data_eval:
                    for reference in Y_batch:
                        eval_references.append(
                            subwords_to_string(data_eval.itotok(reference)))
                    for hypothesis in self.translate(X_batch):
                        eval_hypotheses.append(
                            subwords_to_string(data_eval.itotok(hypothesis)))
                self.train(was_training)

                ex_phrases = "".join(
                    "\n truth: " + ref + "\n prediction: " + hyp + "\n"
                    for ref, hyp in zip(eval_references[:5],
                                        eval_hypotheses[:5]))

                # corpus_bleu expects, per sentence, a list of tokenised
                # references and one tokenised hypothesis.
                BLEU = nltk.translate.bleu_score.corpus_bleu(
                    [[ref.split()] for ref in eval_references],
                    [hyp.split() for hyp in eval_hypotheses])
                writer.add_scalar('1_eval_set/BLEU', BLEU, step)
                writer.add_text('examples', ex_phrases, step)
        finally:
            # Flush and release the event file even if training is interrupted.
            writer.close()

    def translate(self, X):
        '''
        Arg:
            X: batch of phrases to translate: tensor(nb_texts, nb_tokens)
        '''
        self.train(False)
        batch_size, max_seq = X.shape
        max_seq += 10  #TODO: remove hard code
        temp = torch.zeros(batch_size, max_seq).type(torch.LongTensor).to(
            self.constants.DEVICE)
        temp[:, 0] = self.constants.BOS_IDX
        enc = self.Transformer.forward_encoder(X)
        for j in range(1, max_seq):
            output = self.Transformer.forward_decoder(X, enc, temp)
            output = torch.argmax(output, dim=-1)
            temp[:, j] = output[:, j - 1]
        #remove padding
        translations = []
        for translation in temp:
            temp2 = []
            for i in range(max_seq):
                if translation[i] == self.constants.PADDING_IDX:
                    break
                if i != 0:
                    temp2.append(translation[i])
            translations.append(temp2)
        return translations