Example #1
def extract_sentiment_words():
    # create vocabulary using wikitext2
    train_txt, _, _ = torchtext.datasets.WikiText2.splits(TEXT)
    TEXT.build_vocab(train_txt)

    start = time.time()
    x_train, y_train, x_val, y_val, rtrain, rtest = preprocess()
    end = time.time()

    print("PREPROCESSING TIME: {}".format(end - start))
    
    ntokens = len(TEXT.vocab.stoi) # the size of vocabulary
    
    # FIXME set up batched examples for better generality
    # batch_size = 20
    # eval_batch_size = 10

    # configs
    emsize = 200 # embedding dimension
    nhid = 200 # feedforward dimension
    nlayers = 2 # n encoders
    nhead = 2 # multiattention heads
    dropout = 0.2 # the dropout value

    # initialize main torch vars
    model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    lr = 0.05 # learning rate

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

    best_val_loss = float("inf")
    epochs = 50
    best_model = None
    
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train_model(x_train, y_train, model, criterion, optimizer, scheduler, epoch)
        val_loss = evaluate(x_val, y_val, rtest, model, criterion)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
            'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                        val_loss, math.exp(val_loss)))
        print('-' * 89)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model

        scheduler.step()
    
    # test_loss = evaluate(best_model, criterion, test_data)

    # print('=' * 89)
    # print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    #     test_loss, math.exp(test_loss)))
    # print('=' * 89)
    return best_model
Example #2
    def set_transformer_model(self):
        '''
        This function loads the base transformer model.

        Args:
            transformer_config_path : config path (YAML) of the transformer
            transformer_weights_path : optional; if given, the weights are loaded as well

        Returns: None
        '''

        # load base transformer model from config
        with open(self.args.transformer_config_path, 'r') as file:
            config = yaml.load(file, Loader=yaml.FullLoader)

        model_config = TransformerConfig(config)
        input_dim = config['transformer']['input_dim']

        dr = model_config.downsample_rate
        hidden_size = model_config.hidden_size
        output_attention = False

        base_transformer_model = TransformerModel(
            model_config, input_dim,
            output_attentions=output_attention).to('cpu')

        #load weights
        if self.args.transformer_weights_path:
            ckpt = torch.load(self.args.transformer_weights_path, map_location='cpu')
            base_transformer_model.load_state_dict(ckpt['Transformer'])

        self.base_transformer_model = base_transformer_model
Example #3
    def build_model(self):
        self.model = TransformerModel(self.opt, self.dict)
        # todo
        if self.opt['embedding_type'] != 'random':
            pass

        if self.opt['load_dict'] is not None:
            logger.info('[ Loading existing model params from {} ]'
                        ''.format(self.opt['load_dict']))
            self.model.load_model(self.opt['load_dict'])

        if self.use_cuda:
            self.model.to(self.device)
Example #4
def evaluate(sentence):
    sentence = preprocess_sentence(sentence)
    sentence = tf.expand_dims(START_TOKEN + tokenizer.encode(sentence) +
                              END_TOKEN,
                              axis=0)

    output = tf.expand_dims(START_TOKEN, 0)

    test_Transformer = TransformerModel(max_length=MAX_LENGTH,
                                        vocab_size=VOCAB_SIZE,
                                        embedding_matrix=emb_matrix)
    test_model = test_Transformer.model
    test_model.load_weights(checkpoint_path)

    for i in range(MAX_LENGTH):
        predictions = test_model(inputs=[sentence, output], training=False)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if the predicted_id is equal to the end token
        if tf.equal(predicted_id, END_TOKEN[0]):
            break

        # concatenate the predicted_id to the output, which is given to the decoder
        # as its input
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0)
Example #5
def inference():
    eval_model = TransformerModel.load_from_checkpoint(
        './lightning_logs/version_0/checkpoints/epoch=8-step=539.ckpt',
        d_model=250,
        n_heads=10,
        n_layers=1)
    eval_model.freeze()
    n_steps = 1000

    test_data = pd.read_csv('./data/toy_data/test.csv').to_numpy()
    train_data = pd.read_csv('./data/toy_data/train.csv').to_numpy()

    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(train_data)
    test_data = torch.tensor(scaler.transform(test_data).reshape(-1)).float()

    with torch.no_grad():
        for i in range(0, n_steps):
            # data = torch.cat((test_data[-99:], torch.tensor([0]).float()))
            data = test_data[-100:]
            output = eval_model(data.reshape(-1, 1).unsqueeze(-1))
            output = torch.flatten(output)
            test_data = torch.cat((test_data, output[-1:]))

    test_data = test_data.cpu().view(-1)

    # I used this plot to visualize whether the model picks up any long-term structure within the data.
    plt.plot(test_data[600:], color="red")
    plt.plot(test_data[600:1000], color="blue")
    plt.grid(True, which='both')
    plt.axhline(y=0, color='k')
    plt.show()
    pass
Example #6
def main(_):
    vocab_path = FLAGS.vocab_path
    model_dir = FLAGS.model_dir

    encoder_stack_size = FLAGS.encoder_stack_size
    decoder_stack_size = FLAGS.decoder_stack_size
    hidden_size = FLAGS.hidden_size
    num_heads = FLAGS.num_heads
    filter_size = FLAGS.filter_size
    dropout_rate = FLAGS.dropout_rate

    extra_decode_length = FLAGS.extra_decode_length
    beam_width = FLAGS.beam_width
    alpha = FLAGS.alpha
    decode_batch_size = FLAGS.decode_batch_size
    src_max_length = FLAGS.src_max_length

    source_text_filename = FLAGS.source_text_filename
    target_text_filename = FLAGS.target_text_filename
    translation_output_filename = FLAGS.translation_output_filename

    # transformer model
    subtokenizer = tokenization.restore_subtokenizer_from_vocab_files(
        vocab_path)
    vocab_size = subtokenizer.vocab_size
    model = TransformerModel(vocab_size=vocab_size,
                             encoder_stack_size=encoder_stack_size,
                             decoder_stack_size=decoder_stack_size,
                             hidden_size=hidden_size,
                             num_heads=num_heads,
                             filter_size=filter_size,
                             dropout_rate=dropout_rate,
                             extra_decode_length=extra_decode_length,
                             beam_width=beam_width,
                             alpha=alpha)

    ckpt = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt is None:
        raise ValueError('No checkpoint is found in %s' % model_dir)
    print('Loaded latest checkpoint ', latest_ckpt)
    ckpt.restore(latest_ckpt).expect_partial()

    # build evaluator
    evaluator = SequenceTransducerEvaluator(model, subtokenizer,
                                            decode_batch_size, src_max_length)

    # translates input sequences, and optionally evaluates BLEU score if
    # groundtruth target sequences are provided
    if target_text_filename is not None:
        case_insensitive_score, case_sensitive_score = evaluator.evaluate(
            source_text_filename, target_text_filename,
            translation_output_filename)
        print('BLEU(case insensitive): %f' % case_insensitive_score)
        print('BLEU(case sensitive): %f' % case_sensitive_score)
    else:
        evaluator.translate(source_text_filename, translation_output_filename)
        print(
            'Inference mode: no groundtruth translations.\nTranslations written '
            'to file "%s"' % translation_output_filename)
Example #7
def main(_):
    data_dir = FLAGS.data_dir
    vocab_path = FLAGS.vocab_path
    model_dir = FLAGS.model_dir

    encoder_stack_size = FLAGS.encoder_stack_size
    decoder_stack_size = FLAGS.decoder_stack_size
    hidden_size = FLAGS.hidden_size
    num_heads = FLAGS.num_heads
    filter_size = FLAGS.filter_size
    dropout_rate = FLAGS.dropout_rate

    max_num_tokens = FLAGS.max_num_tokens
    max_length = FLAGS.max_length
    num_parallel_calls = FLAGS.num_parallel_calls

    learning_rate = FLAGS.learning_rate
    learning_rate_warmup_steps = FLAGS.learning_rate_warmup_steps
    optimizer_adam_beta1 = FLAGS.optimizer_adam_beta1
    optimizer_adam_beta2 = FLAGS.optimizer_adam_beta2
    optimizer_adam_epsilon = FLAGS.optimizer_adam_epsilon

    label_smoothing = FLAGS.label_smoothing
    num_steps = FLAGS.num_steps
    save_ckpt_per_steps = FLAGS.save_ckpt_per_steps

    # transformer model
    subtokenizer = tokenization.restore_subtokenizer_from_vocab_files(
        vocab_path)
    vocab_size = subtokenizer.vocab_size
    model = TransformerModel(vocab_size=vocab_size,
                             encoder_stack_size=encoder_stack_size,
                             decoder_stack_size=decoder_stack_size,
                             hidden_size=hidden_size,
                             num_heads=num_heads,
                             filter_size=filter_size,
                             dropout_rate=dropout_rate)

    # training dataset
    builder = dataset.DynamicBatchDatasetBuilder(max_num_tokens, True,
                                                 max_length,
                                                 num_parallel_calls)
    filenames = sorted(glob.glob(os.path.join(data_dir, SUFFIX)))
    train_ds = builder.build_dataset(filenames)

    # learning rate and optimizer
    optimizer = tf.keras.optimizers.Adam(utils.LearningRateSchedule(
        learning_rate, hidden_size, learning_rate_warmup_steps),
                                         optimizer_adam_beta1,
                                         optimizer_adam_beta2,
                                         epsilon=optimizer_adam_epsilon)

    # checkpoint
    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)

    # build trainer and start training
    trainer = SequenceTransducerTrainer(model, label_smoothing)
    trainer.train(train_ds, optimizer, ckpt, model_dir, num_steps,
                  save_ckpt_per_steps)
Example #8
def train(opt, train_data, eval_data=None):
    logger.info("start training task")
    dim_input = 6
    dim_emb = 64
    num_class = train_data.num_class
    transformer_nhead = 2
    transformer_nlayers = 1
    model = TransformerModel(dim_input, dim_emb, transformer_nhead,
        num_class,
        transformer_nlayers)
    if model.cuda:
        model = move_to_gpu(model)
    summary(model, train_data[0]['x'].shape)
    try:
        dataloader = DataLoader(
            train_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=4
        )
        logger.info("created training dataloader")
    except Exception as e:
        logger.error("failed to create dataloader: %s", e)
        raise

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=model.optimizer,
            milestones=[5, 10], gamma=0.1)

    model_path = os.path.join(opt.model_dir,opt.model_name+".pth")
    global_steps = 0
    best = 0
    for epoch in tqdm(list(range(opt.epoch)), desc='epoch'):
        for step, batch in enumerate(dataloader):
            global_steps += 1
            metrics = model.train(batch)
            if global_steps % opt.log_steps == 0:
                logger.debug(f"global steps={global_steps},{metrics}")
            if global_steps % opt.save_steps == 0:
                val_metrics, eval_result = eval(opt, model, eval_data)
                logger.info(f"global steps={global_steps}, current={val_metrics}, best={best}, result={eval_result}")
                if val_metrics > best:
                    best = val_metrics
                    torch.save(model.state_dict(), model_path)
                    logger.info(f"global steps={global_steps}, save model:{model_path}")
        lr_scheduler.step()
Example #9
    def __init__(self, context: PyTorchTrialContext):
        self.context = context
        data_config = self.context.get_data_config()
        hparams = self.context.get_hparams()
        using_bind_mount = data_config["use_bind_mount"]
        use_cache = data_config["use_cache"]
        self.eval_batch_size = hparams["eval_batch_size"]

        download_directory = (
            Path(data_config["bind_mount_path"]) if using_bind_mount else
            Path("/data")) / f"data-rank{self.context.distributed.get_rank()}"

        self.corpus = data.load_and_cache_dataset(download_directory,
                                                  use_cache)
        self.model_cls = hparams["model_cls"]
        emsize = hparams["word_embeddings_size"]
        num_hidden = hparams["num_hidden"]
        num_layers = hparams["num_layers"]
        dropout = hparams["dropout"]
        self.bptt = hparams["bptt"]

        if self.model_cls.lower() == "transformer":
            num_heads = hparams["num_heads"]
            self.model = TransformerModel(self.corpus.ntokens, emsize,
                                          num_heads, num_hidden, num_layers,
                                          dropout)
        else:
            tied = hparams["tied"]
            self.model = RNNModel(
                self.model_cls,
                self.corpus.ntokens,
                emsize,
                num_hidden,
                num_layers,
                dropout,
                tied,
            )

        self.model = self.context.wrap_model(self.model)
        self.criterion = nn.NLLLoss()

        lr = hparams["lr"]
        optimizer = torch.optim.SGD(self.model.parameters(), lr=lr)
        self.optimizer = self.context.wrap_optimizer(optimizer)

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                factor=0.25,
                patience=0,
                threshold=0.001,
                threshold_mode="abs",
                verbose=True,
            ),
            LRScheduler.StepMode.MANUAL_STEP,
        )
Example #10
    def __init__(self, mask, hps):
        super(Seq2Seq, self).__init__()

        self.hps = hps
        self.vocab_size = hps.vocab_size
        self.emb_dim = hps.emb_dim
        self.max_len = hps.max_len
        self.batch_size = hps.batch_size
        self.test_batch_size = hps.test_batch_size

        self.mask = mask

        args = DEFAULT_CONFIG
        shared_args = DEFAULT_SHARED_CONFIG
        self.irony_encoder = TransformerModel(args, self.vocab_size + self.max_len, self.max_len)
        self.non_encoder = TransformerModel(args, self.vocab_size + self.max_len, self.max_len)
        self.shared_encoder = SharedTransformerModel(shared_args, self.vocab_size + self.max_len, self.max_len)
        self.shared_decoder = SharedTransformerModel(shared_args, self.vocab_size + self.max_len, self.max_len)
        self.irony_decoder = TransformerDecoder(args, self.vocab_size + self.max_len, self.max_len, True)
        self.non_decoder = TransformerDecoder(args, self.vocab_size + self.max_len, self.max_len, True)
Example #11
def main(args):
    random_seed(args.seed)
    device = torch.device("cuda" if args.cuda else "cpu")

    corpus = data.Corpus(args.data)
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, args.batch_size)
    test_data = batchify(corpus.test, args.batch_size)
    print('loaded data')
    print(f'number of unique tokens: {len(corpus.dictionary)}')

    ntokens = len(corpus.dictionary)
    if args.model == 'Transformer':
        model = TransformerModel(
            ntokens,
            args.emsize,
            args.nhead,
            args.nhid,
            args.nlayers,
            args.dropout).to(device)
    else:
        model = RNNModel(
            args.model,
            ntokens,
            args.emsize,
            args.nhid,
            args.nlayers,
            args.dropout,
            args.tied).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
                                                    max_lr=0.001,
                                                    steps_per_epoch=len(list(range(0,
                                                                                   train_data.size(
                                                                                       0) - 1,
                                                                                   args.bptt))),
                                                    epochs=args.epochs,
                                                    anneal_strategy='linear')
    print('initialized model and optimizer')
    train(args, model, optimizer, train_data, val_data, scheduler)
Example #12
def main():
    parser = argparse.ArgumentParser(description="Train GPT2 Model")
    parser.add_argument("--batch_size",
                        type=int,
                        default=4,
                        help="Specify batch size")
    parser.add_argument("--num_epoch",
                        type=int,
                        default=3,
                        help="Specify number of epochs")
    parser.add_argument("--learning_rate",
                        type=float,
                        default=5e-5,
                        help="Specify AdamW learning rate")

    args = parser.parse_args()

    setup = models.trav_trans.dataset.Setup("output", "output/train_dps.txt",
                                            "output/train_ids.txt")

    layers = [1, 3, 6, 9]

    for l in layers:
        model = TransformerModel(
            len(setup.vocab.idx2vocab),
            CrossEntropyLoss(ignore_index=setup.vocab.pad_idx), l, 300, 1000,
            6, 1e-05)

        training_args = TrainingArgs(batch_size=args.batch_size,
                                     num_epoch=args.num_epoch,
                                     output_dir="output",
                                     optimizer=AdamW(model.parameters(),
                                                     lr=args.learning_rate),
                                     save_model_on_epoch=False,
                                     suffix=f"{l}-layers")

        trainer = Trainer(model, setup, training_args)

        trainer.train()
Example #13
def main():
    args = get_args()
    args.n_gpu = 1

    set_seed(args)

    # Construct tokenizer
    tokenizer = CharTokenizer([])
    tokenizer.load(args.load_vocab)
    args.vocab_size = len(tokenizer)

    logger.info(f"args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    # GPU setting
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Construct model
    model = TransformerModel(
        vocab_size=args.vocab_size,
        hidden_size=args.hidden_size,
        num_attention_heads=args.num_attention_heads,
        num_encoder_layers=args.num_encoder_layers,
        num_decoder_layers=args.num_decoder_layers,
        intermediate_size=args.intermediate_size,
        dropout=args.dropout,
    ).to(args.device)
    logger.info(
        f"# of model parameters: {sum(p.numel() for p in model.parameters()) * 1e-6:.2f}M"
    )

    # Load data
    noisy_sents = read_strings(os.path.join('sejong_corpus', args.noisy_file))
    clean_sents = read_strings(os.path.join('sejong_corpus', args.clean_file))
    sents_annotation = ['None'] * len(noisy_sents)

    pairs = [{
        "noisy": noisy,
        "clean": clean,
        "annotation": annot
    } for noisy, clean, annot in zip(noisy_sents, clean_sents,
                                     sents_annotation)]

    # Train-validation split
    train_data, valid_data = train_test_split(
        pairs, test_size=args.val_ratio,
        random_state=args.seed)  # test: about 1000
    logger.info(f"# of train data: {len(train_data)}")
    logger.info(f"# of valid data: {len(valid_data)}")

    train(model, tokenizer, train_data, valid_data, args, eos=args.eos_setting)
Example #14
def train():
    # data module
    dm = TSDataModule("", seq_len=100, batch_size=32)
    dm.setup()
    # model
    model = TransformerModel(250, 10, 1)

    # trainer
    trainer = pl.Trainer(gradient_clip_val=0.7)

    trainer.fit(model=model, datamodule=dm)
    # prediction

    pass
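    # prediction: a minimal sketch (not part of the original snippet), assuming
    # TSDataModule exposes a val_dataloader() and that each batch is an
    # (input, target) pair
    import torch

    model.eval()
    with torch.no_grad():
        x, y = next(iter(dm.val_dataloader()))
        preds = model(x)
        print(preds.shape)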
Example #15
def evaluate(sentence):
    sentence = preprocess_sentence(sentence)

    vocab_filename = "vocab_" + language + ".txt"
    tokenizer = tfds.features.text.SubwordTextEncoder.load_from_file(
        vocab_filename)
    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2
    # Define start and end token to indicate the start and end of a sentence
    START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]

    emb_matrix = load_embeddings(vocab_size=VOCAB_SIZE,
                                 tokenizer=tokenizer,
                                 language=language)
    Transformer = TransformerModel(max_length=MAX_LENGTH,
                                   vocab_size=VOCAB_SIZE,
                                   embedding_matrix=emb_matrix)

    sentence = tf.expand_dims(START_TOKEN + tokenizer.encode(sentence) +
                              END_TOKEN,
                              axis=0)
    output = tf.expand_dims(START_TOKEN, 0)

    # Create a new basic model instance
    model = Transformer.model

    checkpoint_path = loadCheckpoint_chat(VOCAB_SIZE)
    try:
        model.load_weights(checkpoint_path)
        print("Model loaded from checkpoint " + checkpoint_path + "Loaded")
    except ValueError:
        print("Error loading checkpoint " + checkpoint_path)
        print("ValueError:" + str(ValueError))

    for i in range(MAX_LENGTH):
        predictions = model(inputs=[sentence, output], training=False)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if the predicted_id is equal to the end token
        if tf.equal(predicted_id, END_TOKEN[0]):
            break

        # concatenate the predicted_id to the output, which is given to the decoder
        # as its input.
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), tokenizer
Example #16
def main():
    parser = argparse.ArgumentParser(description="Train GPT2 Model")
    parser.add_argument("--batch_size", type=int, default=4, help="Specify batch size")
    parser.add_argument("--num_epoch", type=int, default=3, help="Specify number of epochs")
    parser.add_argument("--learning_rate", type=float, default=5e-5, help="Specify AdamW learning rate")

    args = parser.parse_args()

    tokenizer = Tokenizer.from_file("output/tokenizer.json")
    dataset = Dataset("output/train_rq4_dps.txt")

    model = TransformerModel(
        tokenizer.get_vocab_size(),
        CrossEntropyLoss(ignore_index=tokenizer.encode("[PAD]").ids[0]),
        6,
        300,
        1000,
        6,
        1e-05
    )

    training_args = TrainingArgs(
        batch_size = args.batch_size,
        num_epoch = args.num_epoch,
        output_dir = "output",
        optimizer = AdamW(model.parameters(), lr=args.learning_rate),
        save_model_on_epoch = False
    )

    trainer = Trainer(
        model,
        dataset,
        tokenizer,
        training_args
    )

    trainer.train()
Example #17
def main():

    voc_size = args.vocab_sz
    print("Setting model...", end="")
    model = TransformerModel(
        input_sz=voc_size,
        output_sz=voc_size,
        d_model=args.d_model,
        nhead=args.n_head,
        num_encoder_layers=args.n_encoder_layers,
        num_decoder_layers=args.n_decoder_layers,
        dim_feedforward=args.dim_feedforward,
        dropout=args.dropout,
    )
    model.load_state_dict(flow.load(args.load_dir))
    model = to_cuda(model)
    print("Done")

    print("Inference:")
    num = args.input_start
    if num % 2 != 0:
        print("The input number must be an even number.")
        return
    if num > args.vocab_sz - MAX_LEN * 2:
        print("The input sequence may be out of range.")
        return

    input_nums = [num + i * 2 for i in range(MAX_LEN)]
    src = to_cuda(flow.tensor(input_nums)).unsqueeze(1)
    pred = [0]
    for i in range(MAX_LEN):
        inp = to_cuda(flow.tensor(pred)).unsqueeze(1)
        output = model(src, inp)
        out_num = output.argmax(2)[-1].numpy()[0]
        pred.append(out_num)
    print("input:", input_nums)
    print("pred:", pred)
Example #18
def main():
    model = TransformerModel(ntoken=100,
                             ninp=8000,
                             nhead=8,
                             nhid=10000,
                             nlayers=1).to('cuda')
    time_steps = 64
    batch_size = 128
    input = torch.zeros(time_steps,
                        batch_size,
                        dtype=torch.int64,
                        device='cuda')
    with measure():
        output = model(input)
        print(output[0, 0])
Example #19
def main():
    model = TransformerModel(ntoken=100,
                             ninp=8000,
                             nhead=8,
                             nhid=10000,
                             nlayers=1).to('cuda')
    time_steps = 64
    batch_size = 128
    input = torch.zeros(time_steps,
                        batch_size,
                        dtype=torch.int64,
                        device='cuda')
    output = model(input)
    torch.cuda.synchronize()
    with measure():
        for i in range(4):
            output = model(input)
        torch.cuda.synchronize()
Example #20
def main():
    print("Generating data...", end="")
    voc_size = args.vocab_sz
    inp = np.arange(2, voc_size, 2)
    tgt = np.arange(3, voc_size, 2)
    data_x, data_y = get_numbers(inp, tgt)
    train_len = int(len(data_x) * 0.9)
    train_x, val_x = data_x[:train_len], data_x[train_len:]
    train_y, val_y = data_y[:train_len], data_y[train_len:]
    print("Done")

    print("Setting model...", end="")
    model = TransformerModel(
        input_sz=voc_size,
        output_sz=voc_size,
        d_model=args.d_model,
        nhead=args.n_head,
        num_encoder_layers=args.n_encoder_layers,
        num_decoder_layers=args.n_decoder_layers,
        dim_feedforward=args.dim_feedforward,
        dropout=args.dropout,
    )
    if args.load_dir != ".":
        model.load_state_dict(flow.load(args.load_dir))
    model = to_cuda(model)
    criterion = to_cuda(nn.CrossEntropyLoss())

    optimizer = flow.optim.Adam(model.parameters(), lr=args.lr)
    print("Done")

    print("Training...")

    min_loss = 100
    for i in range(1, args.n_epochs + 1):
        epoch_loss = train(model, criterion, optimizer, train_x, train_y)
        epoch_loss_val = validation(model, criterion, val_x, val_y)
        print("epoch: {} train loss: {}".format(i, epoch_loss))
        print("epoch: {} val loss: {}".format(i, epoch_loss_val))
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            if not os.path.exists(args.save_dir):
                os.mkdir(args.save_dir)
            else:
                shutil.rmtree(args.save_dir)
                assert not os.path.exists(args.save_dir)
                os.mkdir(args.save_dir)
            flow.save(model.state_dict(), args.save_dir)
        if i % 3 == 2:
            print(test(model, test_times=10))
Example #21
def main(model_name=None, hidden=64, nlayers=1):
    voc_size = 10000
    inp = arange(2, voc_size, 2)
    tgt = arange(3, voc_size, 2)
    batch_size = 128
    epochs = 30
    dataset = NumberLoader(inp, tgt)
    train_len = int(len(dataset) * 0.9)
    val_len = len(dataset) - train_len
    train_set, val_set = random_split(dataset, [train_len, val_len])
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1)
    model = TransformerModel(voc_size,
                             voc_size,
                             hidden=hidden,
                             nlayers=nlayers)
    if model_name is not None:
        model.load_state_dict(load(model_name))
    model = model.cuda()
    # optimizer = optim.SGD(model.parameters(), lr=0.5)
    optimizer = optim.Adam(model.parameters())
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    criterion = nn.CrossEntropyLoss()
    best_loss = 100
    for i in range(epochs):
        epoch_loss = train(model, criterion, optimizer, train_loader)
        epoch_loss_val = validation(model, criterion, val_loader)
        # scheduler.step()
        print("epoch: {} train loss: {}".format(i, epoch_loss))
        print("epoch: {} val loss: {}".format(i, epoch_loss_val))
        if epoch_loss_val < best_loss:
            best_loss = epoch_loss_val
            model_name = "model/model_{0:.5f}.pt".format(epoch_loss_val)
            save(model.state_dict(), model_name)
    return model_name
Example #22
    ninput = M + n_meds
    emsize = 512  # embedding dimension
    nhid = 2048  # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers = 6  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
    nhead = 8  # the number of heads in the multiheadattention models
    dropout = 0.3

    sequence_len = 42  # 1 week, 4-hr average
    n_mc_smps = 20

    model = TransformerModel(M=M,
                             n_meds=n_meds,
                             n_covs=n_covs,
                             sequence_len=sequence_len,
                             emsize=emsize,
                             nhead=nhead,
                             nhid=nhid,
                             nlayers=nlayers,
                             n_mc_smps=n_mc_smps,
                             dropout=dropout).to(globals.device)

    print("data fully setup!")

    ### Training parameters
    criterion = nn.BCEWithLogitsLoss(reduction='sum')
    lr = 0.03
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

    ### Training
    best_val_loss = float("inf")
Example #23
train_on_gpu = False

vocab = pickle.load(
    open("models/transformer/vocab_siamzone-v4-space.pkl", "rb"))

vocab_to_int = vocab["vocab_to_int"]
int_to_vocab = vocab["int_to_vocab"]

ntokens = len(vocab_to_int)
emsize = 512
nhid = 512
nlayers = 4
nhead = 4
dropout = 0.2

model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers,
                         dropout).to(device)

model_save_path = "./models/transformer/lm-siamzone-v4-space-342.pkl"
model.load_state_dict(
    torch.load(model_save_path, map_location=torch.device("cpu")))
model.eval()

print("Model initialized")


def top_k_top_p_filtering(logits,
                          top_k,
                          top_p,
                          temperature,
                          filter_value=-float("Inf")):
    # Hugging Face script to apply top k and nucleus sampling
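    # The body of this helper is cut off in the listing. Below is a sketch of
    # the standard top-k / nucleus filtering logic the comment refers to
    # (adapted from the widely circulated Hugging Face snippet, assuming 1-D
    # logits; applying temperature here is an assumption based on the signature).
    logits = logits / temperature
    if top_k > 0:
        # remove all tokens with a logit below the k-th largest logit
        kth_value = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_value] = filter_value
    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
        # remove tokens with cumulative probability above the threshold,
        # keeping at least the most probable token
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits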
Example #24
from data_load import vocab, train_data, get_batch, bptt, val_data
from model import TransformerModel
import torch.nn as nn
import torch
import math
from tqdm import tqdm

ntokens = len(vocab.stoi)  # the size of vocabulary
emsize = 200  # embedding dimension
nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2  # the number of heads in the multiheadattention models
dropout = 0.2  # the dropout value
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers,
                         dropout).to(device)

criterion = nn.CrossEntropyLoss()
lr = 5.0  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

import time


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    src_mask = model.generate_square_subsequent_mask(bptt).to(device)
    for batch, i in tqdm(enumerate(range(0, train_data.size(0) - 1, bptt))):
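        # The loop body is cut off in this listing; the lines below sketch the
        # standard PyTorch word-language-model training step this snippet
        # follows (an approximation, not the original body).
        data, targets = get_batch(train_data, i)
        if data.size(0) != bptt:
            src_mask = model.generate_square_subsequent_mask(data.size(0)).to(device)
        optimizer.zero_grad()
        output = model(data, src_mask)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        total_loss += loss.item()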
Example #25
    return j
glove_embed = open_it(params.glove_embed)

########## Load dataset #############
dataset_object = wtwtDataset()
train_dataset = dataset_object.train_dataset
eval_dataset = dataset_object.eval_dataset
if params.dummy_run:
    eval_dataset = train_dataset
    target_names = []
else:
    eval_dataset = dataset_object.eval_dataset
    target_names = [dataset_object.id2stance[id_] for id_ in range(0, 4)]

########## Create model #############
model = TransformerModel(glove_embed, params.glove_dims, params.trans_ip_dims, params.num_heads,
        params.trans_ff_hidden, params.num_layers, params.mlp_hidden, params.dropout)
model = model.to(params.device)
print("Detected", torch.cuda.device_count(), "GPUs!")
model = torch.nn.DataParallel(model)
if params.wandb:
    wandb.watch(model)

########## Optimizer & Loss ###########

def my_fancy_optimizer(warmup_proportion=0.1):
    num_train_optimization_steps = len(train_dataset) * params.n_epochs

    param_optimizer = list(model.parameters())
    # param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]
    # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # optimizer_grouped_parameters = [
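    # The listing is truncated here. The commented-out lines above point at the
    # usual BERT-style parameter grouping, so this is a sketch of that pattern;
    # AdamW is a stand-in for whatever optimizer the original used, and
    # params.lr is an assumed hyperparameter.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    named_params = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {'params': [p for n, p in named_params
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in named_params
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    num_warmup_steps = int(warmup_proportion * num_train_optimization_steps)
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=params.lr)
    return optimizer, num_warmup_steps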
Example #26
def main():
    args = get_args()
    logger.info(f"args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    if args.tokenizer == 'char':
        tokenizer = CharTokenizer([])
    if args.tokenizer == 'kobert':
        print("koBERT tokenizer")
        tokenizer = KoBertTokenizer.from_pretrained('monologg/kobert')
        args.vocab_size = tokenizer.vocab_size
        print(args.vocab_size)

    model = TransformerModel(
        vocab_size=args.vocab_size,
        hidden_size=args.hidden_size,
        num_attention_heads=args.num_attention_heads,
        num_encoder_layers=args.num_encoder_layers,
        num_decoder_layers=args.num_decoder_layers,
        intermediate_size=args.intermediate_size,
        dropout=args.dropout,
    ).to(args.device)
    logger.info(f"# of model parameters: {sum(p.numel() for p in model.parameters()) * 1e-6:.2f}M")

    eos_setting = args.eos_setting

    bind_nsml(model, tokenizer, args, eos=eos_setting)
    if args.pause:
        nsml.paused(scope=locals())

    #train_data, valid_data = None, None
    if args.mode == "train" or args.mode == "pretrain" or args.mode == "semi-train":
        if args.mode == "train":
            noisy_sents = read_strings(os.path.join(args.data_dir, "train_data", "train_data"))
            sents_annotation = read_strings(os.path.join(args.data_dir, "train_data", "train_annotation"))
            clean_sents = read_strings(os.path.join(args.data_dir, "train_label"))

        if args.mode == "semi-train":
            noisy_sents = read_strings(os.path.join(args.data_dir, "train_data", "train_data"))
            sents_annotation = read_strings(os.path.join(args.data_dir, "train_data", "train_annotation"))
            clean_sents = read_strings(os.path.join(args.data_dir, "train_label"))

            checkpoint = 'generated_data'
            sess = 't0005/rush1-1/'+str(args.semi_dataset)
            # five copy
            #sess = 't0005/rush1-1/209'
            # one copy
            #sess = 't0005/rush1-1/224'
            semi_noisy_sents, semi_clean_sents = load_generated_data(checkpoint=checkpoint, session=sess)
            semi_sents_annotation = ['None'] * len(semi_noisy_sents)

        if args.mode == "pretrain":
            print("PRETRAIN MODE ON!!")
            checkpoint = 'generated_data'
            sess = 't0005/rush1-1/113'
            noisy_sents, clean_sents = load_generated_data(checkpoint=checkpoint, session=sess)
            sents_annotation = ['None']*len(noisy_sents)

        error_type_counter = Counter()

        for annotation in sents_annotation:
            error_type_counter += Counter(annotation.split(','))

        print(error_type_counter)

        # noise-cleaning version
        # pairs = [{"noisy": preprocess_sentence(noisy), "clean": clean} for noisy, clean in zip(noisy_sents, clean_sents)]
        # original version

        if args.mode == "semi-train":
            pairs = [{"noisy": noisy, "clean": clean, "annotation": annot} for noisy, clean, annot in
                     zip(noisy_sents, clean_sents, sents_annotation)]
            semi_pairs = [{"noisy": noisy, "clean": clean, "annotation": annot} for noisy, clean, annot in
                     zip(semi_noisy_sents, semi_clean_sents, semi_sents_annotation)]

            train_data = pairs[:-args.num_val_data]+semi_pairs
            valid_data = pairs[-args.num_val_data:]
            logger.info(f"# of train data: {len(train_data)}")
            logger.info(f"# of valid data: {len(valid_data)}")

            train_sents = [x['noisy'] for x in train_data] + [x['clean'] for x in train_data]
            tokenizer = CharTokenizer.from_strings(train_sents, args.vocab_size)
            bind_nsml(model, tokenizer, args, eos=eos_setting)

        else:
            pairs = [{"noisy": noisy, "clean": clean, "annotation": annot} for noisy, clean, annot in zip(noisy_sents, clean_sents, sents_annotation)]

            train_data, valid_data = pairs[:-args.num_val_data], pairs[-args.num_val_data:]
            logger.info(f"# of train data: {len(train_data)}")
            logger.info(f"# of valid data: {len(valid_data)}")

            train_sents = [x['noisy'] for x in train_data] + [x['clean'] for x in train_data]
            #tokenizer = CharTokenizer.from_strings(train_sents, args.vocab_size)
            bind_nsml(model, tokenizer, args,eos=eos_setting)



        ## to load pretrained model
        nsml.load(checkpoint='best', session='t0005/rush1-2/79')
        #print(tokenizer.vocab)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model, dim=1)

    if args.mode == "train" or args.mode == "pretrain" or args.mode == 'semi-train':
        train(model, tokenizer, train_data, valid_data, args, eos=eos_setting)
Example #27
        shuffle=True,
        collate_fn=collate_trainval,
    )
    #########################################################

    ############### model, optimizer ########################
    print("loading model and optimizer...")
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("using GPU numbers {}".format(CONFIG.hyperparam.misc.gpu_ids))
    else:
        device = torch.device("cpu")
        print("using CPU")
    model = TransformerModel(
        CONFIG,
        vocab_size=len(tokenizer),
        bos_idx=tokenizer.bos_idx,
        pad_idx=tokenizer.pad_idx,
    )
    model = model.to(device)
    if CONFIG.hyperparam.optimization.name == "Adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=CONFIG.hyperparam.optimization.lr,
            betas=(
                CONFIG.hyperparam.optimization.beta1,
                CONFIG.hyperparam.optimization.beta2,
            ),
            weight_decay=CONFIG.hyperparam.optimization.weight_decay,
        )
    else:
        raise NotImplementedError("only Adam implemented")
Example #28
                                init_token='<sos>',
                                eos_token='<eos>',
                                lower=True)
    train_txt, val_txt, test_txt = torchtext.datasets.WikiText2.splits(
        TEXT,
        root='datas',
        train='wiki.train.tokens',
        validation='wiki.valid.tokens',
        test='wiki.test.tokens')

    # build the vocabulary from the training set
    TEXT.build_vocab(train_txt)

    model = TransformerModel(len(TEXT.vocab.stoi),
                             ninp=200,
                             nhead=2,
                             nhid=200,
                             nlayers=2,
                             dropout=0.2).to(device)
    # load the trained parameters into the model
    # checkpoint = torch.load('datasets/models/best_model.pth.tar')
    checkpoint = torch.load('temp/models/best_model.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])

    # known (seed) sequence
    history = 'it seems'
    h = []
    for w in history.split():
        h.append([TEXT.vocab.stoi[w]])

    while True:
        # convert the list to a tensor, then compute the model output
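        # Sketch of the truncated loop body (the original continuation is not
        # shown): greedy decoding until <eos> or a length cap; the model-call
        # signature model(inp) is an assumption.
        inp = torch.tensor(h, dtype=torch.long).to(device)  # shape: (seq_len, 1)
        with torch.no_grad():
            output = model(inp)
        next_id = output[-1, 0].argmax().item()
        if TEXT.vocab.itos[next_id] == '<eos>' or len(h) >= 50:
            break
        h.append([next_id])
    print(' '.join(TEXT.vocab.itos[idx[0]] for idx in h))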
Example #29
def main(args):
    random_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda")
    device = torch.device("cuda" if args.cuda else "cpu")

    corpus = data.Corpus(args.data)
    ntokens = len(corpus.dictionary)
    print('loaded dictionary')
    if args.model == 'Transformer':
        model = TransformerModel(
            ntokens,
            args.emsize,
            args.nhead,
            args.nhid,
            args.nlayers,
            args.dropout).to(device)
    else:
        model = RNNModel(
            args.model,
            ntokens,
            args.emsize,
            args.nhid,
            args.nlayers,
            args.dropout,
            args.tied).to(device)

    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print('loaded model')

    is_transformer_model = hasattr(
        model, 'model_type') and model.model_type == 'Transformer'
    if not is_transformer_model:
        hidden = model.init_hidden(1)
    input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device)
    with open(args.outf, 'w') as outf:
        with torch.no_grad():  # no tracking history
            for i in range(args.words):
                if is_transformer_model:
                    output = model(input, False)
                    word_weights = output[-1].squeeze().div(
                        args.temperature).exp().cpu()
                    word_idx = torch.multinomial(word_weights, 1)[0]
                    word_tensor = torch.Tensor([[word_idx]]).long().to(device)
                    input = torch.cat([input, word_tensor], 0)
                else:
                    output, hidden = model(input, hidden)
                    word_weights = output.squeeze().div(args.temperature).exp().cpu()
                    word_idx = torch.multinomial(word_weights, 1)[0]
                    input.fill_(word_idx)

                word = corpus.dictionary.idx2word[word_idx]

                outf.write(word + ('\n' if i % 20 == 19 else ' '))

                if i % args.log_interval == 0:
                    print('| Generated {}/{} words'.format(i, args.words))
Example #30
def train(args, logger, model_save_dir):
    # set seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    if args.infre:
        pretrain_embed = pickle.load(
            open('../embed_infre/{}'.format(args.embed), 'rb'))
        train_dataset = pickle.load(open('../data/train.infre.pkl', 'rb'))

    else:
        pretrain_embed = pickle.load(
            open('../embed/{}'.format(args.embed), 'rb'))
        train_dataset = pickle.load(open('../data/train.pkl', 'rb'))

    try:
        pretrain_embed = torch.from_numpy(pretrain_embed).float()
    except:
        pretrain_embed = pretrain_embed.float()

    train_dataset = ProbingListMaxDataset(train_dataset)
    dataLoader = DataLoader(train_dataset,
                            batch_size=args.batch_sz,
                            shuffle=True)
    if args.model == 'BiLSTM':
        model = ListMax(args.hidden_dim, pretrain_embed)
    elif args.model == 'CNN':
        model = CNN(pretrained=pretrain_embed)
    else:
        model = TransformerModel(pretrained=pretrain_embed,
                                 nhead=5,
                                 nhid=50,
                                 nlayers=2)

    # model = ListMaxTransformer(args.hidden_dim, pretrain_embed)
    if torch.cuda.is_available():
        model.cuda()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    best_dev_acc = 0
    best_dev_model = None
    best_dev_test_acc = 0
    counter = 0

    for epoch in range(1, args.n_epoch + 1):
        train_loss = 0
        train_acc = 0
        model.train()
        iteration = 0
        for batch in dataLoader:
            optimizer.zero_grad()

            x = torch.stack(batch['input'])  # 5 x bz
            y = batch['label']  # bz

            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            output = model(x)
            loss = criterion(output, y)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            train_acc += (output.argmax(1) == y).sum().item()

            iteration += 1
            # if iteration % args.iter_print == 0:
            #     logger.info('{}-{}-{}-{}'.format(epoch, iteration, train_loss, train_acc))

        train_loss = train_loss / len(train_dataset)
        train_acc = train_acc / len(train_dataset)
        dev_loss, dev_acc = val(model, mode='dev')
        test_loss, test_acc = val(model, mode='test')
        if dev_acc > best_dev_acc:
            best_dev_model = model.state_dict().copy()
            best_dev_acc = dev_acc
            best_dev_test_acc = test_acc
            counter = 0
        else:
            counter += 1

        logger.info('TRAIN: epoch:{}-loss:{}-acc:{}'.format(
            epoch, train_loss, train_acc))
        logger.info('DEV: epoch:{}-loss:{}-acc:{}'.format(
            epoch, dev_loss, dev_acc))
        logger.info('TEST: epoch:{}-loss:{}-acc:{}'.format(
            epoch, test_loss, test_acc))
        logger.info('BEST-DEV-ACC: {}, BEST-DEV-TEST-ACC:{}'.format(
            best_dev_acc, best_dev_test_acc))
        #
        # if counter > 30:
        #     break

    torch.save(
        best_dev_model,
        model_save_dir + '/model-{}-{}.pt'.format(best_dev_test_acc, args.lr))