Exemplo n.º 1
0
def train_model(config_path: str):
    """Train a randomly initialised BERT-to-BERT translation model.

    Reads the training parameters from ``config_path``, builds and saves the
    source/target vocabularies, constructs an ``EncoderDecoderModel`` sized to
    those vocabularies, trains it with the Hugging Face ``Trainer`` (early
    stopping with a patience of 3 evaluations) and saves the trained model to
    the ``bert2bert`` directory.

    Args:
        config_path: Path handed to ``read_training_pipeline_params``.
    """
    # The writer is not used for logging below, but constructing it is kept
    # because it is a visible side effect of the original function; it is now
    # closed on every exit path instead of being leaked.
    writer = SummaryWriter()
    try:
        # Kept under its own name so it is not confused with (or overwritten
        # by) the model configuration built further down.
        train_params = read_training_pipeline_params(config_path)
        logger.info("pretrained_emb {b}",
                    b=train_params.net_params.pretrained_emb)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info("Device is {device}", device=device)

        SRC, TRG, dataset = get_dataset(train_params.dataset_path, False)
        train_data, valid_data, test_data = split_data(
            dataset, **train_params.split_ration.__dict__)

        # Vocabularies are built from the training split only.
        SRC.build_vocab(train_data, min_freq=3)
        TRG.build_vocab(train_data, min_freq=3)
        torch.save(SRC.vocab, train_params.src_vocab_name)
        torch.save(TRG.vocab, train_params.trg_vocab_name)
        logger.info("Vocab saved")
        print(f"Unique tokens in source (ru) vocabulary: {len(SRC.vocab)}")
        print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")

        # The test iterator is created alongside the others but is not
        # consumed by this function.
        train_iterator, valid_iterator, _test_iterator = BucketIterator.splits(
            (train_data, valid_data, test_data),
            batch_size=train_params.BATCH_SIZE,
            device=device,
            sort_key=_len_sort_key,
        )

        # Configure the decoder up front and build the model exactly once.
        # The original built a throw-away model first only to read its
        # sub-configs back, doubling construction time and peak memory.
        config_encoder = BertConfig(vocab_size=len(SRC.vocab))
        config_decoder = BertConfig(vocab_size=len(TRG.vocab))
        config_decoder.is_decoder = True
        config_decoder.add_cross_attention = True
        model_config = EncoderDecoderConfig.from_encoder_decoder_configs(
            config_encoder, config_decoder)
        model = EncoderDecoderModel(config=model_config)

        args = TrainingArguments(
            output_dir="output",
            evaluation_strategy="steps",
            eval_steps=500,
            per_device_train_batch_size=128,
            per_device_eval_batch_size=128,
            num_train_epochs=10,
            save_steps=3000,
            seed=0,
            load_best_model_at_end=True,
        )
        # NOTE(review): Trainer documents train_dataset/eval_dataset as
        # dataset objects; BucketIterator instances are passed here, as in
        # the original - confirm this works with the installed versions.
        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=train_iterator,
            eval_dataset=valid_iterator,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
        )
        trainer.train()

        model.save_pretrained("bert2bert")
    finally:
        writer.close()
Exemplo n.º 2
0
def get_model(vocab_size=30000):
    """Build an untrained BERT encoder-decoder model.

    Both halves use the default ``BertConfig`` apart from the vocabulary
    size; the decoder is additionally flagged as a decoder with
    cross-attention enabled.

    Args:
        vocab_size: Vocabulary size applied to both encoder and decoder.

    Returns:
        A freshly constructed ``EncoderDecoderModel``.
    """
    encoder_cfg = BertConfig(vocab_size=vocab_size)

    decoder_cfg = BertConfig(vocab_size=vocab_size)
    decoder_cfg.is_decoder = True
    decoder_cfg.add_cross_attention = True

    combined_cfg = EncoderDecoderConfig.from_encoder_decoder_configs(
        encoder_cfg, decoder_cfg)
    return EncoderDecoderModel(config=combined_cfg)
    def __init__(self):
        """Create the encoder, decoder and output layer of the model.

        Encoder and decoder are both ``BertModel`` instances with 6 hidden
        layers, hidden size 512, 8 attention heads and a vocabulary of 21128
        entries; the decoder config additionally has ``is_decoder`` set.
        ``self.linear`` is a bias-free 512 -> 21128 layer (presumably the
        projection to vocabulary logits - confirm against ``forward``).
        """
        super().__init__()
        hidden_dim = 512
        vocab_dim = 21128
        # Hyper-parameters shared by the encoder and the decoder.
        bert_kwargs = dict(num_hidden_layers=6,
                           vocab_size=vocab_dim,
                           hidden_size=hidden_dim,
                           num_attention_heads=8)

        self.encoder = BertModel(BertConfig(**bert_kwargs))

        # NOTE(review): only is_decoder is set; add_cross_attention is left
        # at its default - confirm that is intended for the installed
        # transformers version.
        decoder_config = BertConfig(**bert_kwargs)
        decoder_config.is_decoder = True
        self.decoder = BertModel(decoder_config)

        self.linear = nn.Linear(hidden_dim, vocab_dim, bias=False)
Exemplo n.º 4
0
    def __init__(self):
        """Create the encoder, decoder and output layer of the model.

        Encoder and decoder are both ``BertModel`` instances with 6 hidden
        layers, hidden size 512, 8 attention heads and a vocabulary of 30522
        entries; the decoder config has ``is_decoder`` and
        ``add_cross_attention`` enabled. ``self.linear`` is a bias-free
        512 -> 30522 layer (presumably the projection to vocabulary logits -
        confirm against ``forward``).
        """
        super().__init__()
        hidden_dim = 512
        # 21128 for Chinese, 30522 for English
        vocab_dim = 30522
        # Hyper-parameters shared by the encoder and the decoder.
        bert_kwargs = dict(num_hidden_layers=6,
                           vocab_size=vocab_dim,
                           hidden_size=hidden_dim,
                           num_attention_heads=8)

        self.encoder = BertModel(BertConfig(**bert_kwargs))

        decoder_config = BertConfig(**bert_kwargs)
        decoder_config.is_decoder = True
        decoder_config.add_cross_attention = True
        self.decoder = BertModel(decoder_config)

        self.linear = nn.Linear(hidden_dim, vocab_dim, bias=False)
    def __init__(self, config, language_pretrained_model_path=None):
        """Build the embeddings, visual transforms and BERT decoder.

        Args:
            config: Model configuration. This method reads ``vocab_size``,
                ``hidden_size``, ``max_position_embeddings``,
                ``type_vocab_size``, ``hidden_dropout_prob``,
                ``position_padding_idx``, ``visual_size``, ``visual_ln``,
                ``visual_scale_text_init``, ``visual_scale_object_init``,
                ``with_pooler`` and ``word_embedding_frozen`` from it.
            language_pretrained_model_path: When not ``None``, passed to
                ``self.load_language_pretrained_model`` after the weights
                have been initialised.
        """
        super(VisualLinguisticBertDecoder, self).__init__(config)

        self.config = config

        # embeddings
        self.word_embeddings = nn.Embedding(config.vocab_size,
                                            config.hidden_size)
        # Single-row embedding table (one learned vector).
        self.end_embedding = nn.Embedding(1, config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)
        self.embedding_LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.embedding_dropout = nn.Dropout(config.hidden_dropout_prob)

        # for compatibility of roberta
        self.position_padding_idx = config.position_padding_idx

        # visual transform
        # The linear projections are only created when the visual feature size
        # differs from the hidden size; otherwise both stay None.
        self.visual_1x1_text = None
        self.visual_1x1_object = None
        if config.visual_size != config.hidden_size:
            self.visual_1x1_text = nn.Linear(config.visual_size,
                                             config.hidden_size)
            self.visual_1x1_object = nn.Linear(config.visual_size,
                                               config.hidden_size)
        # Either a LayerNorm per visual stream, or a single learnable scale
        # parameter per stream initialised from the config.
        if config.visual_ln:
            self.visual_ln_text = BertLayerNorm(config.hidden_size, eps=1e-12)
            self.visual_ln_object = BertLayerNorm(config.hidden_size,
                                                  eps=1e-12)
        else:
            visual_scale_text = nn.Parameter(torch.as_tensor(
                self.config.visual_scale_text_init, dtype=torch.float),
                                             requires_grad=True)
            self.register_parameter('visual_scale_text', visual_scale_text)
            visual_scale_object = nn.Parameter(torch.as_tensor(
                self.config.visual_scale_object_init, dtype=torch.float),
                                               requires_grad=True)
            self.register_parameter('visual_scale_object', visual_scale_object)

        # *********************************************
        # FM addition - Set-up decoder layer for MT
        #  Initializing a BERT bert-base-uncased style configuration
        # Only vocab_size, num_hidden_layers and is_decoder are overridden;
        # every other value is the BertConfig default.
        configuration = BertConfig()
        configuration.vocab_size = config.vocab_size
        # FM edit: reduce size - 12 layers doesn't fit in single 12GB GPU
        configuration.num_hidden_layers = 6
        configuration.is_decoder = True
        # Initializing a model from the bert-base-uncased style configuration
        self.decoder = BertModel(configuration)
        # *********************************************

        if self.config.with_pooler:
            self.pooler = BertPooler(config)

        # init weights
        # self.apply visits every submodule created above, including
        # self.decoder.
        self.apply(self.init_weights)
        # Done after init_weights so the configured initial scales are the
        # values that end up in the visual LayerNorm weights.
        if config.visual_ln:
            self.visual_ln_text.weight.data.fill_(
                self.config.visual_scale_text_init)
            self.visual_ln_object.weight.data.fill_(
                self.config.visual_scale_object_init)

        # load language pretrained model
        if language_pretrained_model_path is not None:
            self.load_language_pretrained_model(language_pretrained_model_path)

        # When the word embeddings are frozen, a separate trainable table is
        # created for the first NUM_SPECIAL_WORDS entries, initialised as a
        # copy of the corresponding rows of the (possibly just loaded) word
        # embeddings.
        if config.word_embedding_frozen:
            for p in self.word_embeddings.parameters():
                p.requires_grad = False
            self.special_word_embeddings = nn.Embedding(
                NUM_SPECIAL_WORDS, config.hidden_size)
            self.special_word_embeddings.weight.data.copy_(
                self.word_embeddings.weight.data[:NUM_SPECIAL_WORDS])