Example No. 1
    def __init__(self, config, num=0):
        super(roBerta, self).__init__()
        model_config = RobertaConfig()
        model_config.vocab_size = config.vocab_size
        model_config.hidden_size = config.hidden_size[0]
        model_config.num_attention_heads = 16
        # how the loss is computed
        self.loss_method = config.loss_method
        self.multi_drop = config.multi_drop
        # num_labels is referenced by the classifier head below; assumed to be a field on config
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(model_config)
        if config.requires_grad:
            for param in self.roberta.parameters():
                param.requires_grad = True

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size[num]
        if self.loss_method in ['binary', 'focal_loss', 'ghmc']:
            self.classifier = nn.Linear(self.hidden_size, 1)
        else:
            self.classifier = nn.Linear(self.hidden_size, self.num_labels)
        self.text_linear = nn.Linear(config.embeding_size,
                                     config.hidden_size[0])
        self.vocab_layer = nn.Linear(config.hidden_size[0], config.vocab_size)

        self.classifier.apply(self._init_weights)
        self.roberta.apply(self._init_weights)
        self.text_linear.apply(self._init_weights)
        self.vocab_layer.apply(self._init_weights)
Example No. 2
    def __init__(self, args, config, dataloader, ckpdir):

        self.device = torch.device('cuda') if (
            args.gpu and torch.cuda.is_available()) else torch.device('cpu')
        if torch.cuda.is_available(): print('[Runner] - CUDA is available!')
        self.model_kept = []
        self.global_step = 1
        self.log = SummaryWriter(ckpdir)

        self.args = args
        self.config = config
        self.dataloader = dataloader
        self.ckpdir = ckpdir

        # optimizer
        self.learning_rate = float(config['optimizer']['learning_rate'])
        self.warmup_proportion = config['optimizer']['warmup_proportion']
        self.gradient_accumulation_steps = config['optimizer'][
            'gradient_accumulation_steps']
        self.gradient_clipping = config['optimizer']['gradient_clipping']

        # Training details
        self.apex = config['runner']['apex']
        self.total_steps = config['runner']['total_steps']
        self.log_step = config['runner']['log_step']
        self.save_step = config['runner']['save_step']
        self.duo_feature = config['runner']['duo_feature']
        self.max_keep = config['runner']['max_keep']

        # Model configs
        self.semantic_config = RobertaConfig(**config['semantic'])
        self.acoustic_config = RobertaConfig(**config['acoustic'])
Example No. 3
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ROBERTA constructor."
            )

        # Given the check above, exactly one of the branches below is taken; the final else is unreachable.
        if vocab_size is not None:
            config = RobertaConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = RobertaModel(config)
        elif pretrained_model_name is not None:
            model = RobertaModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = RobertaConfig.from_json_file(config_filename)
            model = RobertaModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" + " be passed into the ROBERTA constructor"
            )

        model.to(self._device)

        self.add_module("roberta", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
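Below is a minimal standalone sketch (not from the example) of the vocab_size branch above, using only the public transformers API; all dimensions are illustrative placeholders.

from transformers import RobertaConfig, RobertaModel

# Minimal sketch of the "build from vocab_size" branch; sizes are placeholders.
config = RobertaConfig(vocab_size=30000,
                       hidden_size=256,
                       num_hidden_layers=4,
                       num_attention_heads=4,
                       intermediate_size=1024,
                       max_position_embeddings=512)
model = RobertaModel(config)
print(model.config.hidden_size)  # 256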
Example No. 4
 def __init__(self, tokenizer):
     super(RobertaForMultipleChoiceWithLM2, self).__init__()
     self.roberta_lm = RobertaForMaskedLM.from_pretrained(
         'pre_weights/roberta-large_model.bin',
         config=RobertaConfig.from_pretrained('roberta-large'))
     self.roberta = RobertaForMultipleChoice.from_pretrained(
         'pre_weights/roberta-large_model.bin',
         config=RobertaConfig.from_pretrained('roberta-large'))
     self.tokenizer = tokenizer
     self.lamda = nn.Parameter(torch.tensor([1.0]))
Example No. 5
 def init_model(self, model_name):
     if model_name == 'Bert':
         config = BertConfig.from_pretrained('bert-base-uncased')
         config.hidden_dropout_prob = 0.2
         config.attention_probs_dropout_prob = 0.2
         self.model = BertForMultipleChoice.from_pretrained(
             'pre_weights/bert-base-uncased_model.bin',
             config=config)
     elif model_name == 'Roberta':
         config = RobertaConfig.from_pretrained('roberta-large')
         config.hidden_dropout_prob = 0.2
         config.attention_probs_dropout_prob = 0.2
         self.model = RobertaForMultipleChoice.from_pretrained(
             'pre_weights/roberta-large_model.bin',
             config=config)
         # print('load csqa pretrain weights...')
         # self.model.load_state_dict(torch.load(
         #     'checkpoints/commonsenseQA_pretrain_temp.pth'
         # ))
     elif model_name == 'Albert':
         self.model = AlbertForMultipleChoice.from_pretrained(
             'pre_weights/albert-xxlarge_model.bin',
             config=AlbertConfig.from_pretrained('albert-xxlarge-v1'))
     elif model_name == 'RobertaLM':
         config = RobertaConfig.from_pretrained('roberta-large')
         config.hidden_dropout_prob = 0.2
         config.attention_probs_dropout_prob = 0.2
         self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
             'pre_weights/roberta-large_model.bin',
             config=config)
     elif model_name == 'RobertaLM2':
         self.model = RobertaForMultipleChoiceWithLM2(self.tokenizer)
     elif 'GNN' in model_name:
         self.model = SOTA_goal_model(self.args)
     elif 'LM' in model_name:
         config = RobertaConfig.from_pretrained('roberta-large')
         config.hidden_dropout_prob = 0.2
         config.attention_probs_dropout_prob = 0.2
         self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
             'pre_weights/roberta-large_model.bin',
             config=config)
     elif 'KBERT' in model_name:
         config = RobertaConfig.from_pretrained('roberta-large')
         config.hidden_dropout_prob = 0.2
         config.attention_probs_dropout_prob = 0.2
         self.model = RobertaForMultipleChoice.from_pretrained(
             'pre_weights/roberta-large_model.bin',
             config=config)
     else:
         raise ValueError('Unknown model name: {}'.format(model_name))
     self.model.to(self.args['device'])
     if torch.cuda.device_count() > 1 and self.args['use_multi_gpu']:
         print("{} GPUs are available. Let's use them.".format(
             torch.cuda.device_count()))
         self.model = torch.nn.DataParallel(self.model)
Example No. 6
def load_model(model_path, model_name, num_classes, n_layers):
    # NOTE: n_layers was an undefined free variable in the original snippet;
    # it is now passed in explicitly.
    if model_name == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(model_name,
                                                  do_lower_case=True)
        config = BertConfig.from_pretrained(model_name)
    else:
        tokenizer = RobertaTokenizer.from_pretrained(model_name,
                                                     do_lower_case=True)
        config = RobertaConfig.from_pretrained(model_name)

    # Request all hidden states before the backbone is built so the
    # classifier head can pool over intermediate layers.
    config.output_hidden_states = True

    if model_name == 'bert-base-uncased':
        transformer_model = BertModel.from_pretrained(model_name,
                                                      config=config)
    else:
        transformer_model = RobertaModel.from_pretrained(model_name,
                                                         config=config)

    model = SequenceClassifier(transformer_model, config, n_layers,
                               num_classes)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    return model, tokenizer
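A hedged usage sketch for load_model above; the checkpoint path, model name, class count, and layer count are placeholders.

# Hypothetical call; the path and hyperparameters below are placeholders.
model, tokenizer = load_model('checkpoints/roberta_classifier.pt',
                              'roberta-base', num_classes=3, n_layers=2)
inputs = tokenizer("an example sentence", return_tensors='pt')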
Example No. 7
    def __init__(self,
                 args,
                 tokenizer,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.tokenizer = tokenizer
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.id2label = load_id2label(args.id2label)
        self.num_labels = len(self.id2label)

        self.config = RobertaConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task="VLSP2020-Relex",
            id2label={str(i): label
                      for i, label in self.id2label.items()},
            label2id={label: i
                      for i, label in self.id2label.items()},
        )
        if self.args.model_type == "es":
            self.model = RobertaEntityStarts.from_pretrained(
                args.model_name_or_path, config=self.config)
        elif self.args.model_type == "all":
            self.model = RobertaConcatAll.from_pretrained(
                args.model_name_or_path, config=self.config)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
Example No. 8
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = random_attention_mask(
                [self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        config = RobertaConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            is_decoder=False,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, attention_mask
Example No. 9
    def __init__(self):
        super(TweetModel, self).__init__()

        config = RobertaConfig.from_pretrained('roberta/config.json',
                                               output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(
            'roberta/pytorch_model.bin', config=config)
        self.dropout = nn.Dropout(0.15)
        self.cnn1 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                                  torch.nn.BatchNorm1d(128),
                                  torch.nn.LeakyReLU())
        self.cnn1_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                    torch.nn.BatchNorm1d(64),
                                    torch.nn.LeakyReLU())
        self.cnn2 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                                  torch.nn.BatchNorm1d(128),
                                  torch.nn.LeakyReLU())
        self.cnn2_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                    torch.nn.BatchNorm1d(64),
                                    torch.nn.LeakyReLU())
        self.fc1 = nn.Linear(64, 1)
        self.fc2 = nn.Linear(64, 1)
        nn.init.normal_(self.fc1.weight, std=0.02)
        nn.init.normal_(self.fc1.bias, 0)
        nn.init.normal_(self.fc2.weight, std=0.02)
        nn.init.normal_(self.fc2.bias, 0)
Example No. 10
def main():

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    config = RobertaConfig.from_pretrained(cf.model_base,
                                           num_labels=cf.num_labels,
                                           finetuning_task=cf.finetuning_task)
    tokenizer = RobertaTokenizer.from_pretrained(cf.model_base,
                                                 do_lower_case=True)
    model = RobertaForSequenceClassification.from_pretrained(cf.model_base,
                                                             config=config)
    model.to(device)

    train_raw_text = get_raw_text(cf.train_file_dir)

    train_features = tokenize_raw_text(train_raw_text, tokenizer)

    train_dataset = create_dataset(train_features)

    optimizer = AdamW(model.parameters(),
                      lr=cf.learning_rate,
                      eps=cf.adam_epsilon)

    global_step, training_loss = train(train_dataset,
                                       model,
                                       optimizer,
                                       batch_size=cf.train_batch_size,
                                       num_epochs=cf.num_epochs)

    torch.save(model.state_dict(), cf.model_file_dir)
Example No. 11
def load_model(args):

    if args.transformer_model.startswith('bert'):
        path = '/home/yinfan/.cache/torch/transformers/bert-base-uncased-pytorch_model.bin'
        config = BertConfig.from_pretrained(args.transformer_model,
                                            output_hidden_states=True)
        tokenizer = BertTokenizer.from_pretrained(args.transformer_model,
                                                  do_lower_case=True)
        model = BertModel.from_pretrained(
            path,
            from_tf=bool('.ckpt' in args.transformer_model),
            config=config)
    else:
        path = '/home/yinfan/.cache/torch/transformers/roberta-base-pytorch_model.bin'
        tokenizer = RobertaTokenizer.from_pretrained(args.transformer_model)
        config = RobertaConfig.from_pretrained(args.transformer_model,
                                               output_hidden_states=True)
        model = RobertaModel.from_pretrained(
            path,
            from_tf=bool('.ckpt' in args.transformer_model),
            config=config)
    # roberta = RobertaModel.from_pretrained(args.roberta_model, cache_dir=args.cache_dir, config=config)
    model_embedding = model.embeddings
    model_embedding.to(args.device)
    if args.n_gpu > 1:
        model_embedding = torch.nn.DataParallel(model_embedding)
    model.to(args.device)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    if args.untrained_transformer == 1:
        model.apply(init_weights)
    return model, model_embedding, tokenizer
Example No. 12
    def __init__(self, args, device='cpu'):
        super().__init__()
        self.args = args
        self.device = device
        self.epoch = 0
        self.dropout = nn.Dropout(self.args.dropout)

        # Entailment Tracking
        # roberta_model_path = '/research/king3/ik_grp/yfgao/pretrain_models/huggingface/roberta-base'
        roberta_model_path = args.pretrained_lm_path
        roberta_config = RobertaConfig.from_pretrained(roberta_model_path,
                                                       cache_dir=None)
        self.roberta = RobertaModel.from_pretrained(roberta_model_path,
                                                    cache_dir=None,
                                                    config=roberta_config)
        encoder_layer = TransformerEncoderLayer(self.args.bert_hidden_size, 12,
                                                4 * self.args.bert_hidden_size)
        encoder_norm = nn.LayerNorm(self.args.bert_hidden_size)
        self.transformer_encoder = TransformerEncoder(encoder_layer,
                                                      args.trans_layer,
                                                      encoder_norm)
        self._reset_transformer_parameters()
        self.w_entail = nn.Linear(self.args.bert_hidden_size, 3, bias=True)

        # Logic Reasoning
        self.entail_emb = nn.Parameter(
            torch.rand(3, self.args.bert_hidden_size))
        nn.init.normal_(self.entail_emb)

        self.w_selfattn = nn.Linear(self.args.bert_hidden_size * 2,
                                    1,
                                    bias=True)
        self.w_output = nn.Linear(self.args.bert_hidden_size * 2, 4, bias=True)
Example No. 13
    def __init__(self,
                 data_dir: Path,
                 tokenizer: PreTrainedTokenizer,
                 dataset: Dataset,
                 local_rank=-1):
        assert data_dir, "data_dir input needed"

        self.model_dir = f"{data_dir}/results"
        self.dataset = dataset

        self.config = RobertaConfig(
            vocab_size=52_000,
            max_position_embeddings=514,
            num_attention_heads=12,
            num_hidden_layers=6,
            type_vocab_size=1,
        )
        self.training_args = TrainingArguments(
            run_name=data_dir.name,
            local_rank=local_rank,
            learning_rate=0.00005,  # default 0.00005
            output_dir=f"{self.model_dir}",
            overwrite_output_dir=False,
            num_train_epochs=1,
            per_device_train_batch_size=48,  # Nvidia K80 99%
            seed=42,
            save_steps=10_000,
            save_total_limit=1,
        )

        self.data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
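The constructor above only prepares the config, TrainingArguments, and data collator. Below is a hypothetical companion method (not part of the original snippet) showing how these pieces could be wired into a Trainer for masked-LM pretraining, assuming self.dataset is already tokenized.

    def train(self):
        # Hypothetical method: combine the config, collator, and
        # TrainingArguments prepared in __init__ into a Trainer run.
        from transformers import RobertaForMaskedLM, Trainer

        model = RobertaForMaskedLM(config=self.config)  # randomly initialised
        trainer = Trainer(
            model=model,
            args=self.training_args,
            data_collator=self.data_collator,
            train_dataset=self.dataset,
        )
        trainer.train()
        trainer.save_model(self.model_dir)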
Example No. 14
 def __init__(self):
     self.num_labels: int = 2
     config: RobertaConfig = RobertaConfig.from_pretrained(
         "./BERTweet_base_transformers/config.json",
         output_hidden_states=True,
     )
     super().__init__(config)
     self.bertweet: RobertaModel = RobertaModel.from_pretrained(
         "./BERTweet_base_transformers/model.bin", config=config)
     self.dense = nn.Linear(
         in_features=768 * 4,
         out_features=1024,
     )
     self.dropout = nn.Dropout(p=0.15)
     self.dense_2 = nn.Linear(
         in_features=1024,
         out_features=512,
     )
     self.dense_3 = nn.Linear(
         in_features=512,
         out_features=256,
     )
     self.classifier = nn.Linear(
         in_features=256,
         out_features=self.num_labels,
     )
Example No. 15
def load_transformer_model(model_dir):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file('{}/config.json'.format(model_dir))
    model = RobertaForSequenceClassification.from_pretrained(model_dir,
                                                             config=config)
    model = model.to(device)
    return model
Example No. 16
    def __init__(self, config: Bunch) -> None:
        pl.LightningModule.__init__(self)
        self.config = config

        bpe_codes_path = os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/bpe.codes",
        )
        bpe = fastBPE(Namespace(bpe_codes=bpe_codes_path))
        vocab = Dictionary()
        vocab.add_from_file(
            os.path.join(
                config.pretrained_model_base_path,
                "BERTweet_base_transformers/dict.txt",
            ))

        tokenizer = BertweetTokenizer(self.config.max_tokens_per_tweet, bpe,
                                      vocab)
        self.data_processor = BertweetDataProcessor(config, tokenizer)

        model_config = RobertaConfig.from_pretrained(
            os.path.join(
                config.pretrained_model_base_path,
                "BERTweet_base_transformers/config.json",
            ))
        self.model = RobertaForSequenceClassification.from_pretrained(
            os.path.join(
                config.pretrained_model_base_path,
                "BERTweet_base_transformers/model.bin",
            ),
            config=model_config,
        )
        self.loss = CrossEntropyLoss()
Example No. 17
    def roberta_build(self,
                      sparse=False,
                      base_model=None,
                      density=1.0,
                      eval=True):
        if base_model is None:
            config = RobertaConfig(
                vocab_size=52_000,
                max_position_embeddings=514,
                num_attention_heads=12,
                num_hidden_layers=6,
                type_vocab_size=1,
            )

            model = RobertaForMaskedLM(config=config).cuda()
        else:
            model = base_model

        if sparse:
            mp = BlockSparseModelPatcher()
            mp.add_pattern(
                r"roberta\.encoder\.layer\.[0-9]+\.intermediate\.dense",
                {"density": density})
            mp.add_pattern(r"roberta\.encoder\.layer\.[0-9]+\.output\.dense",
                           {"density": density})
            mp.patch_model(model)

        if eval:
            model.eval()

        return model, model.num_parameters()
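An illustrative call for roberta_build above; `builder` stands for an instance of the class that defines it, and the 25% density is an arbitrary example value.

# Assumes `builder` is an instance of the class defining roberta_build.
dense_model, n_dense = builder.roberta_build(sparse=False)
sparse_model, n_sparse = builder.roberta_build(sparse=True, density=0.25)
print(f"dense parameters: {n_dense}, block-sparse parameters: {n_sparse}")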
Example No. 18
    def __init__(self):
        self.config = RobertaConfig.from_pretrained("roberta-base")
        self.config.output_hidden_states = True

        self.tok = RobertaTokenizer.from_pretrained("roberta-base")
        self.model = RobertaModel.from_pretrained("roberta-base",
                                                  config=self.config)
Example No. 19
    def __init__(self, args):
        super(SOTA_goal_model, self).__init__()
        self.args = args
        # roberta_config = AlbertConfig.from_pretrained('albert-base-v2')
        # self.roberta = AlbertForMultipleChoice.from_pretrained(
        #     'pre_weights/albert-base-v2-pytorch_model.bin', config=roberta_config)
        roberta_config = RobertaConfig.from_pretrained('roberta-large')
        roberta_config.attention_probs_dropout_prob = 0.2
        roberta_config.hidden_dropout_prob = 0.2

        if args.get('with_lm'):
            self.roberta = RobertaForMultipleChoiceWithLM.from_pretrained(
                'pre_weights/roberta-large_model.bin', config=roberta_config)
        else:
            self.roberta = RobertaForMultipleChoice.from_pretrained(
                'pre_weights/roberta-large_model.bin', config=roberta_config)

        from utils.attentionUtils import SelfAttention
        self.gcn = GCNNet()
        self.merge_fc1 = nn.Linear(roberta_config.hidden_size + 128, 512)
        self.attn = SelfAttention(512, 8)
        # self.roberta_fc1 = nn.Linear(roberta_config.hidden_size, 128)  # project the roberta vector down to the same dimension as the gcn output
        # self.gcn_fc1 = nn.Linear(128, 128)  # same as above
        self.fc3 = nn.Linear(512 + roberta_config.hidden_size, 1)
        self.dropout = nn.Dropout(0.2)
Example No. 20
def main():
    args = build_parser().parse_args()

    print("Creating snapshot directory if not exist...")
    if not os.path.exists(args.snapshots_path):
        os.mkdir(args.snapshots_path)

    print("Loading Roberta components...")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    config = RobertaConfig.from_pretrained("roberta-base",
                                           output_hidden_states=True)
    base_model = RobertaModel(config).cuda()

    model = LangInferModel(base_model, config, args.span_heads).cuda()
    optimizer = configure_adam_optimizer(model, args.lr, args.weight_decay,
                                         args.adam_epsilon)
    print("Preparing the data for training...")
    train_loader, test_loaders = build_data_loaders(args, tokenizer)
    criterion = nn.CrossEntropyLoss()

    print(
        f"Training started for {args.epoch_num} epochs. Might take a while...")
    train(args.epoch_num, model, optimizer, criterion, train_loader,
          test_loaders, args.snapshots_path)
    print("Training is now finished. You can check out the results now")
Example No. 21
def get_training_objects(params):
    """
    Define and return training objects
    """
    config = RobertaConfig.from_pretrained(params["model_name"], num_labels=2)
    model = RobertaForSequenceClassification.from_pretrained(
        params["model_name"], config=config)
    model.to(params["device"])
    no_decay = ["bias", "LayerNorm.weight"]
    gpd_params = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": params["weight_decay"],
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(gpd_params, lr=params["lr"], eps=params["adam_epsilon"])
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=params["warmup_steps"],
        num_training_steps=params["total_steps"],
    )
    return model, optimizer, scheduler
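A usage sketch for get_training_objects above; every value in params below is a placeholder, not taken from the source.

import torch

params = {
    "model_name": "roberta-base",  # placeholder checkpoint name
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "weight_decay": 0.01,
    "lr": 2e-5,
    "adam_epsilon": 1e-8,
    "warmup_steps": 0,
    "total_steps": 1000,
}
model, optimizer, scheduler = get_training_objects(params)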
Example No. 22
def get_classification_roberta():
    ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model,
                                                   config=config)

    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x = keras.layers.Dropout(0.2)(x[0])
    x = keras.layers.GlobalAveragePooling1D()(x)
    x = keras.layers.Dense(3, activation='softmax', name='sentiment')(x)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=x)
    lr_schedule = keras.experimental.CosineDecay(5e-5, 1000)
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer, metrics=['acc'])

    return model
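A hypothetical training call for get_classification_roberta above; train_ids, train_att, and train_tti stand for pre-tokenised integer arrays of shape (n_samples, Config.Train.max_len), and train_labels for one-hot vectors with 3 classes.

# Placeholder arrays; shapes follow the input layers defined above.
model = get_classification_roberta()
model.fit([train_ids, train_att, train_tti], train_labels,
          batch_size=32, epochs=3, validation_split=0.1)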
Example No. 23
 def __init__(self, cfg, device):
     super().__init__()
     tokenizer = RobertaTokenizerFast.from_pretrained('./bird_bpe_vocab', max_len=256)
     _config = RobertaConfig(
         vocab_size=tokenizer._tokenizer.get_vocab_size(),
         hidden_size=512,
         num_hidden_layers=4,
         num_attention_heads=8,
         max_position_embeddings=256,
         pad_token_id=1,
         eos_token_id=0,
         bos_token_id=2,
         output_attentions=False,
         output_hidden_states=False
     )
     _model = RobertaForMaskedLM(_config)
     _model.load_state_dict(torch.load('bert_small/checkpoint-1100/pytorch_model.bin'))
     _model.eval()
     self.tokenizer = tokenizer
     self._model = _model
     self.device = device
     self.pad_token = 0
     self.batch_size = cfg.batch_size
     self.proj = None
     if cfg.proj_lang:
         self.proj = nn.Sequential(*[EqualisedLinearLayer(512, cfg.latent_dim, weight_scaling=cfg.weight_scaling), nn.Tanh()])
Example No. 24
def Bertolo_feature_extraction(ids, texts, feature_file_name):
    config = RobertaConfig.from_pretrained("./bert-like models/bertolo/config.json")
    tokenizer1 = AutoTokenizer.from_pretrained("./bertolo",normalization=True)
    model = AutoModel.from_pretrained("./bertolo",config=config)

    feature_dict={}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        #input_ids = torch.tensor([tokenizer.encode(tumblr_text)])
        input_ids = tokenizer1.encode(title, return_tensors="pt")
        print(input_ids)

        #with torch.no_grad():
        features = model(input_ids)[0]  # Models outputs are now tuples
        print(features.size())
        feature = torch.mean(features, 1, True).detach().numpy()
        print(feature[0])

        feature = list(feature[0][0])
        print(feature)
        print(len(feature))
        feature_dict[id] = feature
    np.save(feature_file_name, feature_dict)
Example No. 25
    def __init__(self, args):
        super().__init__()

        if not isinstance(args, argparse.Namespace):
            # eval mode
            assert isinstance(args, dict)
            args = argparse.Namespace(**args)

        # compute other fields according to args
        train_dataset = DependencyDataset(file_path=os.path.join(
            args.data_dir, f"train.{args.data_format}"),
                                          bert=args.bert_dir)
        # save these information to args to convene evaluation.
        args.pos_tags = train_dataset.pos_tags
        args.dep_tags = train_dataset.dep_tags
        args.ignore_pos_tags = (train_dataset.ignore_pos_tags
                                if args.ignore_punct else set())
        args.num_gpus = len(
            [x for x in str(args.gpus).split(",")
             if x.strip()]) if "," in args.gpus else int(args.gpus)
        args.t_total = (len(train_dataset) //
                        (args.accumulate_grad_batches * args.num_gpus) +
                        1) * args.max_epochs

        self.save_hyperparameters(args)
        self.args = args

        bert_name = args.bert_name
        if bert_name == 'roberta-large':
            bert_config = RobertaConfig.from_pretrained(args.bert_dir)
            DependencyConfig = RobertaDependencyConfig
        elif bert_name == 'bert':
            bert_config = BertConfig.from_pretrained(args.bert_dir)
            DependencyConfig = BertDependencyConfig
        else:
            raise ValueError("Unknown bert name!!")

        self.model_config = DependencyConfig(
            pos_tags=args.pos_tags,
            dep_tags=args.dep_tags,
            pos_dim=args.pos_dim,
            additional_layer=args.additional_layer,
            additional_layer_dim=args.additional_layer_dim,
            additional_layer_type=args.additional_layer_type,
            arc_representation_dim=args.arc_representation_dim,
            tag_representation_dim=args.tag_representation_dim,
            biaf_dropout=args.biaf_dropout,
            **bert_config.__dict__)

        self.model = BiaffineDependencyParser(args.bert_dir,
                                              config=self.model_config)

        if args.freeze_bert:
            for param in self.model.bert.parameters():
                param.requires_grad = False

        self.train_stat = AttachmentScores()
        self.val_stat = AttachmentScores()
        self.test_stat = AttachmentScores()
        self.ignore_pos_tags = list(args.ignore_pos_tags)
Example No. 26
    def build_model(self):
        ids = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)
        att = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)
        tok = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)

        # Network architecture
        config = RobertaConfig.from_pretrained(self.config.data.roberta.path +
                                               self.config.data.roberta.config)
        bert_model = TFRobertaModel.from_pretrained(
            self.config.data.roberta.path +
            self.config.data.roberta.roberta_weights,
            config=config)
        x = bert_model(ids, attention_mask=att, token_type_ids=tok)

        self.init_head(x[0])
        self.add_dropout(0.1)
        self.add_lstm(64, True)
        self.add_dropout(0.1)
        self.add_dense(1)
        self.add_activation('softmax')
        self.model = tf.keras.models.Model(
            inputs=[ids, att, tok], outputs=[self.start_head, self.end_head])
        self.model.compile(loss=self.config.model.loss,
                           optimizer=self.config.model.optimizer)
Example No. 27
 def _init_deep_model(self, model_type, model_path, num_labels, num_regs=None):
     if 'roberta' in model_type:
         tokenizer = RobertaTokenizer.from_pretrained(model_path)
         config = RobertaConfig.from_pretrained(model_path)
         config.num_labels = num_labels
         model = RobertaForSequenceClassification.from_pretrained(model_path, config=config)
         model.eval()
         model.to(self.device)
     elif 'electra_multitask' in model_type:
         tokenizer = ElectraTokenizer.from_pretrained(model_path)
         tokenizer.add_special_tokens({'additional_special_tokens': ['[VALUES]']})
         config = ElectraConfig.from_pretrained(model_path)
         config.num_labels = num_labels
         config.num_regs = num_regs
         config.vocab_size = len(tokenizer)
         model = ElectraForSequenceClassificationMultiTask.from_pretrained(model_path, config=config)
         model.eval()
         model.to(self.device)
     elif 'electra' in model_type:
         tokenizer = ElectraTokenizer.from_pretrained(model_path)
         config = ElectraConfig.from_pretrained(model_path)
         config.num_labels = num_labels
         model = ElectraForSequenceClassification.from_pretrained(model_path, config=config)
         model.eval()
         model.to(self.device)
     else:
         raise NotImplementedError()
     return config, tokenizer, model
Example No. 28
def main():
    args = run_parse_args()
    logger.info(args)

    # Setup CUDA, GPU
    args.use_gpu = torch.cuda.is_available() and not args.no_cuda
    args.model_device = torch.device(
        f"cuda:{args.model_gpu_index}" if args.use_gpu else "cpu")
    args.n_gpu = torch.cuda.device_count()

    # Setup logging
    logger.warning("Model Device: %s, n_gpu: %s", args.model_device,
                   args.n_gpu)

    # Set seed
    set_seed(args)

    load_model_path = os.path.join(args.query_output_root,
                                   args.previous_qencoder, "model")
    logger.info(f"load from {load_model_path}")
    config = RobertaConfig.from_pretrained(load_model_path)
    model = RobertaDot.from_pretrained(load_model_path, config=config)

    model.to(args.model_device)
    logger.info("Training/evaluation parameters %s", args)
    # Training
    train(args, model)
Example No. 29
        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = RobertaConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
Example No. 30
def create_roberta_model(tokens_train, attn_mask_train, num_classes):

    config = RobertaConfig(vocab_size=50021, hidden_size=1024,
                           num_hidden_layers=16, num_attention_heads=16, intermediate_size=2048, 
                           attention_probs_dropout_prob=0.3, hidden_dropout_prob=0.3)
    
    bert = TFRobertaModel(config)

    # dense1 = Dense(500, activation='relu')
    dense2 = Dense(368, activation='relu')
    dense3 = Dense(num_classes, activation='softmax')
    dropout = Dropout(0.3)
    
    tokens = Input(shape=(tokens_train.shape[1],), dtype=tf.int32)
    attn_mask = Input(shape=(attn_mask_train.shape[1],), dtype=tf.int32)

    pooled_output = bert(tokens, attn_mask).pooler_output

    med = dropout(dense2(pooled_output))

    # feed the intermediate dense/dropout output into the classifier head
    final = dense3(med)

    model = Model(inputs=[tokens, attn_mask], outputs=final)
    
    return model
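A compile-and-fit sketch for create_roberta_model above; the optimizer, loss, and data arrays are assumptions, not taken from the source.

# Assumed training setup; labels are expected to be one-hot encoded.
model = create_roberta_model(tokens_train, attn_mask_train, num_classes)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
model.fit([tokens_train, attn_mask_train], labels_train_onehot,
          batch_size=16, epochs=2)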