Example #1
    def __init__(self, params):
        super(BiEncoderModule, self).__init__()
        ctxt_bert = BertModel.from_pretrained(params["bert_model"],
                                              output_hidden_states=True)
        if params["load_cand_enc_only"]:
            bert_model = "bert-large-uncased"
        else:
            bert_model = params['bert_model']
        cand_bert = BertModel.from_pretrained(
            bert_model,
            output_hidden_states=True,
        )
        self.context_encoder = BertEncoder(
            ctxt_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        self.cand_encoder = BertEncoder(
            cand_bert,
            params["out_dim"],
            layer_pulled=params["pull_from_layer"],
            add_linear=params["add_linear"],
        )
        if params.get("freeze_cand_enc", False):
            for param in self.cand_encoder.parameters():
                param.requires_grad = False

        self.config = ctxt_bert.config

        ctxt_bert_output_dim = ctxt_bert.embeddings.word_embeddings.weight.size(
            1)

        self.mention_aggregation_type = params.get('mention_aggregation_type',
                                                   None)
        self.classification_heads = nn.ModuleDict({})
        self.linear_compression = None
        if self.mention_aggregation_type is not None:
            classification_heads_dict = {
                'get_context_embeds':
                GetContextEmbedsHead(
                    self.mention_aggregation_type,
                    ctxt_bert_output_dim,
                    cand_bert.embeddings.word_embeddings.weight.size(1),
                )
            }
            classification_heads_dict['mention_scores'] = MentionScoresHead(
                ctxt_bert_output_dim,
                params["mention_scoring_method"],
                params.get("max_mention_length", 10),
            )
            self.classification_heads = nn.ModuleDict(
                classification_heads_dict)
        elif ctxt_bert_output_dim != cand_bert.embeddings.word_embeddings.weight.size(
                1):
            # mapping to make the output dimensions match for dot-product similarity
            self.linear_compression = nn.Linear(
                ctxt_bert_output_dim,
                cand_bert.embeddings.word_embeddings.weight.size(1))
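
A minimal sketch of the kind of params dict this constructor reads; the key names come from the code above, while the values are assumptions rather than settings from the original repository.

params = {
    "bert_model": "bert-base-uncased",
    "load_cand_enc_only": False,
    "out_dim": 768,
    "pull_from_layer": -1,
    "add_linear": True,
    "freeze_cand_enc": False,
    "mention_aggregation_type": None,   # when set, mention_scoring_method is also read
    "mention_scoring_method": "qa_linear",
    "max_mention_length": 10,
}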
Example #2
def main(raw_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name",
                        type=str,
                        required=True,
                        help="model name e.g. bert-base-uncased")
    parser.add_argument("--cache_dir",
                        type=str,
                        default=None,
                        required=False,
                        help="Directory containing pytorch model")
    parser.add_argument("--pytorch_model_path",
                        type=str,
                        required=True,
                        help="/path/to/<pytorch-model-name>.bin")
    parser.add_argument("--tf_cache_dir",
                        type=str,
                        required=True,
                        help="Directory in which to save tensorflow model")
    args = parser.parse_args(raw_args)

    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        state_dict=torch.load(args.pytorch_model_path),
        cache_dir=args.cache_dir,
        args=args)

    convert_pytorch_checkpoint_to_tf(model=model,
                                     ckpt_dir=args.tf_cache_dir,
                                     model_name=args.model_name)
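
A hypothetical invocation of main() above with explicit arguments instead of the command line; the paths below are placeholders, not real files.

main([
    "--model_name", "bert-base-uncased",
    "--pytorch_model_path", "/path/to/pytorch_model.bin",
    "--tf_cache_dir", "/tmp/tf_bert",
])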
Example #3
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        #  Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])
        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        >>> model.eval()
        # Predict hidden states features for each layer
        >>> with torch.no_grad():
                encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
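
The docstring above uses the older torch.hub / pytorch-transformers interface, which returns plain tuples. For reference, a minimal sketch of the same forward pass with the current transformers API (assuming a recent 4.x release) might look like this:

import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
model = BertModel.from_pretrained("bert-base-cased", output_hidden_states=True)
model.eval()

inputs = tokenizer("Who was Jim Henson?", "Jim Henson was a puppeteer",
                   return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.last_hidden_state.shape)   # (1, seq_len, 768)
print(len(outputs.hidden_states))        # embeddings + 12 encoder layers = 13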
Example #4
    def __init__(self,
                 params,
                 tokenizer,
                 start_mention_id=None,
                 end_mention_id=None):
        super(CrossEncoderModule, self).__init__()
        model_path = params["bert_model"]
        if params.get("roberta"):
            encoder_model = RobertaModel.from_pretrained(model_path)
        else:
            encoder_model = BertModel.from_pretrained(model_path)
        encoder_model.resize_token_embeddings(len(tokenizer))
        self.pool_highlighted = params["pool_highlighted"]
        self.encoder = BertEncoder(encoder_model,
                                   params["out_dim"],
                                   layer_pulled=params["pull_from_layer"],
                                   add_linear=params["add_linear"]
                                   and not self.pool_highlighted,
                                   get_all_outputs=self.pool_highlighted)
        self.config = self.encoder.bert_model.config
        self.start_mention_id = start_mention_id
        self.end_mention_id = end_mention_id

        if self.pool_highlighted:
            bert_output_dim = encoder_model.embeddings.word_embeddings.weight.size(
                1)
            output_dim = params["out_dim"]
            self.additional_linear = nn.Linear(2 * bert_output_dim, output_dim)
            self.dropout = nn.Dropout(0.1)
Example #5
 def __init__(self, params):
     super(BiEncoderModule, self).__init__()
     ctxt_bert = BertModel.from_pretrained(params["bert_model"])
     cand_bert = BertModel.from_pretrained(params['bert_model'])
     self.context_encoder = BertEncoder(
         ctxt_bert,
         params["out_dim"],
         layer_pulled=params["pull_from_layer"],
         add_linear=params["add_linear"],
     )
     self.cand_encoder = BertEncoder(
         cand_bert,
         params["out_dim"],
         layer_pulled=params["pull_from_layer"],
         add_linear=params["add_linear"],
     )
     self.config = ctxt_bert.config
Example #6
    def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
        if model_name in cls._cache:
            return PretrainedBertModel._cache[model_name]

        model = BertModel.from_pretrained(model_name)
        if cache_model:
            cls._cache[model_name] = model

        return model
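
Hypothetical usage of the caching loader above: with cache_model left at its default, a second call for the same model name returns the already-loaded instance instead of re-loading the weights.

m1 = PretrainedBertModel.load("bert-base-uncased")
m2 = PretrainedBertModel.load("bert-base-uncased")
assert m1 is m2   # served from the class-level cache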
Example #7
    def test_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = BertConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, PretrainedConfig)

            model = BertModel.from_pretrained(model_name)
            model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
            self.assertIsNotNone(model)
            self.assertIsInstance(model, PreTrainedModel)
            for value in loading_info.values():
                self.assertEqual(len(value), 0)

            config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(model.config, config)
Example #8
 def __init__(self, params):
     super(BiEncoderModule, self).__init__()
     ctxt_bert = BertModel.from_pretrained(
         params["bert_model"]
     )  # Could be a path containing config.json and pytorch_model.bin; or could be an id shorthand for a model that is loaded in the library
     cand_bert = BertModel.from_pretrained(params["bert_model"])
     self.context_encoder = BertEncoder(
         ctxt_bert,
         params["out_dim"],
         layer_pulled=params["pull_from_layer"],
         add_linear=params["add_linear"],
     )
     self.cand_encoder = BertEncoder(
         cand_bert,
         params["out_dim"],
         layer_pulled=params["pull_from_layer"],
         add_linear=params["add_linear"],
     )
     self.config = ctxt_bert.config
Example #9
    def __init__(self, bert_model_name, cache_dir="./cache/"):
        super().__init__()

        # Create cache directory if not exists
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        self.bert_model = BertModel.from_pretrained(bert_model_name,
                                                    cache_dir=cache_dir,
                                                    output_hidden_states=True)
        self.bert_model.train()
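
The same idea without the wrapper class, as a standalone sketch: cache_dir controls where the downloaded weights are stored, and output_hidden_states=True exposes all layer outputs. The import assumes the current transformers package.

import os
from transformers import BertModel

cache_dir = "./cache/"
os.makedirs(cache_dir, exist_ok=True)
bert = BertModel.from_pretrained("bert-base-uncased",
                                 cache_dir=cache_dir,
                                 output_hidden_states=True)
bert.train()   # keep the BERT weights trainable, as in the example above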
Example #10
 def load(cls, pretrained_model_name_or_path, language=None):
     bert = cls()
     # We need to differentiate between loading model using FARM format and Pytorch-Transformers format
     farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                   "language_model_config.json")
     if os.path.exists(farm_lm_config):
         # FARM style
         bert_config = BertConfig.from_pretrained(farm_lm_config)
         farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                      "language_model.bin")
         bert.model = BertModel.from_pretrained(farm_lm_model,
                                                config=bert_config)
         bert.language = bert.model.config.language
     else:
         # Pytorch-transformer Style
         bert.model = BertModel.from_pretrained(
             pretrained_model_name_or_path)
         bert.language = cls._infer_language_from_name(
             pretrained_model_name_or_path)
     return bert
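
A hypothetical call to the classmethod above (the enclosing class, named Bert here, is an assumption): a plain model name falls through to the "Pytorch-transformer style" branch, while a directory containing language_model_config.json takes the FARM branch.

lm = Bert.load("bert-base-cased")
print(lm.language)   # inferred from the model name in this branch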
Example #11
    def __init__(self, model_name_or_path, hidden_size=768, num_class=2):
        super(NeuralNet, self).__init__()

        self.config = BertConfig.from_pretrained(model_name_or_path,
                                                 num_labels=4)
        self.config.output_hidden_states = True
        self.bert = BertModel.from_pretrained(model_name_or_path,
                                              config=self.config)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.weights = torch.rand(13, 1).cuda()
        self.dropouts = nn.ModuleList([nn.Dropout(0.5) for _ in range(5)])
        self.fc = nn.Linear(hidden_size, num_class)
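
The forward pass is not shown above. Purely as an illustrative assumption (not the original author's code), one plausible way to combine the 13 hidden states with the (13, 1) mixing weights and the multi-sample dropout heads is sketched below; it assumes the model and inputs are on the same device.

import torch

def forward_sketch(self, input_ids, attention_mask=None):
    outputs = self.bert(input_ids, attention_mask=attention_mask)
    hidden_states = outputs[2]                    # 13 tensors: embeddings + 12 layers
    stacked = torch.stack(hidden_states, dim=0)   # (13, batch, seq_len, hidden)
    weights = torch.softmax(self.weights, dim=0)  # normalize the (13, 1) mixing weights
    mixed = (stacked * weights[:, None, None, :]).sum(dim=0)   # (batch, seq_len, hidden)
    cls = mixed[:, 0, :]                          # [CLS] token representation
    # multi-sample dropout: average the classifier output over the 5 dropout masks
    logits = torch.stack([self.fc(drop(cls)) for drop in self.dropouts]).mean(dim=0)
    return logits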
Example #12
 def __init__(self, params, tokenizer):
     super(CrossEncoderModule, self).__init__()
     model_path = params["bert_model"]
     if params.get("roberta"):
         encoder_model = RobertaModel.from_pretrained(model_path)
     else:
         encoder_model = BertModel.from_pretrained(model_path)
     encoder_model.resize_token_embeddings(len(tokenizer))
     self.encoder = BertEncoder(
         encoder_model,
         params["out_dim"],
         layer_pulled=params["pull_from_layer"],
         add_linear=params["add_linear"],
     )
     self.config = self.encoder.bert_model.config
Example #13
 def __init__(self, args, config):
     self.config = config
     self.config_model = config['model']
     self.args = args
     self.bert_node_encoder = Transformer_xh.from_pretrained(
         self.config['bert_model_file'],
         cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
         'distributed_{}'.format(args.local_rank))
     self.bert_node_encoder.encoder.build_model(args.hops)
     if args.arch == 'bert':
         self.bert_node_encoder = BertModel.from_pretrained(
             self.config['bert_model_file'],
             cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
             'distributed_{}'.format(args.local_rank))
     self.bert_config = self.bert_node_encoder.config
     self.network = ModelHelper(self.bert_node_encoder, self.args,
                                self.bert_config, self.config_model)
     self.device = args.device
Example #14
    def __init__(self, model_path):
        super(OnmtBertEncoder, self).__init__()
        config = BertConfig.from_json_file(
            os.path.join(model_path, "config.json"))
        pretrained_dict = os.path.join(model_path, "pytorch_model.bin")
        if os.path.exists(pretrained_dict):
            model = BertModel.from_pretrained(
                pretrained_model_name_or_path=pretrained_dict, config=config)
            print("init BERT model with {} weights".format(
                len(model.state_dict())))
        else:
            model = BertModel(config)
        model.embeddings.word_embeddings = expandEmbeddingByN(
            model.embeddings.word_embeddings, 4)
        model.embeddings.word_embeddings = expandEmbeddingByN(
            model.embeddings.word_embeddings, 2, last=True)

        self.encoder = model
        #print(model)
        print("***" * 20)
Example #15
 def __init__(
         self,
         config='a class with num_attention_heads, hidden_size, attention_probs_dropout_prob, output_attentions',
         bert_dir='/mnt/sda1/bert/uncased_L-12_H-768_A-12',
         drop=0.0,
         L=80,
         bert_dim=768,
         num_class=3,
         SDR=5,
         tp='cdm'):
     super(MY_BERT_LCF, self).__init__()
     self.text_bert = BertModel.from_pretrained(bert_dir)
     self.aspect_bert = copy.deepcopy(self.text_bert)
     self.aspect_self_att = SelfAttention(config, L)
     self.bert_pooler = BertPooler(config)
     if tp == 'cdm':
         self.reduce2_bert_dim = nn.Linear(bert_dim * 2, bert_dim)
     self.reduce2_num_class_linear = nn.Linear(bert_dim, num_class)
     self.drop = drop
     self.L = L
     self.SDR = SDR
     self.tp = tp
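
The config default above is only a descriptive string; a minimal sketch of the attribute bag it describes is shown below (the values are assumptions, chosen for bert-base-sized dimensions).

from types import SimpleNamespace

config = SimpleNamespace(
    num_attention_heads=12,
    hidden_size=768,
    attention_probs_dropout_prob=0.1,
    output_attentions=False,
)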
Example #16
class Words2VectorNet(superclass):
    def __init__(self, parameters, word_embeddings=None):
        if use_bert:
            super(Words2VectorNet, self).__init__(config)
            # from_pretrained is a classmethod that returns a new model, so its
            # result must be assigned; calling it on an instance has no effect.
            self.bert = BertModel.from_pretrained('{}-uncased'.format('bert-base'))
            #if torch.cuda.device_count() > 1:
            #    self.bert = torch.nn.DataParallel(self.bert)
            self._p = parameters
            self._dropout = nn.Dropout(p=self._p.get('dropout', 0.1))
            self._word_embedding = nn.Embedding(self._p['word.vocab.size'],
                                                self._p['word.emb.size'],
                                                padding_idx=0)
            if word_embeddings is not None:
                word_embeddings = torch.from_numpy(word_embeddings).float()
                self._word_embedding.weight = nn.Parameter(word_embeddings)
            self._word_embedding.weight.requires_grad = False
            self._nonlinearity = nn.ReLU() if self._p.get(
                'enc.activation', 'tanh') == 'relu' else nn.Tanh()
            out_size = 768
        else:
            super(Words2VectorNet, self).__init__()
            self._p = parameters
            self._dropout = nn.Dropout(p=self._p.get('dropout', 0.1))
            self._word_embedding = nn.Embedding(self._p['word.vocab.size'],
                                                self._p['word.emb.size'],
                                                padding_idx=0)
            if word_embeddings is not None:
                word_embeddings = torch.from_numpy(word_embeddings).float()
                self._word_embedding.weight = nn.Parameter(word_embeddings)
            self._word_embedding.weight.requires_grad = False

            self._pos_embedding = nn.Embedding(3,
                                               self._p['poss.emb.size'],
                                               padding_idx=0)

            self._word_encoding_conv = nn.Conv1d(
                self._p['word.emb.size'] + self._p['poss.emb.size'],
                self._p['word.conv.size'],
                self._p['word.conv.width'],
                padding=self._p['word.conv.width'] // 2)

            self._nonlinearity = nn.ReLU() if self._p.get(
                'enc.activation', 'tanh') == 'relu' else nn.Tanh()
            self._convs = nn.ModuleList([
                nn.Sequential(
                    nn.Conv1d(
                        in_channels=self._p['word.conv.size'],
                        out_channels=self._p['word.conv.size'],
                        kernel_size=self._p['word.conv.width'],
                        padding=self._p['char.conv.width'] // 2 *
                        2**(j + 1) if not self._p.get("legacy.mode", False)
                        else self._p['char.conv.width'] // 2 + 2**(j + 1),
                        dilation=2**(j + 1),
                        bias=True), self._nonlinearity)
                for j in range(self._p.get('word.conv.depth', 1))
            ])
            self._block_conv = nn.Conv1d(self._p['word.conv.size'],
                                         self._p['word.conv.size'],
                                         self._p['word.conv.width'],
                                         padding=self._p['word.conv.width'] //
                                         2)
            out_size = self._p['word.conv.size']

        self.sem_layers = nn.Sequential(
            self._dropout,
            nn.Linear(out_size, self._p['word.enc.size']),
            self._nonlinearity,
        )

        self._pool = nn.AdaptiveMaxPool1d(1) if self._p.get(
            'enc.pooling', 'max') == 'max' else nn.AdaptiveAvgPool1d(1)

    def forward(self, sent_m_with_pos):
        if use_bert:
            # non-zero tokens are non-masked
            sent_m_with_pos = sent_m_with_pos.long()
            sent_m = self.bert(input_ids=sent_m_with_pos,
                               attention_mask=(sent_m_with_pos != 0))
            sent_m = sent_m[0][:, 0, :]
        else:
            sent_m_with_pos = sent_m_with_pos.long()
            sent_m = sent_m_with_pos[..., 0]
            positions = sent_m_with_pos[..., 1]
            sent_m = self._word_embedding(sent_m)
            positions = self._pos_embedding(positions)

            sent_m = torch.cat((sent_m, positions),
                               dim=-1).transpose(-2, -1).contiguous()
            sent_m = self._dropout(sent_m)
            sent_m = self._word_encoding_conv(sent_m)
            sent_m = self._nonlinearity(sent_m)

            for _ in range(self._p.get("word.repeat.convs", 3)):
                for convlayer in self._convs:
                    sent_m = convlayer(sent_m)
                sent_m = self._block_conv(sent_m)
                sent_m = self._nonlinearity(sent_m)

            sent_m = self._pool(sent_m).squeeze(dim=-1)
        sent_m = self.sem_layers(sent_m)
        return sent_m
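
A minimal sketch (values are assumptions) of the parameters dict read by Words2VectorNet above; every key below appears in the constructor or forward().

parameters = {
    "dropout": 0.1,
    "word.vocab.size": 30000,
    "word.emb.size": 100,
    "poss.emb.size": 3,
    "word.conv.size": 128,
    "word.conv.width": 3,
    "char.conv.width": 3,
    "word.conv.depth": 1,
    "word.repeat.convs": 3,
    "word.enc.size": 256,
    "enc.activation": "relu",
    "enc.pooling": "max",
    "legacy.mode": False,
}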
Example #17
        "scorer":
        Scorer(custom_metric_funcs={"Tag_accuracy": tag_accuracy_scorer})
    }
}

# Get the absolute current working directory of the project
cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

# Create empty list to hold every Dataloader object
dataloaders = []

# Create empty list to hold every Task object
tasks = []

# Define the shared BERT layer to be used across tasks, and set the max seq length for the model.
shared_BERT_model = BertModel.from_pretrained('bert-base-uncased')
shared_BERT_model.config.max_position_embeddings = MAX_SEQ_LENGTH

# Confirm BERT's hidden layer size (768 for bert-base-uncased)
hidden_layer_size = 768

# Wrap the BERT model in a module that can take the Xs as inputs
bert_module = SnorkelFriendlyBert(bert_model=shared_BERT_model)

# Iterate through all task types
for task_type in ["Classification_Tasks", "Tagging_Tasks"]:

    # Get the contents of the data folder for the given task type
    target_data_path = os.path.join(cwd, "data", task_type)

    # Get names of all datasets in data folder
Example #18
 def __init__(self, bert_model=BertModel.from_pretrained('bert-base-uncased')):
     super(SnorkelFriendlyBert, self).__init__()
     self.bert_layer = bert_model
     use_cuda = torch.cuda.is_available()
     self.device = torch.device('cuda:0' if use_cuda else 'cpu')
Example #19
 def init_model(self):
     logger.info(f'loading pretrain model from {self.pretrain_model}')
     model = BertModel.from_pretrained(self.pretrain_model)
     model.to(self.device)
     return model
Example #20
def main(config):
    args = config

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    processor = ATEPCProcessor()
    label_list = processor.get_labels()
    num_labels = len(label_list) + 1

    datasets = {
        'camera': "atepc_datasets/camera",
        'car': "atepc_datasets/car",
        'phone': "atepc_datasets/phone",
        'notebook': "atepc_datasets/notebook",
        'laptop': "atepc_datasets/laptop",
        'restaurant': "atepc_datasets/restaurant",
        'twitter': "atepc_datasets/twitter",
        'mixed': "atepc_datasets/mixed",
    }
    pretrained_bert_models = {
        'camera': "bert-base-chinese",
        'car': "bert-base-chinese",
        'phone': "bert-base-chinese",
        'notebook': "bert-base-chinese",
        'laptop': "bert-base-uncased",
        'restaurant': "bert-base-uncased",
        'twitter': "bert-base-uncased",
        'mixed': "bert-base-multilingual-uncased",
    }

    args.bert_model = pretrained_bert_models[args.dataset]
    args.data_dir = datasets[args.dataset]

    def convert_polarity(examples):
        for i in range(len(examples)):
            polarities = []
            for polarity in examples[i].polarity:
                if polarity == 2:
                    polarities.append(1)
                else:
                    polarities.append(polarity)
            examples[i].polarity = polarities

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=True)
    train_examples = processor.get_train_examples(args.data_dir)
    eval_examples = processor.get_test_examples(args.data_dir)
    num_train_optimization_steps = int(
        len(train_examples) / args.train_batch_size /
        args.gradient_accumulation_steps) * args.num_train_epochs
    bert_base_model = BertModel.from_pretrained(args.bert_model)
    bert_base_model.config.num_labels = num_labels

    if args.dataset in {'camera', 'car', 'phone', 'notebook'}:
        convert_polarity(train_examples)
        convert_polarity(eval_examples)
        model = LCF_ATEPC(bert_base_model, args=args)
    else:
        model = LCF_ATEPC(bert_base_model, args=args)

    for arg in vars(args):
        logger.info('>>> {0}: {1}'.format(arg, getattr(args, arg)))

    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.00001
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.00001
    }]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      weight_decay=0.00001)
    eval_features = convert_examples_to_features(eval_examples, label_list,
                                                 args.max_seq_length,
                                                 tokenizer)
    all_spc_input_ids = torch.tensor([f.input_ids_spc for f in eval_features],
                                     dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                  dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                   dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)
    all_polarities = torch.tensor([f.polarities for f in eval_features],
                                  dtype=torch.long)
    all_valid_ids = torch.tensor([f.valid_ids for f in eval_features],
                                 dtype=torch.long)
    all_lmask_ids = torch.tensor([f.label_mask for f in eval_features],
                                 dtype=torch.long)
    eval_data = TensorDataset(all_spc_input_ids, all_input_mask,
                              all_segment_ids, all_label_ids, all_polarities,
                              all_valid_ids, all_lmask_ids)
    # Run prediction for full data
    eval_sampler = RandomSampler(eval_data)
    eval_dataloader = DataLoader(eval_data,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    def evaluate(eval_ATE=True, eval_APC=True):
        # evaluate
        apc_result = {'max_apc_test_acc': 0, 'max_apc_test_f1': 0}
        ate_result = 0
        y_true = []
        y_pred = []
        n_test_correct, n_test_total = 0, 0
        test_apc_logits_all, test_polarities_all = None, None
        model.eval()
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        for input_ids_spc, input_mask, segment_ids, label_ids, polarities, valid_ids, l_mask in eval_dataloader:
            input_ids_spc = input_ids_spc.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            valid_ids = valid_ids.to(device)
            label_ids = label_ids.to(device)
            polarities = polarities.to(device)
            l_mask = l_mask.to(device)

            with torch.no_grad():
                ate_logits, apc_logits = model(input_ids_spc,
                                               segment_ids,
                                               input_mask,
                                               valid_ids=valid_ids,
                                               polarities=polarities,
                                               attention_mask_label=l_mask)
            if eval_APC:
                polarities = model.get_batch_polarities(polarities)
                n_test_correct += (torch.argmax(
                    apc_logits, -1) == polarities).sum().item()
                n_test_total += len(polarities)

                if test_polarities_all is None:
                    test_polarities_all = polarities
                    test_apc_logits_all = apc_logits
                else:
                    test_polarities_all = torch.cat(
                        (test_polarities_all, polarities), dim=0)
                    test_apc_logits_all = torch.cat(
                        (test_apc_logits_all, apc_logits), dim=0)

            if eval_ATE:
                if not args.use_bert_spc:
                    label_ids = model.get_batch_token_labels_bert_base_indices(
                        label_ids)
                ate_logits = torch.argmax(F.log_softmax(ate_logits, dim=2),
                                          dim=2)
                ate_logits = ate_logits.detach().cpu().numpy()
                label_ids = label_ids.to('cpu').numpy()
                input_mask = input_mask.to('cpu').numpy()
                for i, label in enumerate(label_ids):
                    temp_1 = []
                    temp_2 = []
                    for j, m in enumerate(label):
                        if j == 0:
                            continue
                        elif label_ids[i][j] == len(label_list):
                            y_true.append(temp_1)
                            y_pred.append(temp_2)
                            break
                        else:
                            temp_1.append(label_map.get(label_ids[i][j], 'O'))
                            temp_2.append(label_map.get(ate_logits[i][j], 'O'))
        if eval_APC:
            test_acc = n_test_correct / n_test_total
            if args.dataset in {'camera', 'car', 'phone', 'notebook'}:
                test_f1 = f1_score(torch.argmax(test_apc_logits_all, -1).cpu(),
                                   test_polarities_all.cpu(),
                                   labels=[0, 1],
                                   average='macro')
            else:
                test_f1 = f1_score(torch.argmax(test_apc_logits_all, -1).cpu(),
                                   test_polarities_all.cpu(),
                                   labels=[0, 1, 2],
                                   average='macro')
            test_acc = round(test_acc * 100, 2)
            test_f1 = round(test_f1 * 100, 2)
            apc_result = {
                'max_apc_test_acc': test_acc,
                'max_apc_test_f1': test_f1
            }

        if eval_ATE:
            report = classification_report(y_true, y_pred, digits=4)
            tmps = report.split()
            ate_result = round(float(tmps[7]) * 100, 2)
        return apc_result, ate_result

    def save_model(path):
        # Save a trained model and the associated configuration,
        # Take care of the storage!
        os.makedirs(path, exist_ok=True)
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Only save the model it-self
        model_to_save.save_pretrained(path)
        tokenizer.save_pretrained(path)
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        model_config = {
            "bert_model": args.bert_model,
            "do_lower": True,
            "max_seq_length": args.max_seq_length,
            "num_labels": len(label_list) + 1,
            "label_map": label_map
        }
        json.dump(model_config, open(os.path.join(path, "config.json"), "w"))
        logger.info('save model to: {}'.format(path))

    def train():
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_spc_input_ids = torch.tensor(
            [f.input_ids_spc for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in train_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in train_features],
                                     dtype=torch.long)
        all_polarities = torch.tensor([f.polarities for f in train_features],
                                      dtype=torch.long)
        train_data = TensorDataset(all_spc_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids,
                                   all_polarities, all_valid_ids,
                                   all_lmask_ids)

        train_sampler = SequentialSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)
        max_apc_test_acc = 0
        max_apc_test_f1 = 0
        max_ate_test_f1 = 0

        global_step = 0
        for epoch in range(int(args.num_train_epochs)):
            logger.info('#' * 80)
            logger.info('Train {} Epoch{} for {}'.format(args.seed, epoch + 1,
                                                         args.data_dir))
            logger.info('#' * 80)
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(train_dataloader):
                model.train()
                batch = tuple(t.to(device) for t in batch)
                input_ids_spc, input_mask, segment_ids, label_ids, polarities, valid_ids, l_mask = batch
                loss_ate, loss_apc = model(input_ids_spc, segment_ids,
                                           input_mask, label_ids, polarities,
                                           valid_ids, l_mask)
                loss = loss_ate + loss_apc
                loss.backward()
                nb_tr_examples += input_ids_spc.size(0)
                nb_tr_steps += 1
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1
                if global_step % args.eval_steps == 0:
                    if epoch >= args.num_train_epochs - 2 or args.num_train_epochs <= 2:
                        # evaluate only in last 2 epochs
                        apc_result, ate_result = evaluate(
                            eval_ATE=not args.use_bert_spc)
                        # apc_result, ate_result = evaluate()
                        path = '{0}/{1}_{2}_apcacc_{3}_apcf1_{4}_atef1_{5}'.format(
                            args.output_dir, args.dataset,
                            args.local_context_focus,
                            round(apc_result['max_apc_test_acc'], 2),
                            round(apc_result['max_apc_test_f1'], 2),
                            round(ate_result, 2))

                        if apc_result['max_apc_test_acc'] > max_apc_test_acc:
                            max_apc_test_acc = apc_result['max_apc_test_acc']
                        if apc_result['max_apc_test_f1'] > max_apc_test_f1:
                            max_apc_test_f1 = apc_result['max_apc_test_f1']
                        if ate_result > max_ate_test_f1:
                            max_ate_test_f1 = ate_result

                        if apc_result['max_apc_test_acc'] > max_apc_test_acc or \
                            apc_result['max_apc_test_f1'] > max_apc_test_f1 or \
                            ate_result > max_ate_test_f1:
                            save_model(path)

                        current_apc_test_acc = apc_result['max_apc_test_acc']
                        current_apc_test_f1 = apc_result['max_apc_test_f1']
                        current_ate_test_f1 = round(ate_result, 2)

                        logger.info('*' * 80)
                        logger.info('Train {} Epoch{}, Evaluate for {}'.format(
                            args.seed, epoch + 1, args.data_dir))
                        logger.info(
                            f'APC_test_acc: {current_apc_test_acc}(max: {max_apc_test_acc})  '
                            f'APC_test_f1: {current_apc_test_f1}(max: {max_apc_test_f1})'
                        )
                        if args.use_bert_spc:
                            logger.info(
                                f'ATE_test_F1: {current_apc_test_f1}(max: {max_apc_test_f1})'
                                f' (Unreliable since `use_bert_spc` is "True".)'
                            )
                        else:
                            logger.info(
                                f'ATE_test_f1: {current_ate_test_f1}(max:{max_ate_test_f1})'
                            )
                        logger.info('*' * 80)

        return [max_apc_test_acc, max_apc_test_f1, max_ate_test_f1]

    return train()
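
A minimal sketch (values are assumptions) of the config/args object main() expects; every attribute below is read somewhere in the function above, while bert_model and data_dir are filled in from the dataset name.

from argparse import Namespace

config = Namespace(
    dataset="laptop",
    output_dir="outputs",
    max_seq_length=80,
    train_batch_size=16,
    eval_batch_size=16,
    gradient_accumulation_steps=1,
    learning_rate=3e-5,
    num_train_epochs=5,
    eval_steps=20,
    use_bert_spc=True,
    local_context_focus="cdm",
    seed=42,
)
# main(config)   # also requires ATEPCProcessor, LCF_ATEPC, the datasets, etc. to be importable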
Example #21
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=False,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=False,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=False,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    ## Other parameters
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    # if args.server_ip and args.server_port:
    #     # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
    #     import ptvsd
    #     print("Waiting for debugger attach")
    #     ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
    #     ptvsd.wait_for_attach()

    processors = {"ner": NerProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list)  # + 1

    pretrain_model_dir = 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(pretrain_model_dir,
                                              do_lower_case=args.do_lower_case)
    model = BertModel.from_pretrained(pretrain_model_dir,
                                      output_hidden_states=True,
                                      output_attentions=True)
    print(tokenizer.tokenize('unfamiliar'))
    print(tokenizer.tokenize('disjoint'))
    print(
        tokenizer.tokenize(
            "Let's see all hidden-states and attentions on this text"))
    input_ids = torch.tensor(
        tokenizer.convert_tokens_to_ids(
            "Let's see all hidden-states and attentions on this text".lower(
            ).split()))
    all_hidden_states, all_attentions = model(input_ids)[-2:]
    print(all_hidden_states[-1].shape)
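
A hedged variant of the last few lines above, reusing the tokenizer and model already loaded: tokenizer.encode() produces proper WordPiece ids (whether [CLS]/[SEP] are added depends on the library version), and the extra outer list gives the batch dimension BertModel expects.

text = "Let's see all hidden-states and attentions on this text"
input_ids = torch.tensor([tokenizer.encode(text)])           # shape (1, seq_len)
with torch.no_grad():
    all_hidden_states, all_attentions = model(input_ids)[-2:]
print(all_hidden_states[-1].shape)                           # (1, seq_len, 768)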
Example #22
# Date: 2020/12/4
# Author: Qianqian Peng

from mention_detection.mention_detection import load_model

import torch
from transformers import BertTokenizer as BertTokenizer_new
from transformers import BertConfig as BertConfig_new
from transformers import BertModel as BertModel_new
import torch.nn as nn

from pytorch_transformers.modeling_bert import (
    BertPreTrainedModel,
    BertConfig,
    BertModel,
)

bert_new = BertModel_new.from_pretrained(
    './model/bert-large-uncased',
    config=BertConfig_new.from_pretrained('bert-large-uncased'))
bert_old = BertModel.from_pretrained(
    './model/bert-large-uncased',
    config=BertConfig.from_pretrained('bert-large-uncased'))
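
As a hedged follow-up, one way to check that the two loaders produced the same weights is to compare the parameters they share by name (the newer library may carry extra buffers such as position_ids):

old_sd = bert_old.state_dict()
new_sd = bert_new.state_dict()
shared = sorted(set(old_sd) & set(new_sd))
print(len(shared), all(torch.equal(old_sd[k], new_sd[k]) for k in shared))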