Example #1
def get_model(args):
    if args.model_size == 'debug':
        num_hidden_layers = 1
        embedding_size = 8
        hidden_size = 16
        num_hidden_groups = 1
        intermediate_size = 32
        num_attention_heads = 2
        args.gen_ratio = 2

    elif args.model_size == 'small':
        num_hidden_layers = 12
        embedding_size = 128
        hidden_size = 256
        num_hidden_groups = 1
        intermediate_size = 1024
        num_attention_heads = 4
    elif args.model_size == 'base':
        num_hidden_layers = 12
        embedding_size = 128
        hidden_size = 768
        num_hidden_groups = 1
        intermediate_size = 3072
        num_attention_heads = 12

    else:
        raise ValueError("Unknown model_size: expected 'debug', 'small', or 'base'")

    generator_config = AlbertConfig(
        max_position_embeddings=args.seq_length,
        vocab_size=args.vocab_size,
        num_hidden_layers=num_hidden_layers,
        embedding_size=embedding_size,
        num_hidden_groups=num_hidden_groups,
        hidden_size=hidden_size // args.gen_ratio,
        intermediate_size=intermediate_size // args.gen_ratio,
        num_attention_heads=num_attention_heads // args.gen_ratio,
    )

    discriminator_config = AlbertConfig(
        max_position_embeddings=args.seq_length,
        vocab_size=args.vocab_size,
        num_hidden_layers=num_hidden_layers,
        embedding_size=embedding_size,
        num_hidden_groups=num_hidden_groups,
        hidden_size=hidden_size,
        intermediate_size=intermediate_size,
        num_attention_heads=num_attention_heads,
    )

    model = Electra(args,
                    gen_config=generator_config,
                    dis_config=discriminator_config)
    return model
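
A minimal usage sketch for get_model above, assuming an argparse-style namespace; the field names (model_size, gen_ratio, seq_length, vocab_size) are the ones the function reads, while the concrete values are hypothetical:

from argparse import Namespace

# Hypothetical values; get_model and Electra come from the example's own module.
args = Namespace(model_size='small', gen_ratio=4,
                 seq_length=128, vocab_size=30000)
model = get_model(args)  # Electra wrapper around a smaller generator and a full-size discriminator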
Example #2
    def __init__(self, args, token_vocab_size, output_dim=100):
        super(LMCDecoderBERT, self).__init__()
        self.pool_layers = args.pool_bert

        if args.debug_model:
            bert_dim = 100
            num_hidden_layers = 1
            embedding_size = 100
            intermediate_size = 100
            output_dim = 100
        else:
            bert_dim = 256
            num_hidden_layers = 2
            embedding_size = 128
            intermediate_size = 256
        num_attention_heads = max(1, bert_dim // 64)
        print('Using {} attention heads in decoder'.format(num_attention_heads))

        config = AlbertConfig(
            vocab_size=token_vocab_size,
            embedding_size=embedding_size,
            hidden_size=bert_dim,
            num_hidden_layers=num_hidden_layers,
            intermediate_size=intermediate_size,  # note: 3072 is BERT's default; AlbertConfig defaults to 16384
            num_attention_heads=num_attention_heads,
            output_hidden_states=self.pool_layers
        )

        self.bert = AlbertModel(config)

        self.u = nn.Linear(bert_dim, output_dim, bias=True)
        self.v = nn.Linear(bert_dim, 1, bias=True)
        self.att_linear = nn.Linear(bert_dim, 1, bias=True)
        self.dropout = nn.Dropout(0.2)
Example #3
    def from_hocon(cls: Type[QueryCodeSiamese],
                   config: ConfigTree) -> QueryCodeSiamese:
        """Load Query1Code1_CodeSearchModel from a config tree"""

        if "training.model.encoder.type" in config:
            if config["training.model.encoder.type"] == "albert":
                logger.info("Creating QueryCodeSiamese with Albert encoder")
                albert_config = AlbertConfig(
                    **config["training.model.encoder"])
                encoder = PreTrainedModelRecordable(AlbertModel(albert_config))
            elif config["training.model.encoder.type"] == "bert":
                logger.info("Creating QueryCodeSiamese with Bert encoder")
                bert_config = BertConfig(**config["training.model.encoder"])
                encoder = PreTrainedModelRecordable(BertModel(bert_config))
        else:
            # default is BERT now
            logger.info("Creating QueryCodeSiamese with Bert encoder")
            bert_config = BertConfig(**config["training.model.encoder"])
            encoder = PreTrainedModelRecordable(BertModel(bert_config))

        model = QueryCodeSiamese(
            encoder=encoder,
            pooler=MeanWeightedPooler(
                input_size=config["training.model.encoder.hidden_size"]))

        return model
Example #4
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = random_attention_mask(
                [self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        config = AlbertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            is_decoder=False,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, attention_mask
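
ids_tensor and random_attention_mask above are test-suite helpers rather than part of transformers' public API; a minimal sketch of what ids_tensor is assumed to do (random integer ids of the requested shape):

import torch

def ids_tensor(shape, vocab_size):
    # Random ids in [0, vocab_size) with the given shape, dtype long.
    return torch.randint(low=0, high=vocab_size, size=tuple(shape), dtype=torch.long)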
 def __init__(self, albert_name="ALBERT-base", device="cuda"):
     super().__init__()
     if albert_name == "ALBERT-base":
         albert_configuration = AlbertConfig(hidden_size=768,
                                             num_attention_heads=12,
                                             intermediate_size=3072)
     elif albert_name == "ALBERT-xxlarge":
         albert_configuration = AlbertConfig()
     else:
         raise ValueError(f"Unknown albert_name: {albert_name}")
     self.device = device
     self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
     self.model = AlbertModel.from_pretrained('albert-base-v2').to(
         self.device)
     self.linear = nn.Linear(self.model.config.embedding_size, 2).to(self.device)
     self.dropout = nn.Dropout(0.1).to(self.device)
def main(args):
    with open(args.config) as fp:
        data = json.loads(fp.read())
    config = AlbertConfig(**data)
    model = AlbertForMaskedLM(config)
    model: AlbertForMaskedLM = load_tf_weights_in_albert(model, config, args.checkpoint)
    model.save_pretrained(args.output)
def prediction():
    # data = input('Enter test data: ')
    data = "Don't give me your attitude!"
    print(data)
    tokenized_data = tokenizer.tokenize(data)
    tokenized_data.insert(0, "[CLS]")
    tokenized_data.append("[SEP]")
    data_indexed = tokenizer.convert_tokens_to_ids(tokenized_data)
    data = torch.from_numpy(np.array(data_indexed)).to(device)
    data = data.unsqueeze(0)  # [1, seq_length]

    config = AlbertConfig(hidden_size=768)
    model = ALBertClassifyModel(config, num_class=2, fc_dropout=DROPOUT)
    model.load_state_dict(torch.load(SAVE_MODEL_PATH))
    model.to(device)
    model.eval()

    softmax = nn.Softmax(dim=1)

    with torch.no_grad():

        predict = model(data)
        predict_softmax = softmax(predict)
        print(predict_softmax)
        predict = torch.argmax(predict_softmax, dim=1)
        print(predict)
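
The manual [CLS]/[SEP] insertion above can usually be collapsed into a single call, assuming `tokenizer` is a Hugging Face tokenizer; a minimal sketch:

# encode() adds the special tokens and returns a [1, seq_length] LongTensor directly.
data = tokenizer.encode("Don't give me your attitude!", return_tensors='pt').to(device)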
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = AlbertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
            return_dict=True,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ALBERT constructor."
            )

        # Note: the branches below re-check the same options validated above, so the final else is unreachable.
        if vocab_size is not None:
            config = AlbertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = AlbertModel(config)
        elif pretrained_model_name is not None:
            model = AlbertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = AlbertConfig.from_json_file(config_filename)
            model = AlbertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" + " be passed into the ALBERT constructor"
            )

        model.to(self._device)

        self.add_module("albert", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
Example #10
 def __init__(self, bert_model='bert-base-cased'):
     super(BERTRepresenter, self).__init__()
     if 'albert' in bert_model.lower():
         # from_pretrained is a classmethod, so building a default config first has no effect.
         self.bert = AlbertModel.from_pretrained(bert_model)
     else:
         # config = BertConfig(vocab_size=24000, hidden_size=264)
         self.bert = BertModel.from_pretrained(bert_model)
def load_tokenizer_model(ckpt):
    state = torch.load(ckpt, map_location=torch.device('cpu'))
    tokenizer = NGRAMTokenizer(state['ngram'])

    config = AlbertConfig(**state['config_dict'])
    model = Consonant(config)
    model.load_state_dict(state['model_state_dict'])

    step = int(ckpt.split('-')[-1].split('.')[0])  # e.g. 'ckpt-0078000.bin' -> 78000

    return tokenizer, model, state['ngram'], step
def main(args):
    with open(args.config) as fp:
        data = json.loads(fp.read())
    config = AlbertConfig(**data)
    model = AlbertForMaskedLM(config)
    model: AlbertForMaskedLM = load_tf_weights_in_albert(
        model, config, args.checkpoint)
    model.save_pretrained(args.output)

    tokenizer = AlbertTokenizer.from_pretrained(args.spiece, keep_accents=True)
    tokenizer.save_pretrained(args.output)
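
The args.config file read by main() is assumed to be a plain JSON file of AlbertConfig keyword arguments (as implied by AlbertConfig(**data)); a minimal sketch with hypothetical values:

import json

albert_kwargs = {"vocab_size": 30000, "embedding_size": 128, "hidden_size": 768,
                 "num_hidden_layers": 12, "num_attention_heads": 12,
                 "intermediate_size": 3072}
with open("albert_config.json", "w") as fp:
    json.dump(albert_kwargs, fp, indent=2)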
Example #13
    def __init__(self, in_dim, hidden_dim, out_dim, num_heads, num_classes=2):
        super(Summarizer, self).__init__()

        albert_base_configuration = AlbertConfig(
            hidden_size=256,
            num_attention_heads=4,
            intermediate_size=1024,
        )

        self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
        self.embedder = AlbertModel(albert_base_configuration)
        self.gat_classifier = GATClassifier(in_dim, hidden_dim, out_dim,
                                            num_heads, num_classes)
Example #14
    def __init__(
            self,
            d_emb: int,
            d_ff: int,
            d_model: int,
            dropout: float,
            max_seq_len: int,
            num_attention_heads: int,
            num_class: int,
            num_hidden_layers: int,
            type_vocab_size: int,
            vocab_size: int,
    ):
        super().__init__()

        # Construct ALBERT model.
        self.encoder = AlbertModel(AlbertConfig(
            attention_probs_dropout_prob=dropout,
            classifier_dropout_prob=dropout,
            embedding_size=d_emb,
            hidden_dropout_prob=dropout,
            hidden_size=d_model,
            initializer_range=0.02,
            inner_group_num=1,
            intermediate_size=d_ff,
            layer_norm_eps=1e-12,
            max_position_embeddings=max_seq_len,
            num_hidden_layers=num_hidden_layers,
            num_hidden_groups=1,
            num_attention_heads=num_attention_heads,
            type_vocab_size=type_vocab_size,
            vocab_size=vocab_size
        ))

        # Dropout layer between encoder and linear layer.
        self.dropout = nn.Dropout(dropout)

        # Linear layer projecting from `d_model` to `num_class`.
        self.linear_layer = nn.Linear(
            in_features=d_model,
            out_features=num_class
        )

        # Linear layer initialization.
        with torch.no_grad():
            nn.init.normal_(
                self.linear_layer.weight,
                mean=0.0,
                std=0.02
            )
            nn.init.zeros_(self.linear_layer.bias)
Example #15
    def __init__(self, vocab_size, max_len) -> None:
        super().__init__(
            AlbertConfig(
                vocab_size=vocab_size,

                hidden_size=512,
                num_attention_heads=8,
                num_hidden_layers=4,
                intermediate_size=1024,

                embedding_size=128,
                max_position_embeddings=max_len
            )
        )
Example #16
 def get_config(self):
     return AlbertConfig(
         vocab_size=self.vocab_size,
         hidden_size=self.hidden_size,
         num_hidden_layers=self.num_hidden_layers,
         num_attention_heads=self.num_attention_heads,
         intermediate_size=self.intermediate_size,
         hidden_act=self.hidden_act,
         hidden_dropout_prob=self.hidden_dropout_prob,
         attention_probs_dropout_prob=self.attention_probs_dropout_prob,
         max_position_embeddings=self.max_position_embeddings,
         type_vocab_size=self.type_vocab_size,
         initializer_range=self.initializer_range,
         num_hidden_groups=self.num_hidden_groups,
     )
def train():
    train_sentences, train_labels = generate_data(FILE_PATH + 'Train_v1.txt',
                                                  MAX_SEQ_LENGTH)
    test_sentences, test_labels = generate_data(FILE_PATH + 'Test_v1.txt',
                                                MAX_SEQ_LENGTH)

    config = AlbertConfig(hidden_size=768)
    model = ALBertClassifyModel(config, num_class=2, fc_dropout=DROPOUT)
    model.to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    start = time.time()
    try:
        for epoch in range(EPOCHS):
            model.train()
            total_loss = 0.
            batch_num = len(train_sentences) // BATCH_SIZE
            batch = generate_batch(train_sentences, train_labels, BATCH_SIZE)
            for i in range(batch_num):
                data_batch, labels_batch = next(
                    batch)  # labels_batch: [batch_size]

                outputs = model(data_batch)

                outputs = outputs.view(
                    -1, outputs.shape[-1])  # [batch_size, class]

                optimizer.zero_grad()
                loss = criterion(outputs, labels_batch)
                total_loss += (loss.cpu().item() * BATCH_SIZE)

                loss.backward()
                optimizer.step()

            f1_score_test, accuracy_test = test(model, test_sentences,
                                                test_labels, BATCH_SIZE)

            print('epoch %d, loss_train %.4f, accuracy_test %.4f, f1_score_test % .4f, time %.2fmin' % \
                  (epoch+1, total_loss/(batch_num*BATCH_SIZE), accuracy_test, f1_score_test, (time.time()-start)/60))
        torch.save(model.state_dict(), SAVE_MODEL_PATH)
    except KeyboardInterrupt:
        # ctrl + c
        print('External interrupt detected; training stopped and the model has been saved automatically.')
        path = './albert_model/epoch_' + str(epoch) + '_epochbert_model.pth'
        torch.save(model.state_dict(), path)
    def initialize(self, ctx):
        torch.set_num_threads(1)
        self.manifest = ctx.manifest
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        serialized_file = self.manifest['model']['serializedFile']
        model_pt_path = os.path.join(model_dir, serialized_file)
        setup_config_path = os.path.join(model_dir, "setup_config.json")

        if os.path.isfile(setup_config_path):
            with open(setup_config_path) as setup_config_file:
                self.setup_config = json.load(setup_config_file)
        else:
            logger.warning('Missing the setup_config.json file.')

        # Load the model and tokenizer from checkpoint and config files based on the chosen save mode.
        # Further setup config can be added.

        # model_pt_path = '../ckpt-0189000.bin'
        self.device = torch.device(
            "cpu"
        )  #"cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

        if self.setup_config["save_mode"] == "torchscript":
            self.model = torch.jit.load(model_pt_path)
            self.tokenizer = NGRAMTokenizer(self.setup_config["ngram"])

        elif self.setup_config["save_mode"] == "pretrained":
            state = torch.load(model_pt_path, map_location=self.device)
            config = AlbertConfig(**state['config_dict'])
            self.model = Consonant(config)
            self.model.load_state_dict(state['model_state_dict'])
            self.tokenizer = NGRAMTokenizer(state["ngram"])

        else:
            logger.warning('Missing the checkpoint or state_dict.')

        self.model.to(self.device)
        self.model.eval()

        logger.debug(
            'Transformer model from path {0} loaded successfully'.format(
                model_pt_path))
        self.initialized = True
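
The handler above reads a setup_config.json placed alongside the model archive; the only keys it consumes are save_mode and ngram. A minimal sketch of such a file, with hypothetical values:

import json

# "torchscript" or "pretrained" selects the loading branch in initialize();
# ngram is forwarded to NGRAMTokenizer.
setup_config = {"save_mode": "pretrained", "ngram": 1}
with open("setup_config.json", "w") as f:
    json.dump(setup_config, f, indent=2)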
Example #19
def get_model(args):
    if args.model_size == 'debug':
        num_hidden_layers = 1
        embedding_size = 8
        hidden_size = 16
        intermediate_size = 32
        num_attention_heads = 2
        args.gen_ratio = 2

    elif args.model_size == 'tiny':
        num_hidden_layers = 4
        embedding_size = 128
        hidden_size = 336
        intermediate_size = 1344
        num_attention_heads = 12
    elif args.model_size == 'small':
        num_hidden_layers = 12
        embedding_size = 128
        hidden_size = 256
        intermediate_size = 1024
        num_attention_heads = 4
    elif args.model_size == 'base':
        num_hidden_layers = 12
        embedding_size = 768
        hidden_size = 768
        intermediate_size = 3072
        num_attention_heads = 12

    else:
        raise ValueError("Unknown model_size: expected 'debug', 'tiny', 'small', or 'base'")

    config = AlbertConfig(
        max_position_embeddings=args.seq_length,
        vocab_size=args.vocab_size,
        num_hidden_layers=num_hidden_layers,
        embedding_size=embedding_size,
        hidden_size=hidden_size // args.gen_ratio,
        intermediate_size=intermediate_size // args.gen_ratio,
        num_attention_heads=num_attention_heads // args.gen_ratio,
    )

    model = AlbertForMaskedLM(config)
    return model
Example #20
 def __call_model_tf(self):
     if self.model_to_use.lower() == 'bert':
         self.config = BertConfig(num_labels=2)
         self.model = TFBertForSequenceClassification.from_pretrained(
             'bert-base-uncased', config=self.config)
     elif self.model_to_use.lower() == 'albert':
         self.config = AlbertConfig(num_labels=2)
         self.model = TFAlbertForSequenceClassification.from_pretrained(
             'albert-base-v1', config=self.config)
     elif self.model_to_use.lower() == 'electra':
         print(
             'Electra is not available for sequence classification with TensorFlow yet.'
         )
     elif self.model_to_use.lower() == 'distilbert':
         self.config = DistilBertConfig(num_labels=2)
         self.model = TFDistilBertForSequenceClassification.from_pretrained(
             'distilbert-base-uncased', config=self.config)
     else:
         print('Model not available yet.')
Example #21
 def __call_model_torch(self):
     if self.model_to_use.lower() == 'bert':
         self.config = BertConfig(num_labels=2)
         self.model = BertForSequenceClassification.from_pretrained(
             'bert-base-uncased', config=self.config)
     elif self.model_to_use.lower() == 'albert':
         self.config = AlbertConfig(num_labels=2)
         self.model = AlbertForSequenceClassification.from_pretrained(
             'albert-base-v1', config=self.config)
     elif self.model_to_use.lower() == 'electra':
         self.config = ElectraConfig(num_labels=2)
         self.model = ElectraForSequenceClassification.from_pretrained(
             'google/electra-small-discriminator', config=self.config)
     elif self.model_to_use.lower() == 'distilbert':
         self.config = DistilBertConfig(num_labels=2)
         self.model = DistilBertForSequenceClassification.from_pretrained(
             'distilbert-base-uncased', config=self.config)
     else:
         print('Model not available yet.')
    def __init__(self,
                 train_path: str = None,
                 dev_path: str = None,
                 test_path: str = None,
                 model_path: str = None,
                 config_path: str = None,
                 tokenizer: AlbertTokenizer = None,
                 num_classes: int = 2,
                 cuda_device: int = 0,
                 batch_size: int = 4,
                 num_workers: int = 0,
                 lr: float = 2e-5,
                 weight_decay: float = 0.1,
                 warm_up: int = 20):
        super(KbAlbertClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.cuda_device = cuda_device
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.lr = lr
        self.weight_decay = weight_decay
        self.warm_up = warm_up

        self.save_hyperparameters()

        self.train_dataset = KbAlbertDataset(train_path, tokenizer)
        self.val_dataset = KbAlbertDataset(dev_path, tokenizer)
        self.test_dataset = KbAlbertDataset(test_path, tokenizer)

        with open(config_path, encoding='UTF-8') as f:
            config_dict = json.load(f)
        config = AlbertConfig(**config_dict)
        self.text_embedding = AlbertModel.from_pretrained(pretrained_model_name_or_path=model_path,
                                                          config=config)

        self.classifier_hidden_size = self.text_embedding.config.hidden_size
        self.classifier = nn.Linear(self.classifier_hidden_size, self.num_classes)
def main(tokenizer_path,
         dataset_path,
         save_path='alectra-small',
         max_steps=1e6,
         accumulate_grad_batches=1,
         gpus=None,
         num_tpu_cores=None,
         distributed_backend=None,
         val_check_interval=0.25,
         val_check_percent=0.25,
         generator_type='albert',
         num_hidden_groups=1,
         d_loss_weight=50,
         mlm_prob=0.15,
         learning_rate=5e-4,
         warmup_steps=10000,
         batch_size=128,
         num_workers=2,
         tie_embedding_proj=False,
         tie_encoder=True,
         shuffle=True,
         lr_schedule='linear',
         resume_from_checkpoint=None,
         use_polyaxon=False):
    # init tokenizer.  only need it for the special chars.
    tokenizer = BertWordPieceTokenizer(tokenizer_path)

    # init generator.
    if generator_type == 'albert':
        generator_config = AlbertConfig(
            vocab_size=tokenizer._tokenizer.get_vocab_size(),
            hidden_size=256,
            embedding_size=128,
            num_hidden_layers=3,
            num_attention_heads=1,
            num_hidden_groups=num_hidden_groups,
            intermediate_size=1024,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            classifier_dropout_prob=0.1,
            max_position_embeddings=128)
        generator = AlbertForMaskedLM(generator_config)
    elif generator_type == 'bert':
        generator_config = BertConfig(
            vocab_size=tokenizer._tokenizer.get_vocab_size(),
            hidden_size=128,
            num_hidden_layers=3,
            num_attention_heads=1,
            intermediate_size=256,
            max_position_embeddings=128)
        generator = BertForMaskedLM(generator_config)
        tie_weights(generator.cls.predictions.decoder,
                    generator.bert.embeddings.word_embeddings)
    else:
        raise Exception(f"invalid generator type: {generator_type}")

    # init discriminator.
    discriminator_config = AlbertConfig(
        vocab_size=tokenizer._tokenizer.get_vocab_size(),
        hidden_size=256,
        embedding_size=128,
        num_hidden_layers=12,
        num_attention_heads=4,
        num_hidden_groups=num_hidden_groups,
        intermediate_size=1024,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        classifier_dropout_prob=0.1,
        max_position_embeddings=128)
    discriminator = AlbertForTokenClassification(discriminator_config)

    # Tie the embedding weights.
    tie_weights(discriminator.base_model.embeddings.word_embeddings,
                generator.base_model.embeddings.word_embeddings)
    tie_weights(discriminator.base_model.embeddings.position_embeddings,
                generator.base_model.embeddings.position_embeddings)
    tie_weights(discriminator.base_model.embeddings.token_type_embeddings,
                generator.base_model.embeddings.token_type_embeddings)

    if generator_type == 'albert' and tie_encoder:
        print('tying albert encoder layers')
        discriminator.albert.encoder.albert_layer_groups = generator.albert.encoder.albert_layer_groups
    if generator_type == 'albert' and tie_embedding_proj:
        print('tying embedding projection layers')
        discriminator.albert.encoder.embedding_hidden_mapping_in = generator.albert.encoder.embedding_hidden_mapping_in

    # init training module.
    training_config = DiscLMTrainingModuleConfig(max_steps,
                                                 d_loss_weight=d_loss_weight,
                                                 save_path=save_path,
                                                 weight_decay=0.01,
                                                 learning_rate=learning_rate,
                                                 epsilon=1e-6,
                                                 lr_schedule=lr_schedule,
                                                 warmup_steps=warmup_steps)
    if use_polyaxon:
        checkpoint_fn = polyaxon_checkpoint_fn
    else:
        checkpoint_fn = None
    lightning_module = DiscLMTrainingModule(generator,
                                            discriminator,
                                            training_config,
                                            checkpoint_fn=checkpoint_fn)

    # init trainer.
    trainer = Trainer(accumulate_grad_batches=accumulate_grad_batches,
                      gpus=gpus,
                      num_tpu_cores=num_tpu_cores,
                      distributed_backend=distributed_backend,
                      max_steps=max_steps,
                      resume_from_checkpoint=resume_from_checkpoint,
                      val_check_percent=val_check_percent,
                      val_check_interval=val_check_interval)

    # init dataloaders.
    train_loader, val_loader, _ = get_dataloaders(tokenizer, dataset_path,
                                                  trainer, mlm_prob,
                                                  batch_size, num_workers,
                                                  shuffle)

    # train.
    trainer.fit(lightning_module, train_loader, val_loader)

    # save the model.
    output_path = os.path.join(save_path, 'discriminator', 'final')
    os.makedirs(output_path, exist_ok=True)
    lightning_module.discriminator.base_model.save_pretrained(output_path)
    if checkpoint_fn:
        checkpoint_fn(lightning_module)
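
tie_weights here is a project-specific helper; a minimal sketch of what such a helper typically does (an assumption, not the repository's actual implementation):

def tie_weights(module_a, module_b):
    # Point both modules at the same Parameter so gradients update one shared weight tensor.
    module_a.weight = module_b.weight

Sharing the token and position embeddings between generator and discriminator this way mirrors the weight-sharing setup described in the ELECTRA paper.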
Example #24
train_dataset = np.array(list(dict(train_encodings).values()))
val_dataset = np.array(list(dict(val_encodings).values()))

BATCH_SIZE = 16

# Create a callback that saves the model's weights at the end of each epoch
checkpoint_path = "albert16_ckpt/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True)

save_model = True

config = AlbertConfig(num_labels=3,
                      return_dict=True,
                      model_type='albert-base-v2')

model = TFAlbertForSequenceClassification(config=config)

if save_model:
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer,
                  loss=model.compute_loss,
                  metrics=['accuracy'])

    model.fit(train_dataset[0],
              np.array(y_list),
              epochs=5,
              batch_size=BATCH_SIZE,
              callbacks=[cp_callback])
Example #25
def main():

    args = make_parser()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    seed_everything(args.seed)

    # Prepare output directory
    if not os.path.exists(os.path.join('../', args.output_dir)):
        os.mkdir(os.path.join('../', args.output_dir))
  
    args.output_dir = os.path.join('../', args.output_dir, args.exp_name)
    if os.path.exists(args.output_dir):
        flag_continue = input(f"Model name [{args.exp_name}] already exists. Do you want to overwrite? (y/n): ")
        if flag_continue.lower() == 'y' or flag_continue.lower() == 'yes':
            shutil.rmtree(args.output_dir)
            os.mkdir(args.output_dir)
        else:
            print("Exit pre-training")
            exit()
    else:
        os.mkdir(args.output_dir)

    # Setup for neptune logger
    neptune_api_key = os.environ['NEPTUNE_API_TOKEN']
    neptune_project_name = 'kevinjo/cs372'
    neptune_experiment_name = args.exp_name
    neptune_logger = NeptuneLogger(
        api_key=neptune_api_key,
        project_name=neptune_project_name,
        experiment_name=neptune_experiment_name,
        tags=["torch", "pretrain"],
        params=vars(args)
    )

    # Setup for pytorch-lightning params
    train_params = dict(
        logger=neptune_logger,
        gpus=args.n_gpu,
        gradient_clip_val=args.max_grad_norm,
        early_stop_callback=None,
        checkpoint_callback=False,
        # val_check_interval=args.validation_step,
        accumulate_grad_batches=args.grad_accum_steps,
        max_steps=args.max_steps,
        benchmark=args.benchmark,
    )

    # Setup for albert model 
    albert_base_configuration = AlbertConfig(
        classifier_dropout_prob=args.classifier_dropout_prob,
        hidden_size=args.hidden_size,
        embedding_size=args.embedding_size,
        num_attention_heads=args.num_attention_heads,
        num_hidden_layers=args.num_hidden_layers,
        num_hidden_groups=args.num_hidden_groups,
        intermediate_size=args.intermediate_size,
        vocab_size=args.vocab_size,
        max_position_embeddings=args.max_position_embeddings,
        output_vocab_size=args.output_vocab_size,
        type_vocab_size=args.type_vocab_size,
    )
    model = ConsonantAlbert(args, albert_base_configuration)

    # Start model training
    trainer = pl.Trainer(auto_lr_find=False, profiler=False, amp_level='O2', precision=16, **train_params)
    if args.do_train:
        trainer.fit(model)
    return
import torch
from transformers import AlbertModel, AlbertConfig

from consonant.model.modeling import Consonant
from consonant.model.tokenization import NGRAMTokenizer

if __name__ == '__main__':

    ckpt = '../ckpt-0078000.bin'
    device = torch.device(
        "cpu"
    )  #"cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")
    state = torch.load(ckpt, map_location=device)
    print(state['ngram'])

    config = AlbertConfig(**state['config_dict'])
    config.attention_probs_dropout_prob = 0.0
    config.hidden_dropout_prob = 0.0
    print(config)
    model = Consonant(config)
    model.load_state_dict(state['model_state_dict'])

    tokenizer = NGRAMTokenizer(1)
    inputs = tokenizer.encode("sample text",
                              max_char_length=100,
                              return_attention_mask=True)
    input_ids = torch.tensor([inputs["head_ids"]], dtype=torch.long)

    traced_model = torch.jit.trace(model, [input_ids, input_ids])
    torch.jit.save(traced_model, "traced_model.pt")
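
Once saved, the traced model can be reloaded without the Consonant class being importable; a minimal usage sketch, reusing the input_ids tensor built above:

loaded = torch.jit.load("traced_model.pt")
outputs = loaded(input_ids, input_ids)  # same positional inputs as used for tracing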
Example #27
def albert_config(cfg, args) -> AlbertConfig:
    model_name = (
        f"calbert-{cfg.model.name}-{'uncased' if cfg.vocab.lowercase else 'cased'}"
    )

    return AlbertConfig(vocab_size=cfg.vocab.max_size, **dict(cfg.model))
Example #28
        self.sum += val * n
        self.count += n
        self.avg = float(self.sum) / float(self.count)


def is_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


if __name__ == '__main__':

    from transformers import BertConfig, AlbertConfig, BertModel, AlbertModel
    bert = BertModel(
        BertConfig(hidden_size=768,
                   num_attention_heads=12,
                   intermediate_size=3072))
    albert = AlbertModel(
        AlbertConfig(hidden_size=768,
                     num_attention_heads=12,
                     intermediate_size=3072))

    print("Number of parameters in BERT:   %d" % count_parameters(bert))
    print("Number of parameters in ALBERT: %d" % count_parameters(albert))
    def __init__(self, hparams):
        torch.nn.Module.__init__(self)

        assert isinstance(
            hparams.encoder, dict
        ), "hparams.encoder must be a dict. If not multi node types, use MonoplexEmbedder instead."
        assert isinstance(
            hparams.embedder, dict
        ), "hparams.embedder must be a dict. If not multi-layer, use MonoplexEmbedder instead."
        self.hparams = copy.copy(hparams)

        ################### Encoding ####################
        self.node_types = list(hparams.encoder.keys())
        for node_type, encoder in hparams.encoder.items():
            if encoder == "ConvLSTM":
                hparams.vocab_size = self.hparams.vocab_size[node_type]
                self.set_encoder(node_type, ConvLSTM(hparams))

            elif encoder == "Albert":
                config = AlbertConfig(
                    vocab_size=hparams.vocab_size,
                    embedding_size=hparams.word_embedding_size,
                    hidden_size=hparams.encoding_dim,
                    num_hidden_layers=hparams.num_hidden_layers,
                    num_hidden_groups=hparams.num_hidden_groups,
                    hidden_dropout_prob=hparams.hidden_dropout_prob,
                    attention_probs_dropout_prob=hparams.
                    attention_probs_dropout_prob,
                    num_attention_heads=hparams.num_attention_heads,
                    intermediate_size=hparams.intermediate_size,
                    type_vocab_size=1,
                    max_position_embeddings=hparams.max_length,
                )
                self.set_encoder(node_type, AlbertEncoder(config))

            elif "NodeIDEmbedding" in encoder:
                # `encoder` is a dict with {"NodeIDEmbedding": hparams}
                self.set_encoder(
                    node_type,
                    NodeIDEmbedding(hparams=encoder["NodeIDEmbedding"]))

            elif "Linear" in encoder:
                encoder_hparams = encoder["Linear"]
                self.set_encoder(
                    node_type,
                    torch.nn.Linear(in_features=encoder_hparams["in_features"],
                                    out_features=hparams.encoding_dim))

            else:
                raise Exception(
                    "hparams.encoder must be one of {'ConvLSTM', 'Albert', 'NodeIDEmbedding'}"
                )

        ################### Layer-specific Embedding ####################
        self.layers = list(hparams.embedder)
        if hparams.multiplex_embedder == "ExpandedMultiplexGAT":
            self._embedder = ExpandedMultiplexGAT(
                in_channels=hparams.encoding_dim,
                out_channels=int(hparams.embedding_dim / len(self.node_types)),
                node_types=self.node_types,
                layers=self.layers,
                dropout=hparams.nb_attn_dropout)
        else:
            print(
                '"multiplex_embedder" not used. Concatenate multi-layer embeddings instead.'
            )

        ################### Classifier ####################
        if hparams.classifier == "Dense":
            self._classifier = DenseClassification(hparams)
        elif hparams.classifier == "HierarchicalAWX":
            self._classifier = HierarchicalAWX(hparams)
        else:
            raise Exception("hparams.classifier must be one of {'Dense'}")

        if hparams.use_hierar:
            label_map = pd.Series(range(len(hparams.classes)),
                                  index=hparams.classes).to_dict()
            hierar_relations = get_hierar_relations(
                hparams.hierar_taxonomy_file, label_map=label_map)

        self.criterion = ClassificationLoss(
            n_classes=hparams.n_classes,
            class_weight=None if not hasattr(hparams, "class_weight") else
            torch.tensor(hparams.class_weight),
            loss_type=hparams.loss_type,
            hierar_penalty=hparams.hierar_penalty
            if hparams.use_hierar else None,
            hierar_relations=hierar_relations if hparams.use_hierar else None)
    def __init__(self, hparams):
        torch.nn.Module.__init__(self)

        assert isinstance(
            hparams.encoder, dict
        ), "hparams.encoder must be a dict. If not multi node types, use MonoplexEmbedder instead."
        assert isinstance(
            hparams.embedder, dict
        ), "hparams.embedder must be a dict. If not multi-layer, use MonoplexEmbedder instead."
        self.hparams = hparams

        ################### Encoding ####################
        self.node_types = list(hparams.encoder.keys())
        for node_type, encoder in hparams.encoder.items():
            if encoder == "ConvLSTM":
                assert not (len(hparams.encoder) > 1
                            and not len(hparams.vocab_size) > 1)
                self.set_encoder(node_type, ConvLSTM(hparams))

            elif encoder == "Albert":
                assert not (len(hparams.encoder) > 1
                            and not len(hparams.vocab_size) > 1)
                config = AlbertConfig(
                    vocab_size=hparams.vocab_size,
                    embedding_size=hparams.word_embedding_size,
                    hidden_size=hparams.encoding_dim,
                    num_hidden_layers=hparams.num_hidden_layers,
                    num_hidden_groups=hparams.num_hidden_groups,
                    hidden_dropout_prob=hparams.hidden_dropout_prob,
                    attention_probs_dropout_prob=hparams.
                    attention_probs_dropout_prob,
                    num_attention_heads=hparams.num_attention_heads,
                    intermediate_size=hparams.intermediate_size,
                    type_vocab_size=1,
                    max_position_embeddings=hparams.max_length,
                )
                self.set_encoder(node_type, AlbertEncoder(config))

            elif "NodeIDEmbedding" in encoder:
                # `encoder` is a dict with {"NodeIDEmbedding": hparams}
                self.set_encoder(
                    node_type,
                    NodeIDEmbedding(hparams=encoder["NodeIDEmbedding"]))
            elif "Linear" in encoder:
                encoder_hparams = encoder["Linear"]
                self.set_encoder(
                    node_type,
                    torch.nn.Linear(in_features=encoder_hparams["in_features"],
                                    out_features=hparams.encoding_dim))

            else:
                raise Exception(
                    "hparams.encoder must be one of {'ConvLSTM', 'Albert', 'NodeIDEmbedding'}"
                )

        ################### Layer-specific Embedding ####################
        for subnetwork_type, embedder_model in hparams.embedder.items():
            if embedder_model == "GAT":
                self.set_embedder(subnetwork_type, GAT(hparams))
            elif embedder_model == "GCN":
                self.set_embedder(subnetwork_type, GCN(hparams))
            elif embedder_model == "GraphSAGE":
                self.set_embedder(subnetwork_type, GraphSAGE(hparams))
            else:
                raise Exception(
                    f"Embedder model for hparams.embedder[{subnetwork_type}]] must be one of ['GAT', 'GCN', 'GraphSAGE']"
                )

        ################### Multiplex Embedding ####################
        layers = list(hparams.embedder.keys())
        self.layers = layers
        if hparams.multiplex_embedder == "MultiplexLayerAttention":
            self._multiplex_embedder = MultiplexLayerAttention(
                embedding_dim=hparams.embedding_dim,
                hidden_dim=hparams.multiplex_hidden_dim,
                attention_dropout=hparams.multiplex_attn_dropout,
                layers=layers)
            hparams.embedding_dim = hparams.multiplex_hidden_dim
        elif hparams.multiplex_embedder == "MultiplexNodeAttention":
            self._multiplex_embedder = MultiplexNodeAttention(
                embedding_dim=hparams.embedding_dim,
                hidden_dim=hparams.multiplex_hidden_dim,
                attention_dropout=hparams.multiplex_attn_dropout,
                layers=layers)
            hparams.embedding_dim = hparams.multiplex_hidden_dim
        else:
            print(
                '"multiplex_embedder" not used. Concatenate multi-layer embeddings instead.'
            )
            hparams.embedding_dim = hparams.embedding_dim * len(
                hparams.embedder)

        ################### Classifier ####################
        if hparams.classifier == "Dense":
            self._classifier = DenseClassification(hparams)
        elif hparams.classifier == "HierarchicalAWX":
            self._classifier = HierarchicalAWX(hparams)
        else:
            raise Exception("hparams.classifier must be one of {'Dense'}")

        if hparams.use_hierar:
            label_map = pd.Series(range(len(hparams.classes)),
                                  index=hparams.classes).to_dict()
            hierar_relations = get_hierar_relations(
                hparams.hierar_taxonomy_file, label_map=label_map)

        self.criterion = ClassificationLoss(
            n_classes=hparams.n_classes,
            class_weight=None if not hasattr(hparams, "class_weight") else
            torch.tensor(hparams.class_weight),
            loss_type=hparams.loss_type,
            hierar_penalty=hparams.hierar_penalty
            if hparams.use_hierar else None,
            hierar_relations=hierar_relations if hparams.use_hierar else None)