def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size).clamp(3)
        input_ids[:, -1] = self.eos_token_id  # Eos Token

        decoder_input_ids = ids_tensor([self.batch_size, self.seq_length],
                                       self.vocab_size)

        config = PegasusConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            encoder_layers=self.num_hidden_layers,
            decoder_layers=self.num_hidden_layers,
            encoder_attention_heads=self.num_attention_heads,
            decoder_attention_heads=self.num_attention_heads,
            encoder_ffn_dim=self.intermediate_size,
            decoder_ffn_dim=self.intermediate_size,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
        )
        inputs_dict = prepare_pegasus_inputs_dict(config, input_ids,
                                                  decoder_input_ids)
        return config, inputs_dict
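
The prepare_pegasus_inputs_dict helper referenced above is defined elsewhere in the test file. A minimal sketch of such a helper, assuming the attention masks are simply derived from the pad token, could look like this:

def prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids,
                                attention_mask=None, decoder_attention_mask=None):
    # Sketch only: mask out pad tokens for both encoder and decoder inputs.
    if attention_mask is None:
        attention_mask = input_ids.ne(config.pad_token_id)
    if decoder_attention_mask is None:
        decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
    return {
        "input_ids": input_ids,
        "decoder_input_ids": decoder_input_ids,
        "attention_mask": attention_mask,
        "decoder_attention_mask": decoder_attention_mask,
    }
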
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)

        lm_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

        config = PegasusConfig(
            vocab_size=self.vocab_size,
            d_model=self.d_model,
            decoder_layers=self.decoder_layers,
            decoder_ffn_dim=self.decoder_ffn_dim,
            encoder_attention_heads=self.encoder_attention_heads,
            decoder_attention_heads=self.decoder_attention_heads,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.bos_token_id,
            use_cache=self.use_cache,
            pad_token_id=self.pad_token_id,
            decoder_start_token_id=self.decoder_start_token_id,
            max_position_embeddings=self.max_position_embeddings,
            is_encoder_decoder=self.is_encoder_decoder,
        )

        return (
            config,
            input_ids,
            attention_mask,
            lm_labels,
        )
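
Both testers build their random token ids with the ids_tensor utility from the transformers test helpers. A rough sketch of that utility is:

import torch

def ids_tensor(shape, vocab_size):
    # Rough sketch of the test utility: a random LongTensor of ids in [0, vocab_size).
    return torch.randint(0, vocab_size, tuple(shape), dtype=torch.long)
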
Example #3
def __init__(self, parent):
    self.config = PegasusConfig(
        vocab_size=99,
        d_model=24,
        encoder_layers=2,
        decoder_layers=2,
        encoder_attention_heads=2,
        decoder_attention_heads=2,
        encoder_ffn_dim=32,
        decoder_ffn_dim=32,
        max_position_embeddings=48,
        add_final_layer_norm=True,
    )

def get_pipeline_config(self):
    return PegasusConfig(
        vocab_size=200,
        d_model=self.hidden_size,
        encoder_layers=self.num_hidden_layers,
        decoder_layers=self.num_hidden_layers,
        encoder_attention_heads=self.num_attention_heads,
        decoder_attention_heads=self.num_attention_heads,
        encoder_ffn_dim=self.intermediate_size,
        decoder_ffn_dim=self.intermediate_size,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        max_position_embeddings=200,
        eos_token_id=self.eos_token_id,
        bos_token_id=self.bos_token_id,
        pad_token_id=self.pad_token_id,
    )
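
For illustration, a config this small can be turned into a tiny model and run end to end. This is a hypothetical usage sketch (not part of the original tester), assuming torch and transformers are installed:

import torch
from transformers import PegasusConfig, PegasusModel

tiny_config = PegasusConfig(
    vocab_size=99, d_model=24,
    encoder_layers=2, decoder_layers=2,
    encoder_attention_heads=2, decoder_attention_heads=2,
    encoder_ffn_dim=32, decoder_ffn_dim=32,
    max_position_embeddings=48, add_final_layer_norm=True,
)
model = PegasusModel(tiny_config)
input_ids = torch.randint(3, tiny_config.vocab_size, (2, 10))
decoder_input_ids = torch.randint(3, tiny_config.vocab_size, (2, 7))
outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
print(outputs.last_hidden_state.shape)  # torch.Size([2, 7, 24])
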
def convert_pegasus_to_bart(
        tf_weights: dict,
        cfg_updates: dict) -> PegasusForConditionalGeneration:
    cfg_kwargs = DEFAULTS.copy()
    cfg_kwargs.update(cfg_updates)

    cfg = PegasusConfig(**cfg_kwargs)
    bart = PegasusForConditionalGeneration(cfg)
    sd = bart.model.state_dict()
    mapping = {}
    for k, v in tf_weights.items():
        new_k = rename_state_dict_key(k)
        if new_k not in sd:
            raise ValueError(
                f"could not find new key {new_k} in state dict. (converted from {k})"
            )

        if "dense" in k or "proj" in new_k:
            v = v.T
        mapping[new_k] = torch.tensor(v, dtype=sd[new_k].dtype)
        assert v.shape == sd[new_k].shape, f"{new_k}, {k}, {v.shape}, {sd[new_k].shape}"
    # make sure embedding.padding_idx is respected
    mapping["shared.weight"][cfg.pad_token_id] = torch.zeros_like(
        mapping["shared.weight"][cfg.pad_token_id + 1])
    mapping["encoder.embed_tokens.weight"] = mapping["shared.weight"]
    mapping["decoder.embed_tokens.weight"] = mapping["shared.weight"]
    empty_biases = {
        k: torch.zeros_like(v)
        for k, v in sd.items() if k.endswith("bias") and k not in mapping
    }
    mapping.update(**empty_biases)
    missing, extra = bart.model.load_state_dict(mapping, strict=False)
    unexpected_missing = [
        k for k in missing if k not in
        ["encoder.embed_positions.weight", "decoder.embed_positions.weight"]
    ]
    assert unexpected_missing == [], f"no matches found for the following torch keys {unexpected_missing}"
    assert extra == [], f"no matches found for the following tf keys {extra}"
    return bart
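
DEFAULTS and rename_state_dict_key come from the surrounding conversion script. The tf_weights dict is usually read straight out of a TensorFlow checkpoint; a hedged sketch of that step, assuming tensorflow is installed and using a hypothetical checkpoint path, is:

import tensorflow as tf

def get_tf_weights_as_numpy(checkpoint_path):
    # Sketch: read every checkpoint variable into a {name: numpy array} dict.
    tf_weights = {}
    for name, _shape in tf.train.list_variables(checkpoint_path):
        if "global_step" in name:
            continue  # skip optimizer bookkeeping variables
        tf_weights[name] = tf.train.load_variable(checkpoint_path, name)
    return tf_weights

# tf_weights = get_tf_weights_as_numpy("ckpt/aeslc/model.ckpt-32000")  # hypothetical path
# model = convert_pegasus_to_bart(tf_weights, cfg_updates={})
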
Example #6
def get_parameter_number(model):
    # Name and body assumed: the standard PyTorch parameter-count helper.
    total_num = sum(p.numel() for p in model.parameters())
    trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return {'Total': total_num, 'Trainable': trainable_num}


text = "四海网讯,近日,有媒体报道称:章子怡真怀孕了!报道还援引知情人士消息称,“章子怡怀孕大概四五个月,预产期是年底前后,现在已经不接工作了。”这到底是怎么回事?消息是真是假?针对此消息,23日晚8时30分," \
       "华西都市报记者迅速联系上了与章子怡家里关系极好的知情人士,这位人士向华西都市报记者证实说:“子怡这次确实怀孕了。她已经36岁了,也该怀孕了。章子怡怀上汪峰的孩子后,子怡的父母亲十分高兴。子怡的母亲," \
       "已开始悉心照料女儿了。子怡的预产期大概是今年12月底。”当晚9时,华西都市报记者为了求证章子怡怀孕消息,又电话联系章子怡的亲哥哥章子男,但电话通了," \
       "一直没有人接听。有关章子怡怀孕的新闻自从2013年9月份章子怡和汪峰恋情以来,就被传N遍了!不过,时间跨入2015年,事情却发生着微妙的变化。2015年3月21日,章子怡担任制片人的电影《从天儿降》开机," \
       "在开机发布会上几张合影,让网友又燃起了好奇心:“章子怡真的怀孕了吗?”但后据证实,章子怡的“大肚照”只是影片宣传的噱头。过了四个月的7月22日,《太平轮》新一轮宣传,章子怡又被发现状态不佳,不时深呼吸," \
       "不自觉想捂住肚子,又觉得不妥。然后在8月的一天,章子怡和朋友吃饭,在酒店门口被风行工作室拍到了,疑似有孕在身!今年7月11日,汪峰本来在上海要举行演唱会,后来因为台风“灿鸿”取消了。而消息人士称," \
       "汪峰原来打算在演唱会上当着章子怡的面宣布重大消息,而且章子怡已经赴上海准备参加演唱会了,怎知遇到台风,只好延期,相信9月26日的演唱会应该还会有惊喜大白天下吧。 "

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained(
    'uer/pegasus-base-chinese-cluecorpussmall')

config = PegasusConfig()
# config.activation_dropout = 0.1
config.activation_function = 'relu'
config.d_model = 384
config.decoder_attention_heads = 6
config.decoder_ffn_dim = 1536
config.decoder_start_token_id = 101
config.decoder_layers = 6
config.dropout = 0.0  # disable dropout so repeated predictions give identical results
config.encoder_attention_heads = 6
config.encoder_ffn_dim = 1536
config.encoder_layers = 6
config.forced_eos_token_id = 102
config.scale_embedding = True
config.vocab_size = 21128
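
A hypothetical continuation of this script: build the model from the config above, load matching trained weights, and summarize the text. The import of PegasusForConditionalGeneration and the checkpoint path are assumptions; the original script may load its weights differently.

from transformers import PegasusForConditionalGeneration

model = PegasusForConditionalGeneration(config).to(device)
# In a real run the matching trained weights would be loaded here, e.g.:
# model.load_state_dict(torch.load("pegasus_small_chinese.bin", map_location=device))  # hypothetical path
model.eval()

inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
with torch.no_grad():
    summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=64)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
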