Example #1
def test_electra_model(compute_layout):
    cfg = get_test_cfg()
    cfg.defrost()
    cfg.MODEL.compute_layout = compute_layout
    cfg.freeze()

    # Generate TN layout
    cfg_tn = cfg.clone()
    cfg_tn.defrost()
    cfg_tn.MODEL.layout = 'TN'
    cfg_tn.freeze()

    # Sample data
    batch_size = 4
    sequence_length = 16
    num_mask = 3
    inputs = mx.np.random.randint(0, 10, (batch_size, sequence_length))
    token_types = mx.np.random.randint(0, 2, (batch_size, sequence_length))
    valid_length = mx.np.random.randint(3, sequence_length, (batch_size, ))
    masked_positions = mx.np.random.randint(0, 3, (batch_size, num_mask))

    electra_model = ElectraModel.from_cfg(cfg)
    electra_model.initialize()
    electra_model.hybridize()
    contextual_embedding, pooled_out = electra_model(inputs, token_types,
                                                     valid_length)
    electra_model_tn = ElectraModel.from_cfg(cfg_tn)
    electra_model_tn.share_parameters(electra_model.collect_params())
    electra_model_tn.hybridize()
    contextual_embedding_tn, pooled_out_tn = electra_model_tn(
        inputs.T, token_types.T, valid_length)
    assert_allclose(contextual_embedding.asnumpy(),
                    np.swapaxes(contextual_embedding_tn.asnumpy(), 0, 1), 1E-4,
                    1E-4)
    assert_allclose(pooled_out.asnumpy(), pooled_out_tn.asnumpy(), 1E-4, 1E-4)
Example #2
def convert_tf_config(config_dict, vocab_size):
    """Convert the config file"""

    assert vocab_size == config_dict['vocab_size']
    cfg = ElectraModel.get_cfg().clone()
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size
    cfg.MODEL.units = config_dict['hidden_size']
    cfg.MODEL.embed_size = config_dict['embedding_size']
    cfg.MODEL.hidden_size = config_dict['intermediate_size']
    cfg.MODEL.max_length = config_dict['max_position_embeddings']
    cfg.MODEL.num_heads = config_dict['num_attention_heads']
    cfg.MODEL.num_layers = config_dict['num_hidden_layers']
    cfg.MODEL.pos_embed_type = 'learned'
    cfg.MODEL.activation = config_dict['hidden_act']
    cfg.MODEL.layer_norm_eps = 1E-12
    cfg.MODEL.num_token_types = config_dict['type_vocab_size']
    cfg.MODEL.hidden_dropout_prob = float(config_dict['hidden_dropout_prob'])
    cfg.MODEL.attention_dropout_prob = float(
        config_dict['attention_probs_dropout_prob'])
    cfg.MODEL.dtype = 'float32'
    cfg.MODEL.generator_layers_scale = config_dict['generator_layers']
    cfg.MODEL.generator_units_scale = config_dict['generator_hidden_size']
    cfg.INITIALIZER.weight = [
        'truncnorm', 0, config_dict['initializer_range']
    ]  # TruncNorm(0, 0.02)
    cfg.INITIALIZER.bias = ['zeros']
    cfg.VERSION = 1
    cfg.freeze()
    return cfg
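
The keys read above mirror the original TensorFlow ELECTRA config file. A minimal sketch of such a dict, with purely illustrative values (not taken from any released checkpoint):

tf_config_dict = {
    'vocab_size': 30522,
    'hidden_size': 256,
    'embedding_size': 128,
    'intermediate_size': 1024,
    'max_position_embeddings': 512,
    'num_attention_heads': 4,
    'num_hidden_layers': 12,
    'hidden_act': 'gelu',
    'type_vocab_size': 2,
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'generator_layers': 1.0,        # generator depth as a fraction of the discriminator's
    'generator_hidden_size': 0.25,  # generator width as a fraction of the discriminator's
    'initializer_range': 0.02,
}
cfg = convert_tf_config(tf_config_dict, vocab_size=30522)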
Example #3
def get_pretraining_model(model_name,
                          ctx_l,
                          max_seq_length=128,
                          hidden_dropout_prob=0.1,
                          attention_dropout_prob=0.1,
                          generator_units_scale=None,
                          generator_layers_scale=None):
    """
    A Electra Pretrain Model is built with a generator and a discriminator, in which
    the generator has the same embedding as the discriminator but different backbone.
    """
    cfg, tokenizer, _, _ = get_pretrained_electra(model_name,
                                                  load_backbone=False)
    cfg = ElectraModel.get_cfg().clone_merge(cfg)
    cfg.defrost()
    cfg.MODEL.hidden_dropout_prob = hidden_dropout_prob
    cfg.MODEL.attention_dropout_prob = attention_dropout_prob
    cfg.MODEL.max_length = max_seq_length
    # Keep the original generator size unless explicitly overridden
    if generator_layers_scale:
        cfg.MODEL.generator_layers_scale = generator_layers_scale
    if generator_units_scale:
        cfg.MODEL.generator_units_scale = generator_units_scale
    cfg.freeze()

    model = ElectraForPretrain(cfg,
                               uniform_generator=False,
                               tied_generator=False,
                               tied_embeddings=True,
                               disallow_correct=False,
                               weight_initializer=TruncNorm(stdev=0.02))
    model.initialize(ctx=ctx_l)
    model.hybridize()
    return cfg, tokenizer, model
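
A minimal usage sketch for the function above; the model name and the single-CPU context list are placeholders, so substitute whatever list_pretrained_electra() reports in your environment:

ctx_l = [mx.cpu()]  # replace with e.g. [mx.gpu(0)] when training on GPU
cfg, tokenizer, model = get_pretraining_model('google_electra_small', ctx_l,
                                              max_seq_length=128)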
Example #4
def test_electra_get_pretrained(model_name, ctx):
    assert len(list_pretrained_electra()) > 0
    with tempfile.TemporaryDirectory() as root, ctx:
        cfg, tokenizer, backbone_params_path, (disc_params_path, gen_params_path) =\
            get_pretrained_electra(model_name, root=root,
                                   load_backbone=True, load_disc=True, load_gen=True)
        assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
        electra_model = ElectraModel.from_cfg(cfg)
        electra_model.load_parameters(backbone_params_path)

        electra_disc_model = ElectraDiscriminator(cfg)
        electra_disc_model.load_parameters(disc_params_path)
        electra_disc_model = ElectraDiscriminator(cfg)
        electra_disc_model.backbone_model.load_parameters(backbone_params_path)

        gen_cfg = get_generator_cfg(cfg)
        electra_gen_model = ElectraGenerator(gen_cfg)
        electra_gen_model.load_parameters(gen_params_path)
        disc_backbone = electra_disc_model.backbone_model
        electra_gen_model.tie_embeddings(
            disc_backbone.word_embed.collect_params(),
            disc_backbone.token_type_embed.collect_params(),
            disc_backbone.token_pos_embed.collect_params(),
            disc_backbone.embed_layer_norm.collect_params())

        electra_gen_model = ElectraGenerator(cfg)
        electra_gen_model.backbone_model.load_parameters(backbone_params_path)
Example #5
def get_test_cfg():
    cfg = ElectraModel.get_cfg()
    cfg.defrost()
    cfg.MODEL.vocab_size = 100
    cfg.MODEL.units = 12 * 8
    cfg.MODEL.hidden_size = 128
    cfg.MODEL.num_heads = 2
    cfg.MODEL.num_layers = 2
    cfg.freeze()
    return cfg
Example #6
def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu,
                     electra_path):
    ctx = mx.gpu(gpu) if gpu is not None else mx.cpu()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_path)
    with open(os.path.join(save_dir, 'model.yml'), 'w') as of:
        of.write(cfg.dump())
    new_vocab = HuggingFaceWordPieceTokenizer(vocab_file=vocab_path,
                                              unk_token='[UNK]',
                                              pad_token='[PAD]',
                                              cls_token='[CLS]',
                                              sep_token='[SEP]',
                                              mask_token='[MASK]',
                                              lowercase=True).vocab
    new_vocab.save(os.path.join(save_dir, 'vocab.json'))

    # test input data
    batch_size = 3
    seq_length = 32
    num_mask = 5
    input_ids = np.random.randint(0, cfg.MODEL.vocab_size,
                                  (batch_size, seq_length))
    valid_length = np.random.randint(seq_length // 2, seq_length,
                                     (batch_size, ))
    input_mask = np.broadcast_to(np.arange(seq_length).reshape(1, -1), (batch_size, seq_length)) \
        < np.expand_dims(valid_length, 1)
    segment_ids = np.random.randint(0, 2, (batch_size, seq_length))
    mlm_positions = np.random.randint(0, seq_length // 2,
                                      (batch_size, num_mask))

    tf_input_ids = tf.constant(input_ids, dtype=np.int32)
    tf_input_mask = tf.constant(input_mask, dtype=np.int32)
    tf_segment_ids = tf.constant(segment_ids, dtype=np.int32)

    init_checkpoint = os.path.join(model_dir, 'electra_{}'.format(model_size))
    tf_params = read_tf_checkpoint(init_checkpoint)
    # Get the TensorFlow parameter names, with unused parameters filtered out.
    tf_names = sorted(tf_params.keys())
    tf_names = filter(lambda name: not name.endswith('adam_m'), tf_names)
    tf_names = filter(lambda name: not name.endswith('adam_v'), tf_names)
    tf_names = filter(lambda name: name != 'global_step', tf_names)
    tf_names = filter(lambda name: name != 'generator_predictions/temperature',
                      tf_names)
    tf_names = list(tf_names)

    # reload the electra module for this local scope
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(
        os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)
    from electra.util.training_utils import get_bert_config
    from electra.configure_pretraining import PretrainingConfig
    from electra.model import modeling

    config = PretrainingConfig(model_name='',
                               data_dir='',
                               model_size=model_size)
    bert_config = get_bert_config(config)
    bert_model = modeling.BertModel(bert_config=bert_config,
                                    is_training=False,
                                    input_ids=tf_input_ids,
                                    input_mask=tf_input_mask,
                                    token_type_ids=tf_segment_ids,
                                    use_one_hot_embeddings=False,
                                    embedding_size=cfg.MODEL.embed_size)
    tvars = tf.trainable_variables()
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # the parameter names end with ':0', e.g.
        # 'electra/embeddings/word_embeddings:0'
        backbone_params = {v.name.split(":")[0]: v.read_value() for v in tvars}
        backbone_params = sess.run(backbone_params)
        tf_token_outputs_np = {
            'pooled_output': sess.run(bert_model.get_pooled_output()),
            'sequence_output': sess.run(bert_model.get_sequence_output()),
        }

    # The following part only ensures that the parameters of the backbone model are valid
    for k in backbone_params:
        assert_allclose(tf_params[k], backbone_params[k])

    # Build gluon model and initialize
    gluon_model = ElectraModel.from_cfg(cfg)
    gluon_model.initialize(ctx=ctx)
    gluon_model.hybridize()

    gluon_disc_model = ElectraDiscriminator(cfg)
    gluon_disc_model.initialize(ctx=ctx)
    gluon_disc_model.hybridize()

    gen_cfg = get_generator_cfg(cfg)
    disc_backbone = gluon_disc_model.backbone_model
    gluon_gen_model = ElectraGenerator(gen_cfg)
    gluon_gen_model.tie_embeddings(
        disc_backbone.word_embed.collect_params(),
        disc_backbone.token_type_embed.collect_params(),
        disc_backbone.token_pos_embed.collect_params(),
        disc_backbone.embed_layer_norm.collect_params())
    gluon_gen_model.initialize(ctx=ctx)
    gluon_gen_model.hybridize()

    # prepare test data
    mx_input_ids = mx.np.array(input_ids, dtype=np.int32, ctx=ctx)
    mx_valid_length = mx.np.array(valid_length, dtype=np.int32, ctx=ctx)
    mx_token_types = mx.np.array(segment_ids, dtype=np.int32, ctx=ctx)
    mx_masked_positions = mx.np.array(mlm_positions, dtype=np.int32, ctx=ctx)

    for convert_type in ['backbone', 'disc', 'gen']:
        name_map = get_name_map(tf_names, convert_type=convert_type)
        # run the gluon model once to infer the parameter shapes

        if convert_type == 'backbone':
            model = gluon_model
            contextual_embedding, pooled_output = model(
                mx_input_ids, mx_token_types, mx_valid_length)
        elif convert_type == 'disc':
            model = gluon_disc_model
            contextual_embedding, pooled_output, rtd_scores = \
                model(mx_input_ids, mx_token_types, mx_valid_length)
        elif convert_type == 'gen':
            model = gluon_gen_model
            contextual_embedding, pooled_output, mlm_scores = \
                model(mx_input_ids, mx_token_types, mx_valid_length, mx_masked_positions)

        # copy the TensorFlow parameter values into the gluon parameters using the name map
        mx_params = model.collect_params()
        all_keys = set(mx_params.keys())
        for (src_name, dst_name) in name_map.items():
            tf_param_val = tf_params[src_name]
            if dst_name is None:
                continue
            all_keys.remove(dst_name)
            if src_name.endswith('kernel'):
                mx_params[dst_name].set_data(tf_param_val.T)
            else:
                mx_params[dst_name].set_data(tf_param_val)

        # Merge query/kernel, key/kernel, value/kernel into encoder.all_encoder_layers.{}.attn_qkv.weight
        def convert_qkv_weights(tf_prefix, mx_prefix):
            """
            To convert the qkv weights with different prefix.

            In tensorflow framework, the prefix of query/key/value for the albert model is
            'bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel',
            and that for the bert model is 'bert/encoder/layer_{}/attention/self/key/bias'.
            In gluonnlp framework, the prefix is slightly different as
            'encoder.all_encoder_groups.0.attn_qkv.weight' for albert model and
            'encoder.all_layers.{}.attn_qkv.weight' for bert model, as the
            curly braces {} can be filled with the layer number.
            """
            # Merge query_weight, key_weight, value_weight to mx_params
            query_weight = tf_params['{}/query/kernel'.format(tf_prefix)]
            key_weight = tf_params['{}/key/kernel'.format(tf_prefix)]
            value_weight = tf_params['{}/value/kernel'.format(tf_prefix)]
            mx_params['{}.attn_qkv.weight'.format(mx_prefix)].set_data(
                np.concatenate([query_weight, key_weight, value_weight],
                               axis=1).T)
            # Merge query_bias, key_bias, value_bias to mx_params
            query_bias = tf_params['{}/query/bias'.format(tf_prefix)]
            key_bias = tf_params['{}/key/bias'.format(tf_prefix)]
            value_bias = tf_params['{}/value/bias'.format(tf_prefix)]
            mx_params['{}.attn_qkv.bias'.format(mx_prefix)].set_data(
                np.concatenate([query_bias, key_bias, value_bias], axis=0))

        # The following parameters of the generator are already initialized in the
        # discriminator, so there is no need to reload them.
        disc_embed_params = set([
            'backbone_model.embed_layer_norm.beta',
            'backbone_model.embed_layer_norm.gamma',
            'backbone_model.token_pos_embed._embed.weight',
            'backbone_model.token_type_embed.weight', 'mlm_decoder.3.weight',
            'backbone_model.word_embed.weight'
        ])

        for key in all_keys:
            if convert_type == 'gen' and key in disc_embed_params:
                continue
            assert re.match(
                r'^(backbone_model\.){0,1}encoder\.all_encoder_layers\.[\d]+\.attn_qkv\.(weight|bias)$',
                key) is not None, 'Parameter key {} mismatch'.format(key)

        tf_prefix = None
        for layer_id in range(cfg.MODEL.num_layers):
            mx_prefix = 'encoder.all_encoder_layers.{}'.format(layer_id)
            if convert_type == 'gen':
                mx_prefix = 'backbone_model.' + mx_prefix
                tf_prefix = 'generator/encoder/layer_{}/attention/self'.format(
                    layer_id)
            elif convert_type == 'disc':
                mx_prefix = 'backbone_model.' + mx_prefix
                tf_prefix = 'electra/encoder/layer_{}/attention/self'.format(
                    layer_id)
            else:
                tf_prefix = 'electra/encoder/layer_{}/attention/self'.format(
                    layer_id)

            convert_qkv_weights(tf_prefix, mx_prefix)

        if convert_type == 'backbone':
            # test conversion results for backbone model
            if test_conversion:
                tf_contextual_embedding = tf_token_outputs_np[
                    'sequence_output']
                tf_pooled_output = tf_token_outputs_np['pooled_output']
                contextual_embedding, pooled_output = model(
                    mx_input_ids, mx_token_types, mx_valid_length)
                assert_allclose(pooled_output.asnumpy(), tf_pooled_output,
                                1E-3, 1E-3)
                for i in range(batch_size):
                    ele_valid_length = valid_length[i]
                    assert_allclose(
                        contextual_embedding[
                            i, :ele_valid_length, :].asnumpy(),
                        tf_contextual_embedding[i, :ele_valid_length, :], 1E-3,
                        1E-3)
            model.save_parameters(os.path.join(save_dir, 'model.params'),
                                  deduplicate=True)
            logging.info('Convert the backbone model in {} to {}/{}'.format(
                model_dir, save_dir, 'model.params'))
        elif convert_type == 'disc':
            model.save_parameters(os.path.join(save_dir, 'disc_model.params'),
                                  deduplicate=True)
            logging.info(
                'Convert the discriminator model in {} to {}/{}'.format(
                    model_dir, save_dir, 'disc_model.params'))
        elif convert_type == 'gen':
            model.save_parameters(os.path.join(save_dir, 'gen_model.params'),
                                  deduplicate=True)
            logging.info('Convert the generator model in {} to {}/{}'.format(
                model_dir, save_dir, 'gen_model.params'))

    logging.info('Conversion finished!')
    logging.info('Statistics:')

    old_names = os.listdir(save_dir)
    for old_name in old_names:
        new_name, long_hash = naming_convention(save_dir, old_name)
        old_path = os.path.join(save_dir, old_name)
        new_path = os.path.join(save_dir, new_name)
        shutil.move(old_path, new_path)
        file_size = os.path.getsize(new_path)
        logging.info('\t{}/{} {} {}'.format(save_dir, new_name, long_hash,
                                            file_size))