示例#1
0
def convert_t5(args):
    """Convert a Huggingface T5 checkpoint into GluonNLP format.

    Converts the vocabulary, config, and parameters into ``args.dest_dir``,
    optionally numerically tests the converted model against the original,
    then renames the output files with their sha1sum and logs file stats.

    Parameters
    ----------
    args
        Parsed CLI arguments; reads ``dest_dir``, ``tgt_model_name``,
        ``model_name`` and ``test``.

    Returns
    -------
    dict
        Mapping from item name (e.g. 'config') to the path of the
        converted (renamed) file.
    """
    logging.info('converting T5 model from Huggingface...')
    # makedirs(..., exist_ok=True) also creates missing parent directories
    # and avoids the race between the exists() check and mkdir() that the
    # previous two-step version had
    os.makedirs(args.dest_dir, exist_ok=True)
    converted = {}
    # convert and save vocab
    convert_vocab(args, converted)
    # convert and save config
    gluon_cfg = Gluon_T5.get_cfg(args.tgt_model_name)
    gluon_cfg = convert_config(args, gluon_cfg, converted)
    # convert, (test), and save model
    hf_t5 = HF_T5.from_pretrained(args.model_name)
    gluon_t5 = Gluon_T5.from_cfg(gluon_cfg)
    gluon_t5 = convert_params(args, converted, hf_t5, gluon_t5)
    gluon_t5.hybridize()
    # test model if needed
    if args.test:
        test_conversion(args, hf_t5, gluon_t5)
    # rename with sha1sum
    rename(args, converted)
    logging.info('conversion completed.')
    logging.info('file statistics:')
    # only the paths are needed here, so iterate values() directly
    for new_path in converted.values():
        logging.info('filename: {}\tsize: {}\tsha1sum: {}'.format(
            os.path.basename(new_path), os.path.getsize(new_path),
            sha1sum(new_path)))
    return converted
示例#2
0
def test_t5_model(cfg_key, activation, ctx):
    """Test T5Model layout consistency ('NT' vs 'TN') and consistency of
    outputs under shortened target valid lengths, on a tiny random model.

    Parameters
    ----------
    cfg_key
        Name of the predefined T5 config to start from.
    activation
        Feed-forward activation to configure (passed to cfg.MODEL.activation).
    ctx
        Context manager selecting the device to run under.
    """
    with ctx:
        # shrink the model so the test runs fast
        cfg = T5Model.get_cfg(cfg_key)
        cfg.defrost()
        cfg.MODEL.vocab_size = 256
        cfg.MODEL.d_model = 128
        cfg.MODEL.d_ff = 512
        cfg.MODEL.num_layers = 2
        cfg.MODEL.num_heads = 4
        cfg.MODEL.activation = activation
        cfg.MODEL.layout = 'NT'
        cfg.freeze()

        # same model config but time-major ('TN') layout
        cfg_tn = cfg.clone()
        cfg_tn.defrost()
        cfg_tn.MODEL.layout = 'TN'
        cfg_tn.freeze()

        # test TN and NT consistency
        t5_model = T5Model.from_cfg(cfg)
        t5_model.initialize()
        t5_model.hybridize()
        t5_model_tn = T5Model.from_cfg(cfg_tn)
        # share weights so both layouts compute with identical parameters
        t5_model_tn.share_parameters(t5_model.collect_params())
        t5_model_tn.hybridize()

        batch_size = 8
        src_length = 32
        tgt_length = 18
        # random token ids within the 256-token vocab; valid lengths are
        # drawn so every sequence is at least half (src) / a quarter (tgt)
        # of the maximum length
        src_data = np.random.randint(0, 255, (batch_size, src_length))
        src_valid_length = np.random.randint(src_length // 2, src_length,
                                             (batch_size, ))
        tgt_data = np.random.randint(0, 255, (batch_size, tgt_length))
        tgt_valid_length = np.random.randint(tgt_length // 4, tgt_length,
                                             (batch_size, ))

        out = t5_model(src_data, src_valid_length, tgt_data, tgt_valid_length)
        # TN layout takes time-major inputs, hence the transposes
        out_tn = t5_model_tn(src_data.T, src_valid_length, tgt_data.T,
                             tgt_valid_length)
        # swapping batch/time axes of the NT output must match the TN output
        assert np.allclose(np.swapaxes(out, 0, 1), out_tn, 1E-5, 1E-5)

        # test consistency with various target valid length
        # positions before the (shortened) valid length must be unaffected
        # by whether the target is truncated or merely masked
        for shift in range(1, np.min(tgt_valid_length).item()):
            for partial_out in [
                    t5_model(src_data, src_valid_length, tgt_data[:, :-shift],
                             tgt_valid_length - shift),
                    t5_model(src_data, src_valid_length, tgt_data,
                             tgt_valid_length - shift)
            ]:
                for i in range(batch_size):
                    vl = tgt_valid_length[i].item() - shift
                    assert np.allclose(partial_out[i, :vl], out[i, :vl], 1E-5,
                                       1E-5)
示例#3
0
def convert_config(args, converted):
    """Convert the Huggingface T5 config into a GluonNLP config.

    Downloads the Huggingface ``config.json`` for ``args.model_name``,
    copies the relevant hyperparameters onto a clone of the matching
    GluonNLP config, saves the result as ``model.yml`` under
    ``args.dest_dir``, and records the path in ``converted['config']``.

    Returns
    -------
    The frozen, updated GluonNLP config node.
    """
    print('converting cfg...')
    # download the Huggingface config into a throwaway directory
    gluon_cfg = Gluon_T5.get_cfg(T5_PRETRAINED_MODEL_MAP[args.model_name])
    with tempfile.TemporaryDirectory() as temp_dir:
        hf_cfg_path = os.path.join(temp_dir, 'config.json')
        download(url=T5_PRETRAINED_CONFIG_MAP[args.model_name],
                 path=hf_cfg_path)
        with open(hf_cfg_path, 'r') as f:
            hf_cfg = json.load(f)
        os.remove(hf_cfg_path)
    # copy the relevant attributes over, table-driven:
    # (config node, gluon attribute, huggingface key)
    cfg = gluon_cfg.clone()
    cfg.defrost()
    attr_map = [
        (cfg.MODEL, 'vocab_size', 'vocab_size'),
        (cfg.MODEL, 'd_model', 'd_model'),
        (cfg.MODEL, 'd_kv', 'd_kv'),
        (cfg.MODEL, 'd_ff', 'd_ff'),
        (cfg.MODEL, 'num_layers', 'num_layers'),
        (cfg.MODEL, 'num_heads', 'num_heads'),
        (cfg.MODEL, 'layer_norm_eps', 'layer_norm_epsilon'),
        (cfg.MODEL, 'dropout_prob', 'dropout_rate'),
        (cfg.INITIALIZER, 'init_factor', 'initializer_factor'),
    ]
    for node, attr, hf_key in attr_map:
        setattr(node, attr, hf_cfg[hf_key])
    cfg.freeze()
    # save config and record its location
    config_path = os.path.join(args.dest_dir, 'model.yml')
    with open(config_path, 'w') as f:
        f.write(cfg.dump())
    converted['config'] = config_path
    return cfg
示例#4
0
def test_t5_get_pretrained(ctx):
    """Smoke-test downloading and loading the pretrained google_t5_small
    backbone and wrapping it for inference."""
    with tempfile.TemporaryDirectory() as root, ctx:
        # download into the temporary directory so the test does not touch
        # the global model cache; previously `root` was created but unused
        cfg, tokenizer, backbone_params_path, _ = get_pretrained_t5(
            'google_t5_small', root=root)
        # the model vocab may be padded beyond the sentencepiece vocab
        assert cfg.MODEL.vocab_size >= len(tokenizer._sp_model)
        t5_model = T5Model.from_cfg(cfg)
        t5_model.load_parameters(backbone_params_path)
        t5_model.hybridize()
        t5_inference_model = T5Inference(t5_model)
        t5_inference_model.hybridize()
示例#5
0
def test_t5_inference(layout, activation, ctx):
    """Run the shared nmt model/inference checks against a randomly
    initialized google_t5_small backbone under the given layout and
    activation."""
    with ctx:
        cfg = T5Model.get_cfg('google_t5_small')
        cfg.defrost()
        cfg.MODEL.layout = layout
        cfg.MODEL.activation = activation
        cfg.freeze()

        model = T5Model.from_cfg(cfg)
        model.initialize()
        model.hybridize()

        # while keeping T5Model implementation consistent with Huggingface's, this
        # temporary class would help the backbone fit into the provided nmt tests.
        class _BackboneWithHead(HybridBlock):
            """T5 backbone plus a tied-weight vocabulary projection head."""

            def __init__(self, backbone):
                super().__init__()
                self.model = backbone
                self.layout = backbone.layout
                self.src_vocab_size = backbone.vocab_size
                self.tgt_vocab_size = backbone.vocab_size
                # project hidden states to vocab logits; the weight is tied
                # to the input embedding below
                head = nn.Dense(units=backbone.vocab_size,
                                in_units=backbone._d_model,
                                flatten=False,
                                use_bias=False,
                                dtype=backbone._dtype)
                head.weight = backbone.input_embedding_layer.weight
                self.output_layer = head

            def forward(self, *args, **kwargs):
                hidden = self.model(*args, **kwargs)
                return self.output_layer(hidden)

        backbone = _BackboneWithHead(model)
        backbone.hybridize()
        verify_nmt_model(backbone)

        inference_model = T5Inference(model)
        inference_model.hybridize()
        verify_nmt_inference(train_model=backbone,
                             inference_model=inference_model)