Example #1
def load(path,
         s3_path,
         model,
         encoder,
         model_class,
         quantized=False,
         **kwargs):
    # make sure the model files for `model` are available locally
    # (check_file resolves them against s3_path when missing)
    check_file(path[model], s3_path[model], quantized=quantized, **kwargs)
    # pick the quantized or the full-precision frozen graph
    if quantized:
        model_path = 'quantized'
    else:
        model_path = 'model'

    g = load_graph(path[model][model_path], **kwargs)

    # build the requested text encoder from the bundled vocabulary
    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])

    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path[model]['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)

    return model_class(
        X=g.get_tensor_by_name('import/Placeholder:0'),
        greedy=g.get_tensor_by_name('import/greedy:0'),
        beam=g.get_tensor_by_name('import/beam:0'),
        sess=generate_session(graph=g, **kwargs),
        encoder=encoder,
    )
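
A hedged sketch of how this generic loader might be wired up, borrowing the PATH_TRANSLATION / S3_PATH_TRANSLATION dictionaries and the TRANSLATION class from Example #4 below; the exact call site is an assumption, not something shown in the snippet:

# Hypothetical call site for the loader above; the dictionaries and the
# model class are borrowed from Example #4, the wiring itself is an assumption.
translation_model = load(
    path=PATH_TRANSLATION['ms-en'],
    s3_path=S3_PATH_TRANSLATION['ms-en'],
    model='base',
    encoder='subword',
    model_class=TRANSLATION,
    quantized=False,
)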
Example #2
def load(module, model, encoder, model_class, quantized=False, **kwargs):

    path = check_file(
        file=model,
        module=module,
        keys={
            'model': 'model.pb',
            'vocab': LM_VOCAB[module]
        },
        quantized=quantized,
        **kwargs,
    )
    g = load_graph(path['model'], **kwargs)

    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path['vocab'])

    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)

    inputs = ['Placeholder']
    outputs = ['greedy', 'beam']
    input_nodes, output_nodes = nodes_session(g, inputs, outputs)

    return model_class(
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        sess=generate_session(graph=g, **kwargs),
        encoder=encoder,
    )
Example #3
def load(path, s3_path, model, encoder, model_class, **kwargs):
    check_file(path[model], s3_path[model], **kwargs)
    g = load_graph(path[model]['model'], **kwargs)

    if encoder == 'subword':
        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])

    if encoder == 'yttm':
        bpe, subword_mode = load_yttm(path[model]['vocab'], True)
        encoder = YTTMEncoder(bpe, subword_mode)

    return model_class(
        g.get_tensor_by_name('import/Placeholder:0'),
        g.get_tensor_by_name('import/greedy:0'),
        g.get_tensor_by_name('import/beam:0'),
        generate_session(graph=g, **kwargs),
        encoder,
    )
Example #4
def transformer(model='base', **kwargs):
    """
    Load transformer encoder-decoder model to translate MS-to-EN.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'small'`` - transformer Small parameters.
        * ``'base'`` - transformer Base parameters.
        * ``'large'`` - transformer Large parameters.

    Returns
    -------
    result: malaya.model.tf.TRANSLATION class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise Exception(
            'model not supported, please check supported models from malaya.translation.ms_en.available_transformer()'
        )

    path = PATH_TRANSLATION['ms-en']
    s3_path = S3_PATH_TRANSLATION['ms-en']

    check_file(path[model], s3_path[model], **kwargs)
    g = load_graph(path[model]['model'], **kwargs)

    from malaya.text.t2t import text_encoder
    from malaya.model.tf import TRANSLATION

    encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
    return TRANSLATION(
        g.get_tensor_by_name('import/Placeholder:0'),
        g.get_tensor_by_name('import/greedy:0'),
        g.get_tensor_by_name('import/beam:0'),
        generate_session(graph=g, **kwargs),
        encoder,
    )
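
A minimal usage sketch for the loader above. The attribute path malaya.translation.ms_en.transformer is suggested by the error message in the snippet; the greedy_decoder method on the returned TRANSLATION object is an assumption:

import malaya

# Load the MS-to-EN translator; 'base' is the default architecture.
translator = malaya.translation.ms_en.transformer(model='base')
# greedy_decoder is assumed to accept a list of Malay strings and return
# their English translations.
print(translator.greedy_decoder(['Saya suka makan nasi lemak.']))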
Example #5
def transformer(model='base', **kwargs):
    """
    Load transformer encoder-decoder model to generate a paraphrase given a string.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'base'`` - transformer Base parameters.
        * ``'tiny'`` - transformer Tiny parameters.
        * ``'tiny-bert'`` - BERT-BERT Tiny parameters.
        * ``'bert'`` - BERT-BERT Base parameters.

    Returns
    -------
    result: malaya.model.tf.PARAPHRASE class
    """

    model = model.lower()
    if model not in _transformer_availability:
        raise Exception(
            'model not supported, please check supported models from malaya.paraphrase.available_transformer()'
        )

    if 'bert' in model:

        path = PATH_PARAPHRASE[model]
        s3_path = S3_PATH_PARAPHRASE[model]

        check_file(path, s3_path, **kwargs)
        g = load_graph(path['model'])

        if model in ['bert', 'tiny-bert']:
            from malaya.text.bpe import sentencepiece_tokenizer_bert

            tokenizer = sentencepiece_tokenizer_bert(path['tokenizer'],
                                                     path['vocab'])

        from malaya.model.bert import PARAPHRASE_BERT

        return PARAPHRASE_BERT(
            X=g.get_tensor_by_name('import/Placeholder:0'),
            segment_ids=g.get_tensor_by_name('import/Placeholder_1:0'),
            input_masks=g.get_tensor_by_name('import/Placeholder_2:0'),
            logits=g.get_tensor_by_name('import/greedy:0'),
            sess=generate_session(graph=g),
            tokenizer=tokenizer,
        )

    else:
        path = PATH_PARAPHRASE['transformer']
        s3_path = S3_PATH_PARAPHRASE['transformer']

        check_file(path[model], s3_path[model], **kwargs)
        g = load_graph(path[model]['model'])

        from malaya.text.t2t import text_encoder
        from malaya.model.tf import PARAPHRASE

        encoder = text_encoder.SubwordTextEncoder(path[model]['vocab'])
        return PARAPHRASE(
            g.get_tensor_by_name('import/Placeholder:0'),
            g.get_tensor_by_name('import/greedy:0'),
            g.get_tensor_by_name('import/beam:0'),
            generate_session(graph=g),
            encoder,
        )
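
A minimal usage sketch for the paraphrase loader. The attribute path malaya.paraphrase.transformer is suggested by the error message in the snippet; the greedy_decoder method on the returned model is an assumption:

import malaya

# Load the transformer paraphraser; a 'bert' or 'tiny-bert' model would go
# through the PARAPHRASE_BERT branch instead.
paraphraser = malaya.paraphrase.transformer(model='base')
# greedy_decoder is assumed to take a list of sentences and return paraphrases.
print(paraphraser.greedy_decoder(['Tun Mahathir ialah Perdana Menteri Malaysia.']))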