Example #1
def transformer(model: str = 'small-t5', quantized: bool = False, **kwargs):
    """
    Load a transformer model to generate knowledge graphs in triple format from text,
    MS text -> EN triples.

    Parameters
    ----------
    model : str, optional (default='small-t5')
        Model architecture supported. Allowed values:

        * ``'t5'`` - T5 BASE parameters.
        * ``'small-t5'`` - T5 SMALL parameters.
        * ``'tiny-t5'`` - T5 TINY parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: malaya.model.t5.KnowledgeGraph class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.knowledge_graph.available_transformer()`.'
        )

    return load_t5.load(module='knowledge-graph-triplet',
                        model=model,
                        model_class=KnowledgeGraph,
                        quantized=quantized,
                        **kwargs)
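
A minimal usage sketch for this loader. The module path `malaya.knowledge_graph.transformer` is confirmed by the error message above; the `greedy_decoder` method and the sample sentence are assumptions, so verify them against the returned `KnowledgeGraph` class:

import malaya

# load the default small-t5 model (weights are downloaded on first call)
model = malaya.knowledge_graph.transformer(model='small-t5')
# assumed decode method: most Malaya seq2seq classes expose greedy_decoder(List[str])
triples = model.greedy_decoder(['Mahathir Mohamad ialah bekas Perdana Menteri Malaysia.'])
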
Example #2
def isi_penting(model: str = 't5', quantized: bool = False, **kwargs):
    """
    Load a Transformer model to generate a string given an isi penting (important points).

    Parameters
    ----------
    model : str, optional (default='t5')
        Model architecture supported. Allowed values:

        * ``'t5'`` - T5 BASE parameters.
        * ``'small-t5'`` - T5 SMALL parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: malaya.model.t5.Generator class
    """

    model = model.lower()
    if model not in _isi_penting_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.generator.available_isi_penting()`.'
        )

    return t5_load.load(
        module='generator',
        model=model,
        model_class=Generator,
        quantized=quantized,
        **kwargs,
    )
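
A hedged usage sketch; the module path follows the `malaya.generator.available_isi_penting()` reference above, while `greedy_decoder` and the sample points are assumptions:

import malaya

model = malaya.generator.isi_penting(model='t5')
# assumed API: pass the list of important points (isi penting) to the decoder
text = model.greedy_decoder(['Neelofa tetap dengan keputusannya', 'berkahwin tahun ini'])
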
Example #3
def transformer(model: str = 'small-t5', quantized: bool = False, **kwargs):
    """
    Load a Transformer Spell Corrector.

    Parameters
    ----------
    model : str, optional (default='small-t5')
        Model architecture supported. Allowed values:

        * ``'small-t5'`` - T5 SMALL parameters.
        * ``'tiny-t5'`` - T5 TINY parameters.
        * ``'super-tiny-t5'`` - T5 SUPER TINY parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: malaya.model.t5.Spell class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.spell.available_transformer()`.'
        )
    return t5_load.load(
        module='spelling-correction',
        model=model,
        model_class=T5_Spell,
        quantized=quantized,
        **kwargs,
    )
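
A minimal sketch of loading and calling the spell corrector; `malaya.spell.transformer` matches the error message above, but `greedy_decoder` is an assumed interface to verify against the T5 Spell class:

import malaya

model = malaya.spell.transformer(model='small-t5', quantized=False)
# assumed decode method; input is a list of raw (possibly misspelled) strings
corrected = model.greedy_decoder(['krajaan patut bagi pencen awal skt kpd warga emas'])
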
Example #4
def t5(
    model: str = 'base',
    compressed: bool = True,
    optimized: bool = False,
    **kwargs,
):
    """
    Load a T5 model to generate a string given an isi penting (important points).

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'base'`` - T5 BASE parameters.
        * ``'small'`` - T5 SMALL parameters.

    compressed : bool, optional (default=True)
        If True, will load a compressed model, which is not able to utilize malaya-gpu.
        Compression only reduces the on-disk model size; once loaded into VRAM / RAM, the compressed and uncompressed models are the same size.
        We prefer the uncompressed model because the compressed model is prone to errors.
    
    optimized : bool, optional (default=False)
        If True, will load an optimized uncompressed model, with unnecessary nodes removed and batch norms folded to reduce model size.
        An optimized model is not necessarily faster; it depends entirely on the machine.
        We have no concrete proof that the optimized model maintains the same accuracy as the uncompressed model.

    Returns
    -------
    result: malaya.model.t5.GENERATOR class
    """

    model = model.lower()
    if model not in _t5_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.generator.available_t5()`.'
        )

    from malaya.path import PATH_GENERATOR, S3_PATH_GENERATOR

    from malaya.model.t5 import GENERATOR

    return t5_load.load(
        path=PATH_GENERATOR,
        s3_path=S3_PATH_GENERATOR,
        model=model,
        model_class=GENERATOR,
        compressed=compressed,
        quantized=optimized,
        **kwargs,
    )
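
A sketch for this older loader; note in the code above that the user-facing `optimized` flag is forwarded to `t5_load.load` as `quantized`. The `greedy_decoder` call is an assumed decode method for the GENERATOR class:

import malaya

model = malaya.generator.t5(model='base', compressed=True)
# assumed decode method; pass the list of important points (isi penting)
text = model.greedy_decoder(['Neelofa tetap dengan keputusannya', 'berkahwin tahun ini'])
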
Example #5
def transformer(model: str = 't2t', quantized: bool = False, **kwargs):
    """
    Load a Malaya transformer encoder-decoder model to generate a paraphrase given a string.

    Parameters
    ----------
    model : str, optional (default='t2t')
        Model architecture supported. Allowed values:

        * ``'t2t'`` - Malaya Transformer BASE parameters.
        * ``'small-t2t'`` - Malaya Transformer SMALL parameters.
        * ``'t5'`` - T5 BASE parameters.
        * ``'small-t5'`` - T5 SMALL parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: model
        Depending on `model`, returns one of the following classes:

        * if `t2t` in model, will return `malaya.model.tf.Paraphrase`.
        * if `t5` in model, will return `malaya.model.t5.Paraphrase`.
    """

    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.paraphrase.available_transformer()`.'
        )

    if 't2t' in model:
        return transformer_load.load_lm(
            module='paraphrase',
            model=model,
            model_class=TF_Paraphrase,
            quantized=quantized,
            **kwargs,
        )
    if 't5' in model:
        return t5_load.load(
            module='paraphrase',
            model=model,
            model_class=T5_Paraphrase,
            quantized=quantized,
            **kwargs,
        )
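
A usage sketch showing how the `model` string selects the returned class; `greedy_decoder` is an assumed decode method shared by both branches:

import malaya

t2t_model = malaya.paraphrase.transformer(model='t2t')      # -> malaya.model.tf.Paraphrase
t5_model = malaya.paraphrase.transformer(model='small-t5')  # -> malaya.model.t5.Paraphrase
# assumed common decode interface across both classes
paraphrases = t5_model.greedy_decoder(['Saya suka makan nasi lemak setiap pagi.'])
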
Example #6
def transformer(model: str = 'base', quantized: bool = False, **kwargs):
    """
    Load a transformer encoder-decoder model to true-case text.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'small'`` - Transformer SMALL parameters.
        * ``'base'`` - Transformer BASE parameters.
        * ``'super-tiny-t5'`` - T5 SUPER TINY parameters.
        * ``'super-super-tiny-t5'`` - T5 SUPER SUPER TINY parameters.
        * ``'3x-super-tiny-t5'`` - T5 3X SUPER TINY parameters.
        * ``'3x-super-tiny-t5-4k'`` - T5 3X SUPER TINY 4k vocab size parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: malaya.model.tf.TrueCase class
    """

    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.true_case.available_transformer()`.'
        )

    if 't5' in model:
        return t5_load.load(
            module='true-case',
            model=model,
            model_class=T5_TrueCase,
            quantized=quantized,
            **kwargs,
        )
    else:
        return load_transformer.load(
            module='true-case',
            model=model,
            encoder='yttm',
            model_class=TrueCase,
            quantized=quantized,
            **kwargs,
        )
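
A minimal sketch; `malaya.true_case.transformer` follows the error message above, and `greedy_decoder` is an assumed decode method:

import malaya

model = malaya.true_case.transformer(model='base')
# assumed API: restore casing for a lowercased input string
fixed = model.greedy_decoder(['kuala lumpur ialah ibu negara malaysia'])
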
Example #7
def t5(model: str = 'base', compressed: bool = True, **kwargs):
    """
    Load a T5 model to generate a string given an isi penting (important points).

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'base'`` - T5 BASE parameters.
        * ``'small'`` - T5 SMALL parameters.

    compressed : bool, optional (default=True)
        If True, will load a compressed model, which is not able to utilize malaya-gpu.
        Compression only reduces the on-disk model size; once loaded into VRAM / RAM, the compressed and uncompressed models are the same size.
        We prefer the uncompressed model because the compressed model is prone to errors.

    Returns
    -------
    result: malaya.model.t5.GENERATOR class
    """

    model = model.lower()
    if model not in _t5_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.generator.available_t5()`.'
        )

    from malaya.path import PATH_GENERATOR, S3_PATH_GENERATOR

    from malaya.model.t5 import GENERATOR

    return t5_load.load(
        path=PATH_GENERATOR,
        s3_path=S3_PATH_GENERATOR,
        model=model,
        model_class=GENERATOR,
        compressed=compressed,
        **kwargs,
    )
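
This earlier revision of the same loader lacks the `optimized` flag; a sketch of the uncompressed path the docstring recommends, with an assumed decode method:

import malaya

# compressed=False loads the uncompressed graph, which the docstring prefers
model = malaya.generator.t5(model='small', compressed=False)
text = model.greedy_decoder(['Kerajaan umum bantuan khas', 'rakyat terjejas PKP'])  # assumed method
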
Example #8
def transformer(model: str = 'small-t5', quantized: bool = False, **kwargs):
    """
    Load a Malaya transformer encoder-decoder model to correct `kesalahan tatabahasa` (grammatical errors) in a text.

    Parameters
    ----------
    model : str, optional (default='small-t5')
        Model architecture supported. Allowed values:

        * ``'t5'`` - T5 BASE parameters.
        * ``'small-t5'`` - T5 SMALL parameters.
        * ``'tiny-t5'`` - T5 TINY parameters.
        * ``'super-tiny-t5'`` - T5 SUPER TINY parameters.
        * ``'3x-super-tiny-t5'`` - T5 3X SUPER TINY parameters.

    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: malaya.model.t5.Tatabahasa class
    """

    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.tatabahasa.available_transformer()`.'
        )
    return t5_load.load(
        module='kesalahan-tatabahasa',
        model=model,
        model_class=T5_Tatabahasa,
        quantized=quantized,
        **kwargs,
    )
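
A hedged sketch; the module path comes from the error message above, while `greedy_decoder` and its output format are assumptions to verify against `malaya.model.t5.Tatabahasa`:

import malaya

model = malaya.tatabahasa.transformer(model='small-t5')
# assumed decode method; expected to return the corrected text (possibly with error tags)
fixed = model.greedy_decoder(['Sani mendapat markah yang tertinggi sekali.'])
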
Example #9
def t5(model: str = 'base', compressed: bool = True, **kwargs):
    """
    Load a T5 model to generate a paraphrase given a string.

    Parameters
    ----------
    model : str, optional (default='base')
        Model architecture supported. Allowed values:

        * ``'base'`` - T5 Base parameters.
        * ``'small'`` - T5 Small parameters.

    compressed : bool, optional (default=True)
        If True, will load a compressed model, which is not able to utilize malaya-gpu.
        Compression only reduces the on-disk model size; once loaded into VRAM / RAM, the compressed and uncompressed models are the same size.

    Returns
    -------
    result: malaya.model.t5.PARAPHRASE class
    """

    model = model.lower()
    if model not in _t5_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.paraphrase.available_t5()`.'
        )

    from malaya.path import PATH_PARAPHRASE, S3_PATH_PARAPHRASE
    from malaya.model.t5 import PARAPHRASE

    return t5_load.load(
        path=PATH_PARAPHRASE,
        s3_path=S3_PATH_PARAPHRASE,
        model=model,
        model_class=PARAPHRASE,
        compressed=compressed,
        **kwargs,
    )
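
A sketch for this older paraphrase loader; the `greedy_decoder` call mirrors the newer paraphrase API above and is an assumption for the PARAPHRASE class:

import malaya

model = malaya.paraphrase.t5(model='base', compressed=True)
paraphrase = model.greedy_decoder(['Saya suka makan nasi lemak setiap pagi.'])  # assumed method
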
Example #10
def transformer(model: str = 't2t', quantized: bool = False, **kwargs):
    """
    Load a Malaya transformer encoder-decoder model to generate a summary given a string.

    Parameters
    ----------
    model : str, optional (default='t2t')
        Model architecture supported. Allowed values:

        * ``'t2t'`` - Malaya Transformer BASE parameters.
        * ``'small-t2t'`` - Malaya Transformer SMALL parameters.
        * ``'t5'`` - T5 BASE parameters.
        * ``'small-t5'`` - T5 SMALL parameters.
        * ``'bigbird'`` - BigBird + Pegasus BASE parameters.
        * ``'small-bigbird'`` - BigBird + Pegasus SMALL parameters.
    
    quantized : bool, optional (default=False)
        If True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result: model
        Depending on `model`, returns one of the following classes:

        * if `t2t` in model, will return `malaya.model.tf.Summarization`.
        * if `t5` in model, will return `malaya.model.t5.Summarization`.
        * if `bigbird` in model, will return `malaya.model.bigbird.Summarization`.
    """

    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.summarization.abstractive.available_transformer()`.'
        )

    if 't2t' in model:
        return transformer_load.load_lm(
            module='abstractive-summarization',
            model=model,
            model_class=TF_Summarization,
            quantized=quantized,
            **kwargs,
        )

    if 't5' in model:
        return t5_load.load(
            module='abstractive-summarization',
            model=model,
            model_class=T5_Summarization,
            quantized=quantized,
            **kwargs,
        )

    if 'bigbird' in model:
        return bigbird_load.load_lm(
            module='abstractive-summarization',
            model=model,
            model_class=BigBird_Summarization,
            maxlen=_transformer_availability[model]['Suggested length'],
            quantized=quantized,
            **kwargs,
        )
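
A usage sketch covering the three families this loader dispatches on; `greedy_decoder` is an assumed shared decode method, and note from the code above that `bigbird` models also receive a `maxlen` taken from the availability table:

import malaya

model = malaya.summarization.abstractive.transformer(model='small-t5')
# assumed decode method shared by the tf / t5 / bigbird Summarization classes
summary = model.greedy_decoder(['<long Malay news article text here>'])  # hypothetical input
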