Пример #1
0
def default_only_result_and_positions_config(output_col_prefix):
    """Build the 'Positional result only default' extractor config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_result`` and
        ``get_positions`` switched on.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'get_positions': True,
        'name': 'Positional result only default',
        'description': 'Get the result field and the positions',
    }
    return SparkNLPExtractorConfig(**settings)
Пример #2
0
def default_multi_classifier_dl_config(output_col_prefix='classifier_dl'):
    """Build the 'default_multi_classifier_dl' extractor config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'classifier_dl'``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_result`` and
        ``get_full_meta`` switched on.
    """
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        name='default_multi_classifier_dl',
        description='Get all predicted confidences and labels',
    )
    return SparkNLPExtractorConfig(**settings)
Пример #3
0
def default_sentiment_vivk_config(output_col_prefix='vivk_sentiment'):
    """Build the 'Default sentiment vivk' extractor config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'vivk_sentiment'``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_result`` and
        ``get_full_meta`` switched on.
    """
    # NOTE(review): a ``pop_result_list=True`` setting was commented out in
    # the original code; it stays disabled here.
    config_name = 'Default sentiment vivk'
    config_description = 'Get prediction confidence and the resulting label'
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        name=config_name,
        description=config_description,
    )
Пример #4
0
def meta_NER_config(output_col_prefix='NER'):
    """Build the 'default_ner' extractor config without positions.

    The config requests the result field plus metadata, with the metadata
    white list restricted to ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'NER'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    kept_meta_fields = ['confidence']
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'get_meta': True,
        'meta_white_list': kept_meta_fields,
        'name': 'default_ner',
        'description': 'NER with IOB tags and confidences for them',
    }
    return SparkNLPExtractorConfig(**settings)
Пример #5
0
def default_yake_config(output_col_prefix='keywords'):
    """Build the 'default_yake' keyword extractor config.

    The config requests the result field plus metadata, with the metadata
    white list restricted to ``'score'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'keywords'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    kept_meta_fields = ['score']
    config_description = 'Get all keywords and their confidences'
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='default_yake',
        get_meta=True,
        meta_white_list=kept_meta_fields,
        description=config_description,
    )
Пример #6
0
def default_ner_converter_config(output_col_prefix='ner_chunk'):
    """Build the extractor config used for NER converter output.

    The config requests the result field plus metadata, with the metadata
    white list restricted to ``'entity'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'ner_chunk'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    # NOTE(review): the name 'default_ner' is the same one meta_NER_config
    # uses; possibly a copy-paste leftover -- confirm before renaming.
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'name': 'default_ner',
        'get_meta': True,
        'meta_white_list': ['entity'],
        'description': 'Converts IOB-NER representation into entity representation and generates confidences for the entire entity chunk',
    }
    return SparkNLPExtractorConfig(**settings)
Пример #7
0
def default_sentiment_config(output_col_prefix='sentiment'):
    """Build the sentiment extractor config with a max-confidence extractor.

    The config switches on ``get_result`` and ``get_full_meta`` and attaches
    a ``SparkNLPExtractor`` wrapping ``extract_maximum_confidence`` as its
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'sentiment'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    # NOTE(review): a ``pop_result_list=True`` setting was commented out in
    # the original code; it stays disabled here.
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'get_full_meta': True,
        'name': 'Only keep maximum sentiment confidence ',
        'description': 'Instead of returning the confidence for Postive and Negative, only the confidence of the more likely class will be returned in the confidence column',
        'meta_data_extractor': max_confidence_extractor,
    }
    return SparkNLPExtractorConfig(**settings)
Пример #8
0
def default_language_classifier_config(output_col_prefix='language'):
    """Build the language classifier config with a max-confidence extractor.

    The config switches on ``get_result``, ``get_meta``, ``get_full_meta``
    and ``pop_result_list``, and attaches a ``SparkNLPExtractor`` wrapping
    ``meta_extract_language_classifier_max_confidence`` as its
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'language'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    top_language_extractor = SparkNLPExtractor(
        meta_extract_language_classifier_max_confidence,
        'Extract the maximum confidence from all classified languages and drop the others. TODO top k results',
        'Keep only top language confidence',
    )
    config_description = 'Instead of returning the confidence for every language the Classifier was traiend on, only the maximum confidence will be returned'
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        get_full_meta=True,
        pop_result_list=True,
        name='Only keep maximum language confidence',
        description=config_description,
        meta_data_extractor=top_language_extractor,
    )
Пример #9
0
def default_lang_classifier_config(output_col_prefix='sentiment_dl'):
    """Build the 'default_lang_classifier_config' extractor config.

    The config switches on ``get_result``, ``get_full_meta``,
    ``pop_meta_list`` and ``pop_result_list``, and attaches a
    ``SparkNLPExtractor`` wrapping ``extract_maximum_confidence`` as its
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'sentiment_dl'``.

    Returns:
        The configured ``SparkNLPExtractorConfig``.
    """
    # NOTE(review): the default prefix and the extractor texts talk about
    # sentiment while the name/description talk about languages; this looks
    # like a copy-paste mix-up -- confirm with callers before changing.
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'get_full_meta': True,
        'pop_meta_list': True,
        'pop_result_list': True,
        'name': 'default_lang_classifier_config',
        'description': 'Instead of returning the confidence for every language, just returns the confidence of the most likely language',
        'meta_data_extractor': max_confidence_extractor,
    }
    return SparkNLPExtractorConfig(**settings)
Пример #10
0
def default_full_config(output_col_prefix='DEFAULT'):
    """Build the 'default_full' extractor config with every flag enabled.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'DEFAULT'``.

    Returns:
        A ``SparkNLPExtractorConfig`` with all of the ``get_*`` options
        listed below set to ``True``.
    """
    enabled_flags = (
        'get_positions',
        'get_begin',
        'get_end',
        'get_embeds',
        'get_result',
        'get_meta',
        'get_full_meta',
        'get_annotator_type',
    )
    # Every listed option is switched on with the same value.
    flag_settings = dict.fromkeys(enabled_flags, True)
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        name='default_full',
        description='Default full configuration, keeps all data and gets all metadata fields',
        **flag_settings,
    )
Пример #11
0
def default_only_embedding_config(output_col_prefix):
    """Build the 'Default Embed extractor' config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with only ``get_embeds`` switched on.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_embeds': True,
        'name': 'Default Embed extractor',
        'description': 'Just get the Embed field',
    }
    return SparkNLPExtractorConfig(**settings)
Пример #12
0
def default_only_result_config(output_col_prefix):
    """Build the 'Default result extractor' config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with only ``get_result`` switched on.
    """
    config_name = 'Default result extractor'
    config_description = 'Just gets the result field'
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name=config_name,
        description=config_description,
    )
Пример #13
0
def default_document_config(output_col_prefix='document'):
    """Build the extractor config used for document output.

    No ``name`` or ``description`` is supplied for this config.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``; defaults to ``'document'``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``pop_result_list`` and
        ``get_result`` switched on.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'pop_result_list': True,
        'get_result': True,
    }
    return SparkNLPExtractorConfig(**settings)