def test_get_durations():
    """
    Check get_durations against the test data with return_sources=False.
    :return:
    """
    def to_expected_rows(expected):
        # Both numeric columns of the expected data are cast to float
        # before they are compared with the extractor output.
        rows = []
        for unit, duration_units, duration_days in expected:
            rows.append((unit, float(duration_units), float(duration_days)))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_durations,
        return_sources=False,
        expected_data_converter=to_expected_rows)
def test_get_durations_source():
    """
    Check get_durations against the test data with return_sources=True.
    :return:
    """
    def to_expected_rows(expected):
        # Numeric columns are cast to float; the source column passes through.
        rows = []
        for unit, duration_units, duration_days, source in expected:
            rows.append(
                (unit, float(duration_units), float(duration_days), source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_durations,
        return_sources=True,
        expected_data_converter=to_expected_rows)
Пример #3
0
def test_get_ratios():
    """
    Check get_ratios against the test data with return_sources=False.
    :return:
    """
    def float_or_none(value):
        # Falsy expected cells are compared as None.
        return float(value) if value else None

    def to_expected_rows(expected):
        return [
            (float_or_none(numerator),
             float_or_none(consequent),
             float_or_none(decimal))
            for numerator, consequent, decimal in expected
        ]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_ratios,
        return_sources=False,
        expected_data_converter=to_expected_rows)
Пример #4
0
def test_get_money_source():
    """
    Check get_money against the test data with return_sources=True.
    :return:
    """
    def to_expected_rows(expected):
        rows = []
        for amount, currency, source in expected:
            # Rows whose cells are all falsy are dropped from the expectation.
            if not (amount or currency or source):
                continue
            rows.append((float(amount) if amount else None, currency, source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_money,
        return_sources=True,
        expected_data_converter=to_expected_rows)
Пример #5
0
def test_get_percents_source():
    """
    Check get_percents against the test data with return_sources=True.
    :return:
    """
    def float_or_none(value):
        # Falsy expected cells are compared as None.
        return float(value) if value else None

    def to_expected_rows(expected):
        rows = []
        for unit, value_units, value_decimal, source in expected:
            # Rows whose cells are all falsy are dropped from the expectation.
            if not (unit or value_units or value_decimal or source):
                continue
            rows.append((unit,
                         float_or_none(value_units),
                         float_or_none(value_decimal),
                         source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_percents,
        return_sources=True,
        expected_data_converter=to_expected_rows)
def test_geoentities_alias_filtering():
    """
    Check get_geoentities with a prepared alias blacklist passed in.

    Extracted items are reduced to entity names before comparison.
    :return:
    """
    blacklist_records = [
        ('Afghanistan', None, False),
        ('Mississippi', 'en', False),
        ('AL', 'en', True),
    ]
    prepared_alias_blacklist = prepare_alias_blacklist_dict(blacklist_records)

    def to_entity_names(actual):
        # The first element of each extracted item is passed to get_entity_name.
        return [get_entity_name(item[0]) for item in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_geoentities,
        geo_config_list=_CONFIG,
        prepared_alias_black_list=prepared_alias_blacklist,
        actual_data_converter=to_entity_names,
        debug_print=True,
        start_from_csv_line=6)
Пример #7
0
def test_geoentities_alias_filtering():
    """
    Check get_geoentities_routine with a prepared alias ban list passed in.

    Extracted items are reduced to the ``name`` attribute of their first
    element before comparison.
    :return:
    """
    ban_records = [
        AliasBanRecord('Afghanistan', None, False),
        AliasBanRecord('Mississippi', 'en', False),
        AliasBanRecord('AL', 'en', True),
    ]
    prepared_alias_banlist = prepare_alias_banlist_dict(ban_records)

    def to_entity_names(actual):
        return [item[0].name for item in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_geoentities_routine,
        geo_config_list=_CONFIG,
        prepared_alias_ban_list=prepared_alias_banlist,
        actual_data_converter=to_entity_names,
        debug_print=True,
        start_from_csv_line=6)
Пример #8
0
def test_get_distance():
    """
    Check get_distances against the test data with return_sources=False.
    :return:
    """
    # TODO: Do we need this separate method? test_get_distance_source()
    #   ... tests both distances and sources
    def to_expected_rows(expected):
        # The distance column is compared as a Decimal.
        rows = []
        for distance, units in expected:
            rows.append((Decimal(distance), units))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        func=get_distances,
        return_sources=False,
        expected_data_converter=to_expected_rows)
Пример #9
0
def test_courts():
    """
    Check get_courts using court entries built from the remote courts CSV.

    Extracted items are reduced to the ``name`` attribute of their first
    element before comparison.
    :return:
    """
    courts_csv_url = (
        "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
        ".csv")
    court_df = pandas.read_csv(courts_csv_url)

    # One dictionary entry per CSV row
    court_config_list = [
        build_dictionary_entry(row) for _, row in court_df.iterrows()
    ]

    def to_court_names(actual):
        return [item[0].name for item in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_courts,
        court_config_list=court_config_list,
        actual_data_converter=to_court_names)
Пример #10
0
def test_get_ratios_source():
    """
    Check get_ratios against the test data with return_sources=True.
    :return:
    """
    def decimal_or_none(value):
        # Falsy expected cells are compared as None.
        return Decimal(value) if value else None

    def to_expected_rows(expected):
        rows = []
        for numerator, consequent, decimal, source in expected:
            rows.append((
                decimal_or_none(numerator),
                decimal_or_none(consequent),
                decimal_or_none(decimal),
                source,
            ))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        func=get_ratios,
        return_sources=True,
        expected_data_converter=to_expected_rows)
Пример #11
0
    def test_get_regulations_csv(self):
        """
        Check get_regulations against an explicitly located CSV test file.

        Runs the tuple comparison without and with sources, then compares the
        as_dict=True output for each data line, collecting every mismatch
        before raising a single Exception.
        :return:
        """
        csv_path = os.path.join(
            lexnlp_test_path,
            'lexnlp/extract/en/tests/test_regulations/test_get_regulations.csv'
        )

        def without_source(rows):
            return [(reg_type, reg_code)
                    for reg_type, reg_code, _reg_str in rows]

        def with_source(rows):
            return [(reg_type, reg_code, reg_str)
                    for reg_type, reg_code, reg_str in rows]

        lexnlp_tests.test_extraction_func_on_test_data(
            get_regulations,
            expected_data_converter=without_source,
            return_source=False,
            as_dict=False,
            test_data_path=csv_path)
        lexnlp_tests.test_extraction_func_on_test_data(
            get_regulations,
            expected_data_converter=with_source,
            return_source=True,
            as_dict=False,
            test_data_path=csv_path)

        comparer = DictionaryComparer(check_order=True)
        failures = []

        for (line_index, text, _input_args, expected_rows) in \
                lexnlp_tests.iter_test_data_text_and_tuple(file_name=csv_path):
            expected_dicts = []
            for reg_type, reg_code, reg_str in expected_rows:
                expected_dicts.append({
                    'regulation_type': reg_type,
                    'regulation_code': reg_code,
                    'regulation_text': reg_str
                })
            extracted = list(
                lexnlp_tests.benchmark_extraction_func(get_regulations,
                                                       text,
                                                       return_source=True,
                                                       as_dict=True))

            mismatches = comparer.compare_list_of_dicts(expected_dicts,
                                                        extracted)
            if not mismatches:
                continue
            # Reported line numbers are one greater than the yielded index.
            header = f'Regulation tests, line [{line_index + 1}] errors:\n'
            failures.append(header + '\n'.join(mismatches))

        if failures:
            raise Exception('\n\n'.join(failures))
Пример #12
0
def test_get_percents():
    """
    Check get_percents against the test data with return_sources=False.
    :return:
    """
    def float_or_none(value):
        # Falsy expected cells are compared as None.
        return float(value) if value else None

    def to_expected_rows(expected):
        rows = []
        for unit, value_units, value_decimal in expected:
            # Rows whose cells are all falsy are dropped from the expectation.
            if not (unit or value_units or value_decimal):
                continue
            rows.append((unit,
                         float_or_none(value_units),
                         float_or_none(value_decimal)))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_percents,
        return_sources=False,
        expected_data_converter=to_expected_rows)
def test_get_regulations():
    """
    Check get_regulations in tuple mode and in dict mode.

    Tuple output is compared via test_extraction_func_on_test_data, without
    and with sources; dict output (as_dict=True) is compared line by line
    with assert_list_equal.
    :return:
    """
    def without_source(rows):
        return [(reg_type, reg_code) for reg_type, reg_code, _reg_str in rows]

    def with_source(rows):
        return [(reg_type, reg_code, reg_str)
                for reg_type, reg_code, reg_str in rows]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_regulations,
        expected_data_converter=without_source,
        return_source=False,
        as_dict=False)
    lexnlp_tests.test_extraction_func_on_test_data(
        get_regulations,
        expected_data_converter=with_source,
        return_source=True,
        as_dict=False)

    # TODO Impl test_extraction_func_on_test_data() comparing lists of dicts
    for (_i, text, _input_args,
         expected_rows) in lexnlp_tests.iter_test_data_text_and_tuple():
        expected_no_source_dict = []
        expected_source_dict = []
        for reg_type, reg_code, reg_str in expected_rows:
            expected_no_source_dict.append({
                'regulation_type': reg_type,
                'regulation_code': reg_code
            })
            expected_source_dict.append({
                'regulation_type': reg_type,
                'regulation_code': reg_code,
                'regulation_str': reg_str
            })

        extracted_no_source = list(
            lexnlp_tests.benchmark_extraction_func(get_regulations,
                                                   text,
                                                   return_source=False,
                                                   as_dict=True))
        assert_list_equal(extracted_no_source, expected_no_source_dict)

        extracted_with_source = list(
            lexnlp_tests.benchmark_extraction_func(get_regulations,
                                                   text,
                                                   return_source=True,
                                                   as_dict=True))
        assert_list_equal(extracted_with_source, expected_source_dict)
Пример #14
0
def test_courts_longest_match():
    """
    Covers courts whose names/aliases are substrings of one another.
    For each conflicting match the court with the longest alias should be returned.
    If the court with the shorter alias also matches elsewhere, outside of that conflict,
    both courts should be returned.
    :return:
    """
    data_dir = os.path.dirname(lexnlp_tests.this_test_data_path())
    courts_config_fn = os.path.join(data_dir, 'us_courts.csv')

    courts_config_list = []
    with open(courts_config_fn, 'r', encoding='utf8') as csv_file:
        for row in csv.DictReader(csv_file):
            court_id = row['Court ID']
            # Type and name are packed into one '|'-separated entity name;
            # the result converter below splits them apart again.
            packed_name = row['Court Type'] + '|' + row['Court Name']
            if row['Alias']:
                aliases = row['Alias'].split(';')
            else:
                aliases = []
            court = entity_config(court_id, packed_name, 0, aliases,
                                  name_is_alias=False)
            add_alias_to_entity(court, row['Court Name'])
            courts_config_list.append(court)

    def to_type_and_name(actual):
        return [tuple(item[0][1].split('|')) for item in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_courts,
        court_config_list=courts_config_list,
        actual_data_converter=to_type_and_name,
        debug_print=True)
Пример #15
0
def test_courts():
    """
    Check get_courts using court configs built from the remote courts CSV.
    :return:
    """

    # Read master data
    import pandas

    # Load court data
    courts_csv_url = (
        "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
        ".csv")
    court_df = pandas.read_csv(courts_csv_url)

    # Create config objects
    court_config_list = []
    for _, row in court_df.iterrows():
        court_id = row["Court ID"]
        court_name = row["Court Name"]
        alias_cell = row["Alias"]
        # A null alias cell yields an empty alias list.
        if pandas.isnull(alias_cell):
            aliases = []
        else:
            aliases = alias_cell.split(";")
        court_config_list.append(
            entity_config(court_id, court_name, 0, aliases))

    def to_court_names(actual):
        # Index 1 of each item's first element is what gets compared.
        return [item[0][1] for item in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_courts,
        court_config_list=court_config_list,
        actual_data_converter=to_court_names)
Пример #16
0
 def test_normalize_text(self):
     """
     Check normalize_text against the test data.

     The result is wrapped in a one-element tuple before comparison.
     """
     def as_single_tuple(text):
         return (text, )

     lexnlp_tests.test_extraction_func_on_test_data(
         normalize_text,
         actual_data_converter=as_single_tuple,
         debug_print=True)
Пример #17
0
def test_trademarks():
    """
    Check get_trademarks against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_trademarks,
    )
Пример #18
0
def test_copyright():
    """
    Check get_copyright against the test data with return_sources=True.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_copyright,
        return_sources=True,
    )
Пример #19
0
def test_stems_lowercase():
    """
    Check get_stem_list against the test data with lowercase=True.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_stem_list,
        lowercase=True,
    )
Пример #20
0
def test_stems():
    """
    Check get_stem_list against the test data.

    Expected stems are lower-cased before comparison; a falsy expectation
    is compared as None.
    :return:
    """
    def lowered_or_none(stems):
        if not stems:
            return None
        return [stem.lower() for stem in stems]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_stem_list,
        expected_data_converter=lowered_or_none)
Пример #21
0
def test_adverbs_lemma():
    """
    Check get_adverbs against the test data with lemmatize=True.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_adverbs,
        lemmatize=True,
    )
Пример #22
0
def test_adverbs():
    """
    Check get_adverbs against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_adverbs,
    )
Пример #23
0
def test_adjectives():
    """
    Check get_adjectives against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_adjectives,
    )
Пример #24
0
def test_nouns():
    """
    Check get_nouns against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_nouns,
    )
Пример #25
0
def test_lemmas_sw():
    """
    Check get_lemma_list and then get_lemmas with stopword=True.
    :return:
    """
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor,
                                                       stopword=True)
Пример #26
0
def test_gpes():
    """
    Check get_geopolitical against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_geopolitical,
    )
Пример #27
0
def test_persons():
    """
    Check get_persons against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_persons,
    )
Пример #28
0
def test_lemmas():
    """
    Check get_lemma_list and then get_lemmas against the test data.
    :return:
    """
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor)
Пример #29
0
def test_noun_phrases():
    """
    Check get_noun_phrases against the test data.
    :return:
    """
    lexnlp_tests.test_extraction_func_on_test_data(
        get_noun_phrases,
    )
Пример #30
0
def test_lemmas_lc():
    """
    Check get_lemma_list and then get_lemmas with lowercase=True.
    :return:
    """
    # Snowball returns lowercase always
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor,
                                                       lowercase=True)