Пример #1
0
    def make_en_parser(self):
        """Build a ``UniversalCourtsParser`` set up with English settings.

        ``dataframe_paths`` holds a single entry: the URL of ``us_courts.csv``
        in the LexPredict legal dictionary repository (tag 1.0.2).
        ``line_breaks`` is the union of newline, '.', ';', ',' and
        ``EnLanguageTokens.conjunctions``; ``abbr_ignore_case`` is enabled.

        Returns:
            The ``UniversalCourtsParser`` created from these parameters.
        """
        # Prepare the line-splitting settings first, then attach them.
        splitter = LineSplitParams()
        splitter.line_breaks = set(EnLanguageTokens.conjunctions) | {
            '\n', '.', ';', ','}
        splitter.abbreviations = EnLanguageTokens.abbreviations
        splitter.abbr_ignore_case = True

        params = ParserInitParams()
        params.court_pattern_checker = re.compile('court', re.IGNORECASE)
        params.dataframe_paths = [
            "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts.csv"
        ]
        params.split_ptrs = splitter

        return UniversalCourtsParser(params)
Пример #2
0
def setup_en_parser():
    """Create a ``UniversalCourtsParser`` for English using bundled CSV files.

    The four court CSV files (US state, US, CA, AU) are resolved against
    ``<lexnlp_base_path>/lexnlp/config/en``. ``line_breaks`` is the union of
    newline, '.', ';', ',' and ``EnLanguageTokens.conjunctions``;
    ``abbr_ignore_case`` is enabled.

    Returns:
        The ``UniversalCourtsParser`` created from these parameters.
    """
    config_dir = os.path.join(lexnlp_base_path, 'lexnlp/config/en')
    csv_names = (
        'us_state_courts.csv',
        'us_courts.csv',
        'ca_courts.csv',
        'au_courts.csv',
    )

    # Line-splitting settings are assembled separately and attached below.
    splitter = LineSplitParams()
    splitter.line_breaks = set(EnLanguageTokens.conjunctions) | {
        '\n', '.', ';', ','}
    splitter.abbreviations = EnLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True

    params = ParserInitParams()
    params.dataframe_paths = [os.path.join(config_dir, name)
                              for name in csv_names]
    params.split_ptrs = splitter
    params.court_pattern_checker = re.compile('court', re.IGNORECASE)
    return UniversalCourtsParser(params)
Пример #3
0
def setup_en_parser():
    """Create a ``UniversalCourtsParser`` for English court names.

    The four court CSV files (US state, US, CA, AU) are addressed relative
    to the directory of this module (``../../config/en``). ``line_breaks``
    is the union of newline, '.', ';', ',' and
    ``EnLanguageTokens.conjunctions``; ``abbr_ignore_case`` is enabled.

    Returns:
        The ``UniversalCourtsParser`` created from these parameters.
    """
    module_dir = os.path.dirname(__file__)
    relative_csvs = (
        "../../config/en/us_state_courts.csv",
        "../../config/en/us_courts.csv",
        "../../config/en/ca_courts.csv",
        "../../config/en/au_courts.csv",
    )

    # Line-splitting settings are assembled separately and attached below.
    splitter = LineSplitParams()
    splitter.line_breaks = set(EnLanguageTokens.conjunctions) | {
        '\n', '.', ';', ','}
    splitter.abbreviations = EnLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True

    params = ParserInitParams()
    params.dataframe_paths = [os.path.join(module_dir, rel)
                              for rel in relative_csvs]
    params.split_ptrs = splitter
    params.court_pattern_checker = re.compile('court', re.IGNORECASE)
    return UniversalCourtsParser(params)
Пример #4
0
def setup_de_parser():
    """Create a ``UniversalCourtsParser`` configured for German court names.

    Uses ``../../config/de/de_courts.csv`` (relative to this module) as the
    only dataframe, maps the logical columns to their ``(de-DE)`` headers,
    and installs a keyword pre-processing function that turns a trailing
    'e' into the regex fragment '[e]?'.

    Returns:
        The ``UniversalCourtsParser`` created from these parameters.
    """
    def _optional_trailing_e(text):
        # Replace a final 'e' with '[e]?' so the ending becomes optional
        # when the result is used as a regular expression.
        return re.sub('e$', '[e]?', text)

    csv_path = os.path.join(os.path.dirname(__file__),
                            "../../config/de/de_courts.csv")

    # Line-splitting settings are assembled separately and attached below.
    splitter = LineSplitParams()
    splitter.line_breaks = set(DeLanguageTokens.conjunctions) | {
        '\n', '.', ';', ','}
    splitter.abbreviations = DeLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True

    params = ParserInitParams()
    params.key_word_preproc_func = _optional_trailing_e
    # NOTE(review): this pattern is case-sensitive (no re.IGNORECASE) --
    # confirm that a capitalised "Gericht" is handled as intended.
    params.court_pattern_checker = re.compile('gericht')
    params.split_ptrs = splitter
    params.column_names = dict([
        ('type', 'Court Type (de-DE)'),
        ('name', 'Court Name (de-DE)'),
        ('jurisdiction', 'Jurisdiction'),
        ('alias', 'Alias (de-DE)'),
    ])
    params.dataframe_paths = [csv_path]
    return UniversalCourtsParser(params)
Пример #5
0
 def init_parser():
     """Install a ``LineProcessor`` on ``CopyrightDeParser``.

     The processor is created with ``LineSplitParams`` whose ``line_breaks``
     are newline, '.', ';', '!' and '?', whose abbreviations come from
     ``DeLanguageTokens`` and whose ``abbr_ignore_case`` flag is enabled.
     The result is stored in the class attribute
     ``CopyrightDeParser.line_processor``; nothing is returned.
     """
     splitter = LineSplitParams()
     splitter.line_breaks = set(('\n', '.', ';', '!', '?'))
     splitter.abbreviations = DeLanguageTokens.abbreviations
     splitter.abbr_ignore_case = True

     processor = LineProcessor(line_split_params=splitter)
     CopyrightDeParser.line_processor = processor