Exemplo n.º 1
0
class LawsParser:
    """Locate law references in text using three dataframe-backed parsers.

    The instance owns one ``DataframeEntityParser`` per input dataframe
    (``gesetze`` = laws, ``verordnungen`` = ordinances, plus a "concept"
    table) and converts their combined hits into ``LawAnnotation`` objects.
    """

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create the three parsers.

        Args:
            gesetze_df: dataframe handed to ``self.gesetze_parser``.
            verordnungen_df: dataframe handed to ``self.verordnungen_parser``.
            concept_df: dataframe handed to ``self.concept_parser``.
        """
        self.locale = ''

        # Columns: Kurztitel = short title, Titel = title,
        # Abkürzung = abbreviation.
        law_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_dependent_columns = {'Titel': 'External Reference Normalized'}
        law_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        self.gesetze_parser = DataframeEntityParser(gesetze_df, law_columns,
                                                    law_dependent_columns,
                                                    law_entity)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df, law_columns, law_dependent_columns, law_entity)

        concept_columns = ('b', )
        concept_dependent_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        # The concept table supplies 'External Reference Type' through its
        # 'a' column, so the constant is left out of its template. A separate
        # dict is built rather than popping the key from ``law_entity``: that
        # dict was already given to the two parsers above, and mutating it
        # would strip the key from them as well if they keep a reference.
        concept_entity = {
            key: value
            for key, value in law_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(concept_df,
                                                    concept_columns,
                                                    concept_dependent_columns,
                                                    concept_entity)

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Run all three parsers over ``text`` and wrap the hits.

        Args:
            text: the text to search.
            locale: locale stored on the instance and on every annotation;
                ``'de'`` is used when the argument is ``None`` or empty.

        Returns:
            One ``LawAnnotation`` per entity, in the order gesetze,
            verordnungen, concept. ``name`` and ``text`` are both set to the
            entity's ``'source'`` value and ``coords`` to
            ``(location_start, location_end)``.
        """
        self.locale = locale or 'de'

        entities = []
        for parser in (self.gesetze_parser, self.verordnungen_parser,
                       self.concept_parser):
            entities.extend(parser.get_entity_list(text))

        annotations = []  # type: List[LawAnnotation]
        for entity in entities:
            coords = (entity['location_start'], entity['location_end'])
            # Dedicated name so the ``text`` parameter is not shadowed.
            source = entity['source']
            annotations.append(LawAnnotation(name=source,
                                             coords=coords,
                                             text=source,
                                             locale=self.locale))
        return annotations
Exemplo n.º 2
0
    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create the three dataframe-backed parsers held by this object.

        Args:
            gesetze_df: dataframe handed to ``self.gesetze_parser``.
            verordnungen_df: dataframe handed to ``self.verordnungen_parser``.
            concept_df: dataframe handed to ``self.concept_parser``.
        """
        self.locale = ''

        # Columns: Kurztitel = short title, Titel = title,
        # Abkürzung = abbreviation.
        law_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_dependent_columns = {'Titel': 'External Reference Normalized'}
        law_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        self.gesetze_parser = DataframeEntityParser(gesetze_df, law_columns,
                                                    law_dependent_columns,
                                                    law_entity)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df, law_columns, law_dependent_columns, law_entity)

        concept_columns = ('b', )
        concept_dependent_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        # The concept table supplies 'External Reference Type' through its
        # 'a' column, so the constant is left out of its template. A separate
        # dict is built rather than popping the key from ``law_entity``: that
        # dict was already given to the two parsers above, and mutating it
        # would strip the key from them as well if they keep a reference.
        concept_entity = {
            key: value
            for key, value in law_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(concept_df,
                                                    concept_columns,
                                                    concept_dependent_columns,
                                                    concept_entity)
Exemplo n.º 3
0
    def get_geoentities(self,
                        text: str,
                        config: pd.DataFrame,
                        parse_columns: Union[List[str], Tuple[str]] = None,
                        result_columns: Union[dict, None] = None,
                        preformed_entity: Union[dict, None] = None,
                        priority_sort_column: Union[str, None] = None,
                        priority_sort_ascending: bool = True,
                        cell_values_separator: Union[str, None] = ';',
                        unique_column_values: bool = True) -> Generator:
        """Yield the entities a ``DataframeEntityParser`` finds in ``text``.

        A parser is built over ``config`` on each call. Every option except
        ``text`` is forwarded unchanged to the ``DataframeEntityParser``
        constructor, which defines its meaning.

        Args:
            text: text passed to the parser's ``get_entities``.
            config: dataframe the parser is built over.
            parse_columns: columns to parse; when falsy,
                ``self.default_selecting_columns`` is used instead.
            result_columns: forwarded as ``result_columns``.
            preformed_entity: forwarded as ``preformed_entity``.
            priority_sort_column: forwarded as ``priority_sort_column``.
            priority_sort_ascending: forwarded as ``priority_sort_ascending``.
            cell_values_separator: forwarded as ``cell_values_separator``.
            unique_column_values: forwarded as ``unique_column_values``.

        Yields:
            Each item produced by ``DataframeEntityParser.get_entities``.
        """
        columns_to_parse = parse_columns or self.default_selecting_columns

        entity_parser = DataframeEntityParser(
            dataframe=config,
            parse_columns=columns_to_parse,
            result_columns=result_columns,
            preformed_entity=preformed_entity,
            priority_sort_column=priority_sort_column,
            priority_sort_ascending=priority_sort_ascending,
            cell_values_separator=cell_values_separator,
            unique_column_values=unique_column_values)

        yield from entity_parser.get_entities(text)
Exemplo n.º 4
0
    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create the three dataframe-backed parsers held by this object.

        All three parsers share one ``LineProcessor`` configured with the
        line-break characters and abbreviation list set below.

        Args:
            gesetze_df: dataframe handed to ``self.gesetze_parser``.
            verordnungen_df: dataframe handed to ``self.verordnungen_parser``.
            concept_df: dataframe handed to ``self.concept_parser``.
        """
        self.locale = ''

        # Line splitting: break on sentence punctuation, with
        # DeLanguageTokens.abbreviations registered case-insensitively.
        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        proc = LineProcessor(line_split_params=split_params)

        # Columns: Kurztitel = short title, Titel = title,
        # Abkürzung = abbreviation.
        law_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}
        law_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            law_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            law_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        concept_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        # The concept table supplies 'External Reference Type' through its
        # 'a' column, so the constant is left out of its template. A separate
        # dict is built rather than popping the key from ``law_entity``: that
        # dict was already given to the two parsers above, and mutating it
        # would strip the key from them as well if they keep a reference.
        concept_entity = {
            key: value
            for key, value in law_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_columns,
            result_columns=concept_result_columns,
            preformed_entity=concept_entity,
            line_processor=proc)
Exemplo n.º 5
0
 def get_entries(self, text: str, columns=None):
     """Return a list of the entities found in ``text``.

     A ``DataframeEntityParser`` is built over ``entity_df`` (a name taken
     from the enclosing scope) on each call.

     Args:
         text: text passed to the parser's ``get_entities``.
         columns: columns to parse; when falsy, ``self.default_columns``
             is used instead.

     Returns:
         The items produced by ``get_entities``, collected into a list.
     """
     selected_columns = columns or self.default_columns
     entity_parser = DataframeEntityParser(dataframe=entity_df,
                                           parse_columns=selected_columns)
     found = entity_parser.get_entities(text)
     return list(found)
Exemplo n.º 6
0
class LawsParser:
    """Locate law references in text using three dataframe-backed parsers.

    The instance owns one ``DataframeEntityParser`` per input dataframe
    (``gesetze`` = laws, ``verordnungen`` = ordinances, plus a "concept"
    table), all sharing one ``LineProcessor``, and converts their combined
    hits into ``LawAnnotation`` objects.
    """

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create the three parsers.

        Args:
            gesetze_df: dataframe handed to ``self.gesetze_parser``.
            verordnungen_df: dataframe handed to ``self.verordnungen_parser``.
            concept_df: dataframe handed to ``self.concept_parser``.
        """
        self.locale = ''

        # Line splitting: break on sentence punctuation, with
        # DeLanguageTokens.abbreviations registered case-insensitively.
        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        proc = LineProcessor(line_split_params=split_params)

        # Columns: Kurztitel = short title, Titel = title,
        # Abkürzung = abbreviation.
        law_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}
        law_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            law_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            law_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        concept_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        # The concept table supplies 'External Reference Type' through its
        # 'a' column, so the constant is left out of its template. A separate
        # dict is built rather than popping the key from ``law_entity``: that
        # dict was already given to the two parsers above, and mutating it
        # would strip the key from them as well if they keep a reference.
        concept_entity = {
            key: value
            for key, value in law_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_columns,
            result_columns=concept_result_columns,
            preformed_entity=concept_entity,
            line_processor=proc)

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Run all three parsers over ``text`` and wrap the hits.

        Args:
            text: the text to search.
            locale: locale stored on the instance and on every annotation;
                ``'de'`` is used when the argument is ``None`` or empty.

        Returns:
            One ``LawAnnotation`` per entity, in the order gesetze,
            verordnungen, concept. ``name`` and ``text`` are both set to the
            entity's ``'source'`` value and ``coords`` to
            ``(location_start, location_end)``.
        """
        self.locale = locale or 'de'

        entities = []
        for parser in (self.gesetze_parser, self.verordnungen_parser,
                       self.concept_parser):
            entities.extend(parser.get_entity_list(text))

        annotations = []  # type: List[LawAnnotation]
        for entity in entities:
            coords = (entity['location_start'], entity['location_end'])
            # Dedicated name so the ``text`` parameter is not shadowed.
            source = entity['source']
            annotations.append(LawAnnotation(name=source,
                                             coords=coords,
                                             text=source,
                                             locale=self.locale))
        return annotations