Example #1
0
    def main(self, filename, strict_validation, database, show_info=True):
        """Run a full RPSL import from a file, or stdin when filename is '-'.

        :param filename: path of the RPSL file to parse, or '-' for stdin
        :param strict_validation: passed through to parse_object for each paragraph
        :param database: if truthy, create a DatabaseHandler and persist results
        :param show_info: stored on the instance to control informational output
        """
        self.show_info = show_info
        if database:
            self.database_handler = DatabaseHandler()
            # Journaling disabled for bulk loads (see disable_journaling).
            self.database_handler.disable_journaling()

        if filename == '-':  # pragma: no cover
            f = sys.stdin
        else:
            f = open(filename, encoding='utf-8', errors='backslashreplace')

        try:
            for paragraph in split_paragraphs_rpsl(f):
                self.parse_object(paragraph, strict_validation)
        finally:
            # Fix: the file handle was previously never closed. Close it even
            # when parsing raises, but never close sys.stdin.
            if f is not sys.stdin:
                f.close()

        print(
            f'Processed {self.obj_parsed} objects, {self.obj_errors} with errors'
        )
        if self.obj_unknown:
            unknown_formatted = ', '.join(self.unknown_object_classes)
            print(
                f'Ignored {self.obj_unknown} objects due to unknown object classes: {unknown_formatted}'
            )

        if self.database_handler:
            self.database_handler.commit()
            self.database_handler.close()
Example #2
0
File: parsers.py  Project: haussli/irrd4
    def run_import(self):
        """Parse every RPSL paragraph in self.filename, then log a report."""
        # Fix: the original closed the file only on the success path, leaking
        # the handle if parsing raised. A context manager closes it always.
        with open(self.filename, encoding='utf-8', errors='backslashreplace') as f:
            for paragraph in split_paragraphs_rpsl(f):
                self.parse_object(paragraph)

        self.log_report()
Example #3
0
    def clean(self, query: str, response: Optional[str]) -> Optional[str]:
        """Clean the query response, so that the text can be compared."""
        if not response:
            return response
        irr_query = query[:2].lower()
        response = response.strip().lower()

        # Set-like responses (prefix/AS lists) are normalised by sorting;
        # the '!r ...,o' form also returns a flat list of origins.
        is_origin_query = irr_query == '!r' and query.lower().strip().endswith(',o')
        result_items = None
        if irr_query in SSP_QUERIES or is_origin_query:
            result_items = response.split(' ')
        if result_items and irr_query in ['!6', '!g']:
            # Canonicalise each prefix so textual variants compare equal.
            result_items = [str(IP(item)) for item in result_items]

        if result_items:
            return ' '.join(sorted(set(result_items)))

        # Object responses: parse each paragraph, order deterministically by
        # source + class + primary key, then re-render.
        parsed_objects = [
            rpsl_object_from_text(paragraph.strip(), strict_validation=False)
            for paragraph in split_paragraphs_rpsl(response)
        ]
        parsed_objects.sort(
            key=lambda obj: obj.parsed_data.get('source', '') + obj.rpsl_object_class + obj.pk()
        )
        rendered_texts = [obj.render_rpsl_text() for obj in parsed_objects]
        return '\n'.join(OrderedSet(rendered_texts))
Example #4
0
File: parsers.py  Project: irrdnet/irrd
    def _split_stream(self, data: str) -> None:
        """Split a stream into individual operations."""
        paragraphs = split_paragraphs_rpsl(data, strip_comments=False)
        last_comment_seen = ''

        # _handle_operation consumes further paragraphs from the same
        # iterator, so `paragraphs` is passed along deliberately.
        for paragraph in paragraphs:
            if self._handle_possible_start_line(paragraph):
                continue
            if paragraph.startswith(('%', '#')):
                last_comment_seen = paragraph
            elif paragraph.startswith(('ADD', 'DEL')):
                self._handle_operation(paragraph, paragraphs)

        final_comment = last_comment_seen.upper().strip()
        if self.nrtm_source and final_comment != f'%END {self.source}':
            msg = (f'NRTM stream error for {self.source}: last comment paragraph expected to be '
                   f'"%END {self.source}", but is actually {final_comment}')
            logger.error(msg)
            self.database_handler.record_mirror_error(self.source, msg)
            raise ValueError(msg)

        if self._current_op_serial > self.last_serial and self.version != '3':
            msg = (f'NRTM stream error for {self.source}: expected operations up to and including serial '
                   f'{self.last_serial}, last operation was {self._current_op_serial}')
            logger.error(msg)
            self.database_handler.record_mirror_error(self.source, msg)
            raise ValueError(msg)

        if self.last_serial > 0:
            self.database_handler.record_serial_newest_mirror(
                self.source, self.last_serial)
Example #5
0
    def run_import(self) -> Optional[str]:
        """
        Run the actual import. If direct_error_return is set, returns an error
        string on encountering the first error. Otherwise, returns None.
        """
        # Fix: the original only closed the file on the fully successful path,
        # leaking the handle on the early `return error` and on exceptions.
        # A context manager closes it on every exit path.
        with open(self.filename, encoding='utf-8', errors='backslashreplace') as f:
            for paragraph in split_paragraphs_rpsl(f):
                error = self._parse_object(paragraph)
                if error is not None:
                    return error

        # Only reached when every paragraph parsed without error,
        # matching the original control flow.
        self.log_report()
        return None
Example #6
0
File: parsers.py  Project: haussli/irrd4
    def _split_stream(self, data: str) -> None:
        """Split a stream into individual operations."""
        paragraphs = split_paragraphs_rpsl(data, strip_comments=False)

        # _handle_operation reads further paragraphs from this same iterator,
        # hence it is handed `paragraphs` as well.
        for paragraph in paragraphs:
            if self._handle_possible_start_line(paragraph):
                continue
            if paragraph.startswith(('%', '#')):
                continue  # pragma: no cover -- falsely detected as not run by coverage library
            if paragraph.startswith(('ADD', 'DEL')):
                self._handle_operation(paragraph, paragraphs)

        if self._current_op_serial > self.last_serial and self.version != '3':
            msg = (f'NRTM stream error for {self.source}: expected operations up to and including serial '
                   f'{self.last_serial}, last operation was {self._current_op_serial}')
            logger.error(msg)
            self.database_handler.record_mirror_error(self.source, msg)
            raise ValueError(msg)

        if self.last_serial > 0:
            self.database_handler.force_record_serial_seen(self.source, self.last_serial)
Example #7
0
    def run_import(self) -> Optional[str]:
        """
        Run the actual import. If direct_error_return is set, returns an error
        string on encountering the first error. Otherwise, returns None.
        """
        # Fix: the original leaked the file handle on the early
        # `return e.message` path and on exceptions; `with` closes it always.
        with open(self.filename, encoding='utf-8', errors='backslashreplace') as f:
            for paragraph in split_paragraphs_rpsl(f):
                try:
                    rpsl_obj = self._parse_object(paragraph)
                except RPSLImportError as e:
                    # Without direct_error_return, errors are recorded by
                    # _parse_object and the import continues.
                    if self.direct_error_return:
                        return e.message
                else:
                    if rpsl_obj:
                        self.database_handler.upsert_rpsl_object(
                            rpsl_obj, origin=JournalEntryOrigin.mirror)

        self.log_report()
        if self.serial:
            self.database_handler.record_serial_seen(self.source, self.serial)

        return None
Example #8
0
    def run_import(self) -> Optional[str]:
        """
        Run the actual import. If direct_error_return is set, returns an error
        string on encountering the first error. Otherwise, returns None.
        """
        # Fix: the original leaked the file handle on the early
        # `return e.message` path and on exceptions; `with` closes it always.
        objs_from_file = []
        with open(self.filename, encoding='utf-8', errors='backslashreplace') as f:
            for paragraph in split_paragraphs_rpsl(f):
                try:
                    rpsl_obj = self._parse_object(paragraph)
                except RPSLImportError as e:
                    if self.direct_error_return:
                        return e.message
                else:
                    if rpsl_obj:
                        objs_from_file.append(rpsl_obj)

        # Collect the primary keys currently stored for this source.
        query = RPSLDatabaseQuery(ordered_by_sources=False,
                                  enable_ordering=False,
                                  column_names=['rpsl_pk'
                                                ]).sources([self.source])
        current_pks = {
            row['rpsl_pk']
            for row in self.database_handler.execute_query(query)
        }

        # Diff the file contents against the database by primary key.
        file_objs_by_pk = {obj.pk(): obj for obj in objs_from_file}
        file_pks = set(file_objs_by_pk.keys())
        new_pks = file_pks - current_pks
        deleted_pks = current_pks - file_pks
        retained_pks = file_pks.intersection(current_pks)

        self.obj_new = len(new_pks)
        self.obj_deleted = len(deleted_pks)
        self.obj_retained = len(retained_pks)

        # Insert objects present in the file but not in the database,
        # preserving the file's (dict insertion) order.
        for rpsl_pk, file_obj in file_objs_by_pk.items():
            if rpsl_pk in new_pks:
                self.database_handler.upsert_rpsl_object(
                    file_obj, JournalEntryOrigin.synthetic_nrtm)

        # Remove objects no longer present in the file.
        for rpsl_pk in deleted_pks:
            self.database_handler.delete_rpsl_object(
                rpsl_pk=rpsl_pk,
                source=self.source,
                origin=JournalEntryOrigin.synthetic_nrtm)

        # This query does not filter on retained_pks. The expectation is that most
        # objects are retained, and therefore it is much faster to query the entire source.
        query = RPSLDatabaseQuery(ordered_by_sources=False,
                                  enable_ordering=False,
                                  column_names=['rpsl_pk', 'object_text'])
        query = query.sources([self.source])
        for row in self.database_handler.execute_query(query):
            try:
                file_obj = file_objs_by_pk[row['rpsl_pk']]
            except KeyError:
                # Row was just deleted above, or not part of the file.
                continue
            # Re-upsert only when the rendered text actually changed,
            # ignoring the volatile last-modified attribute.
            if file_obj.render_rpsl_text() != remove_last_modified(
                    row['object_text']):
                self.database_handler.upsert_rpsl_object(
                    file_obj, JournalEntryOrigin.synthetic_nrtm)
                self.obj_modified += 1

        self.log_report()
        return None