def test_parse_compressed_file(self):
  """Verifies that a gzip-compressed TSV report parses into the expected blocks.

  Parses the bundled .tsv.gz fixture and checks that every parsed block
  matches the corresponding expected block, and that parsing produced no
  errors or warnings (but did log at least one info message).
  """
  gz_path = path.join(
      path.dirname(__file__), '../testdata/test_file_parser.tsv.gz')
  file_parser = dsrf_file_parser.DSRFFileParser(
      self.logger, None, None, gz_path)
  file_parser.row_validators_list = self.row_validators_list
  parsed_blocks = file_parser.parse_file(1)
  for want, got in zip(self.expected_blocks, parsed_blocks):
    self.assertMultiLineEqual(str(want), str(got))
  # A clean parse: nothing logged at error/warn level, some info output.
  self.assertEqual(self.logger._counts['error'], 0)
  self.assertEqual(self.logger._counts['warn'], 0)
  self.assertGreater(self.logger._counts['info'], 0)
def parse_report(self, files_list, dsrf_xsd_file, avs_xsd_file,
                 human_readable=False, write_head=True):
  """Parses a dsrf report to block objects.

  The blocks are transferred to the queue.

  Args:
    files_list: A list of files in the report to parse.
    dsrf_xsd_file: Optional user-provided path to custom XSD.
    avs_xsd_file: Optional user-provided path to custom AVS XSD.
    human_readable: If True, write the block to the queue in a human
      readable form. Otherwise, write the block as raw bytes.
    write_head: If set to False, the header will not be written to the
      queue.

  Returns:
    dsrf_logger.DSRFLogger object.

  Raises:
    error.ReportValidationFailure: If a BODY block number appears more
      than once across the report's files.
  """
  # Map each full path to its bare file name; the name alone carries the
  # components (e.g. the file number) that the validators inspect.
  file_path_to_name_map = {
      file_path: path.basename(file_path) for file_path in files_list}
  expected_components = constants.FILE_NAME_COMPONENTS
  self.logger.info('Validating the report file names.')
  # Validate the set of file names as a whole before parsing any content.
  report_validator = report_files_validators.ReportFilesValidator(
      file_name_validators.FileNameValidator(expected_components),
      self.logger)
  report_validator.validate_file_names(
      list(file_path_to_name_map.values()))
  # file number -> set of BODY block numbers seen in that file; used to
  # detect block numbers duplicated across (or within) files.
  blocks = defaultdict(set)
  for file_path, file_name in six.iteritems(file_path_to_name_map):
    file_parser = dsrf_file_parser.DSRFFileParser(
        self.logger, dsrf_xsd_file, avs_xsd_file, file_path)
    # Split the name into its components; the 'x' component is the
    # file's number within the report.
    file_name_dict = file_name_validators.FileNameValidator.split_file_name(
        file_name, expected_components)
    file_number = file_name_dict['x']
    self.logger.info('Start parsing file number %s.', file_number)
    for block in file_parser.parse_file(int(file_number)):
      if block.type == block_pb2.BODY:
        # Reject a block number already recorded for any file parsed so
        # far (including the current one).
        for compared_file_number, file_blocks in six.iteritems(blocks):
          if block.number in file_blocks:
            # NOTE(review): file_number is a string here, so min/max
            # compare lexicographically (e.g. '10' < '2') — affects only
            # the ordering in the message, not correctness. TODO confirm
            # intended.
            raise error.ReportValidationFailure(
                'The block number %s is not unique. It appears in files '
                'number: %s and %s.'
                % (block.number, min(file_number, compared_file_number),
                   max(file_number, compared_file_number)))
        blocks[file_number].add(block.number)
      elif block.type == block_pb2.HEAD:
        # Header validation failures are logged, not raised, so parsing
        # can continue and accumulate all problems.
        try:
          self.validate_head_block(block, file_name, file_name_dict)
        except error.FileNameValidationFailure as e:
          self.logger.error(e)
        if not write_head:
          # Skip writing the header to the queue, if requested.
          continue
      else:  # FOOT
        # FOOT blocks are never written to the queue.
        continue
      self.write_to_queue(block, self.logger, human_readable)
  # Surface accumulated fatal errors at the end; the logger is returned
  # either way so callers can inspect counts.
  try:
    self.logger.raise_if_fatal_errors_found()
  except error.ReportValidationFailure as e:
    sys.stderr.write(constants.COLOR_RED + constants.BOLD +
                     '\n[Cell validation] ' + str(e) + constants.ENDC)
  return self.logger
def _get_file_parser(self, row_validators=None):
  """Builds a DSRFFileParser wired to this test's logger.

  Args:
    row_validators: Optional row validators to install on the parser. Any
      falsy value (including None) falls back to self.row_validators_list.

  Returns:
    A dsrf_file_parser.DSRFFileParser with row_validators_list populated.
  """
  file_parser = dsrf_file_parser.DSRFFileParser(
      self.logger, None, None, 'filename')
  if row_validators:
    file_parser.row_validators_list = row_validators
  else:
    file_parser.row_validators_list = self.row_validators_list
  return file_parser