Пример #1
0
 def _compute_md5_hash(self):
     """Hash the input file with MD5 and store the digest as job metadata.

     Opens ``self._filename`` in binary mode, passes the open handle to
     ``compute_md5_hash`` and records the result as ``input_file_md5`` on
     the 'dirbs-import' job identified by ``self.import_id``.
     """
     logger = self._logger
     logger.info('Computing MD5 hash of the input file...')
     with open(self._filename, 'rb') as input_file:
         digest = compute_md5_hash(input_file)
     logger.info('Computed MD5 hash of the input file')

     # Attach the digest to the job's optional metadata.
     metadata.add_optional_job_metadata(
         self._metadata_conn,
         'dirbs-import',
         self.import_id,
         input_file_md5=digest,
     )
Пример #2
0
def _populate_file_properties(config, file_list, run_id, perform_prevalidation,
                              logger):
    """Determine the attributes associated with the file."""
    uncataloged_files = []
    for file_name in file_list:
        file_properties = file_name['file_properties']
        file_path = file_name['file_path']
        is_valid_zip = None
        is_valid_format = None
        files_to_delete = []
        num_records = None
        uncompressed_size_bytes = None
        try:
            # Validate zip file
            extracted_file = extract_csv_from_zip(file_path)
            is_valid_zip = True
            if perform_prevalidation:
                is_valid_format = _prevalidate_file(config, extracted_file,
                                                    file_path,
                                                    file_properties.file_type,
                                                    run_id,
                                                    file_name['schema'],
                                                    files_to_delete, logger)
            num_records = sum(1 for _ in extracted_file)
            with zipfile.ZipFile(file_path) as file_test:
                uncompressed_size_bytes = file_test.getinfo(
                    extracted_file.name).file_size

        except BadZipFile as err:
            is_valid_zip = False
            logger.warn('The zip file is invalid: {0}'.format(
                file_properties.filename))
            logger.warn('Zip check error: {0}'.format(str(err)))
        except exceptions.PrevalidationCheckRawException as err:
            is_valid_format = False
            logger.warn(
                'Pre-validation failed for file: {0} with error: {1}'.format(
                    file_path, str(err)))
        finally:
            logger.debug('Cleanup: deleting intermediate data files...')
            for fn in files_to_delete:
                logger.debug('Deleted intermediate file {0}'.format(fn))
                remove(fn)
            logger.debug('Cleanup: deleted intermediate data files')

        # Compute MD5 hash
        logger.info('Computing MD5 hash of the input file...')
        with open(file_path, 'rb') as f:
            md5 = compute_md5_hash(f)
        logger.info('Computed MD5 hash')

        # Fetch extra attributes (if any)
        extra_attributes = _get_extra_attributes(file_path,
                                                 file_properties.file_type,
                                                 logger)
        file_attributes = CatalogAttributes(
            file_properties.filename, file_properties.file_type,
            file_properties.modified_time,
            file_properties.compressed_size_bytes, is_valid_zip,
            is_valid_format, md5, extra_attributes, uncompressed_size_bytes,
            num_records)
        uncataloged_files.append(file_attributes)
    return uncataloged_files