def test_file_opener_3(self):
    """Test file_opener with PathLike objects that expose an ``open`` method."""
    path = '/path/to/file'
    payload = b'TestTestTest'

    class RawBytes(os.PathLike):
        """PathLike stand-in whose ``open`` serves fixed bytes from memory."""

        def __init__(self, filename, raw_bytes):
            self.filename = str(filename)
            self.raw_bytes = raw_bytes

        def __fspath__(self):
            return self.filename

        def open(self):
            return io.BytesIO(self.raw_bytes)

    class RawBytesLazy(RawBytes):
        """Variant whose ``open`` result yields the stream only on enter."""

        def open(self):
            # The mock is kept on the instance so the test can inspect
            # how its context-manager protocol was used afterwards.
            self.lazy_opener_mock = mock.MagicMock()
            self.lazy_opener_mock.__enter__.return_value = io.BytesIO(
                self.raw_bytes)
            return self.lazy_opener_mock

    # eager case: open() hands back a readable stream directly
    eager = RawBytes(path, payload)
    with file_opener(eager) as stream:
        received = stream.read()
    self.assertEqual(received, payload)

    # lazy case: the stream is only available inside the context
    lazy = RawBytesLazy(path, payload)
    with file_opener(lazy) as stream:
        received = stream.read()
    self.assertEqual(received, payload)
    lazy.lazy_opener_mock.__exit__.assert_called_once_with(None, None, None)
def read_header(cls, filename, fileobj=None):
    """Read the optional archive header and the file header.

    Args:
        filename (str): Path to GAC/LAC file
        fileobj: An open file object to read from. (optional)

    Returns:
        archive_header (struct): archive header, or None if the file does
            not start with one
        header (struct): file header
    """
    with file_opener(fileobj or filename) as stream:
        # The leading bytes are an archive header only when the data format
        # field starts with the NOAA Level 1b marker.
        raw_archive = stream.read(ars_header.itemsize)
        candidate, = np.frombuffer(raw_archive, dtype=ars_header, count=1)
        if not candidate['data_format'].startswith(b'NOAA Level 1b'):
            # no archive header: the file header starts at the beginning
            stream.seek(0)
            ars_head = None
        else:
            ars_head = candidate.copy()
        # frombuffer returns a read-only view; copy to get a writable header
        raw_header = stream.read(header.itemsize)
        head, = np.frombuffer(raw_header, dtype=header, count=1).copy()
    head = cls._correct_data_set_name(head, filename)
    cls._validate_header(head)
    return ars_head, head
def test_file_opener_2(self):
    """Test file_opener with file objects and compression."""
    plain_payload = b'normal message'
    gzip_payload = b'gzip message'

    # build the gzip-compressed byte string in memory
    with io.BytesIO() as sink:
        with gzip.open(sink, mode='wb') as packer:
            packer.write(gzip_payload)
        # the gzip writer has to be closed before the bytes are complete
        sink.seek(0)
        gzip_encoded = sink.read()

    # plain file object: content is passed through and the caller's
    # file object must still be open afterwards
    with io.BytesIO(plain_payload) as raw:
        with file_opener(raw) as opened:
            received = opened.read()
        self.assertFalse(raw.closed)
    self.assertEqual(received, plain_payload)

    # gzip file object: content is expected to come back decompressed
    with io.BytesIO(gzip_encoded) as raw:
        with file_opener(raw) as opened:
            received = opened.read()
    self.assertEqual(received, gzip_payload)
def read_header(cls, filename, fileobj=None):
    """Read the optional TBM header and the date-dependent file header.

    Args:
        filename (str): Path to GAC/LAC file
        fileobj: An open file object to read from. (optional)

    Returns:
        archive_header (struct): TBM header, or None if the file does not
            start with one
        header (struct): file header
    """
    with file_opener(fileobj or filename) as stream:
        # read the leading bytes as a TBM header candidate
        raw_tbm = stream.read(tbm_header.itemsize)
        candidate, = np.frombuffer(raw_tbm, dtype=tbm_header, count=1)
        raw_name = candidate['data_set_name']
        try:
            data_set_name = raw_name.decode()
        except UnicodeDecodeError:
            # undecodable names get a placeholder for the pattern match
            data_set_name = '---'
        # NOTE(review): confirm this padding matches the width of the
        # data_set_name field.
        allowed_empty = (42*b'\x00' + b' ')
        if (cls.data_set_pattern.match(data_set_name)
                or (raw_name == allowed_empty)):
            tbm_head = candidate.copy()
            tbm_offset = tbm_header.itemsize
        else:
            # no TBM header: the file header starts at the beginning
            stream.seek(0)
            tbm_head = None
            tbm_offset = 0

        # a minimal header is enough to find the start date, which in turn
        # decides which full header layout has to be used
        raw_head0 = stream.read(header0.itemsize)
        head0, = np.frombuffer(raw_head0, dtype=header0, count=1)
        year, jday, _ = cls.decode_timestamps(head0["start_time"])
        first_of_year = datetime.date(year, 1, 1)
        start_date = first_of_year + datetime.timedelta(days=int(jday) - 1)
        if start_date > datetime.date(1994, 11, 15):
            header = header3
        elif start_date >= datetime.date(1992, 9, 8):
            header = header2
        else:
            header = header1

        # rewind to the start of the file header and read it completely
        stream.seek(tbm_offset, 0)
        raw_head = stream.read(header.itemsize)
        # frombuffer returns a read-only view; copy to get a writable header
        head, = np.frombuffer(raw_head, dtype=header, count=1).copy()
    head = cls._correct_data_set_name(head, filename)
    cls._validate_header(head)
    return tbm_head, head
def read(self, filename, fileobj=None):
    """Read the data.

    Reads the headers, the analog telemetry record and all scanlines and
    stores them on the instance.

    Args:
        filename: Path to GAC/LAC file
        fileobj: An open file object to read from. (optional)

    Returns:
        header: numpy record array
            The header metadata
        scans: numpy record array
            The scanlines
    """
    # Note that np.fromfile does not work with gzip.GzipFile
    # objects (numpy version 1.16.4), because it restricts the
    # file objects to (io.FileIO, io.BufferedReader, io.BufferedWriter)
    # see: numpy.compat.py3k.isfileobj
    self.filename = filename
    LOG.info('Reading %s', self.filename)
    with file_opener(fileobj or filename) as fd_:
        self.ars_head, self.head = self.read_header(filename, fileobj=fd_)
        if self.ars_head:
            ars_offset = ars_header.itemsize
        else:
            ars_offset = 0
        self.header_version = self.head[
            "noaa_level_1b_format_version_number"]
        # the analog telemetry record is read from the current position of
        # the stream; its layout depends on the header version
        if self.header_version >= 5:
            telemetry_dtype = analog_telemetry_v5
        else:
            telemetry_dtype = analog_telemetry_v2
        self.analog_telemetry, = np.frombuffer(
            fd_.read(telemetry_dtype.itemsize),
            dtype=telemetry_dtype, count=1)
        # LAC: 1, GAC: 2, ...
        self.data_type = self.head['data_type_code']
        # read until end of file
        fd_.seek(self.offset + ars_offset, 0)
        buffer = fd_.read()
        count = self.head["count_of_data_records"]
        self._read_scanlines(buffer, count)
    self.correct_scan_line_numbers()
    self.spacecraft_id = self.head["noaa_spacecraft_identification_code"]
    self.spacecraft_name = self.spacecraft_names[self.spacecraft_id]
    # Return what the docstring promises (previously nothing was returned).
    # NOTE(review): assumes _read_scanlines stored the scanlines on
    # self.scans - confirm.
    return self.head, self.scans
def process_file(filename, start_line, end_line, fileobj=None):
    """Read, calibrate and navigate NOAA AVHRR GAC/LAC POD/KLM data.

    It creates three hdf5 files in the output location given by the pygac
    config file. The three files contain the avhrr data, quality flags,
    and sunsatangles.

    Arguments
        filename (str): Path to GAC/LAC file
        start_line (int): First scanline to be processed (0-based)
        end_line (int): Last scanline to be processed (0-based),
                        set to 0 for the last available scanline
        fileobj: An open file object to read from. (optional)

    Note
        This function expects an initialized config file.
    """
    started = datetime.datetime.now()
    LOG.info("Process file: %s", str(filename))

    config = get_config()
    # keyword arguments for the reader constructor
    reader_options = {
        'tle_dir': config.get('tle', 'tledir', raw=True),
        'tle_name': config.get('tle', 'tlename', raw=True),
        'calibration_file': config.get(
            "calibration", "coeffs_file", fallback=''),
    }
    # keyword arguments for writing the output
    output_dir = config.get('output', 'output_dir', raw=True)
    save_options = {
        'output_file_prefix': config.get(
            'output', 'output_file_prefix', raw=True),
        'output_dir': output_dir,
        'avhrr_dir': os.environ.get('SM_AVHRR_DIR'),
        # NOTE(review): qual_dir reads the same environment variable as
        # avhrr_dir - confirm this is intended.
        'qual_dir': os.environ.get('SM_AVHRR_DIR'),
        'sunsatangles_dir': os.environ.get('SM_SUNSATANGLES_DIR'),
    }

    # Keep the file open while searching for the reader class and later
    # creation of the instance.
    with file_opener(fileobj or filename) as open_file:
        reader_cls = get_reader_class(filename, fileobj=open_file)
        reader = reader_cls(**reader_options)
        # NOTE(review): read() is handed the caller's fileobj rather than
        # open_file - confirm this is intended.
        reader.read(filename, fileobj=fileobj)
        reader.save(start_line, end_line, **save_options)
    elapsed = datetime.datetime.now() - started
    LOG.info("Processing took: %s", str(elapsed))
def read(self, filename, fileobj=None):
    """Read the headers and all scanlines of a file.

    Args:
        filename (str): Path to GAC/LAC file
        fileobj: An open file object to read from. (optional)

    Returns:
        header: numpy record array
            The header metadata
        scans: numpy record array
            The scanlines
    """
    self.filename = filename
    LOG.info('Reading %s', self.filename)
    with file_opener(fileobj or filename) as stream:
        # read_header picks the header layout depending on the date
        self.tbm_head, self.head = self.read_header(
            filename, fileobj=stream)
        tbm_offset = tbm_header.itemsize if self.tbm_head else 0
        # scanlines follow the headers and run until the end of the file
        stream.seek(self.offset + tbm_offset, 0)
        payload = stream.read()
        self._read_scanlines(payload, self.head["number_of_scans"])
    year, jday, _ = self.decode_timestamps(self.head["start_time"])
    first_of_year = datetime.date(year, 1, 1)
    start_date = first_of_year + datetime.timedelta(days=int(jday) - 1)
    self.correct_scan_line_numbers()
    self.spacecraft_id = self.head["noaa_spacecraft_identification_code"]
    # files with id 1 that start before 1982 are remapped to id 25
    if self.spacecraft_id == 1 and start_date < datetime.date(1982, 1, 1):
        self.spacecraft_id = 25
    self.spacecraft_name = self.spacecraft_names[self.spacecraft_id]
    LOG.info(
        "Reading %s data", self.spacecrafts_orbital[self.spacecraft_id])
    return self.head, self.scans
def process_file(filename, start_line, end_line, fileobj=None):
    """Read, calibrate and navigate NOAA AVHRR GAC/LAC POD/KLM data.

    It creates three hdf5 files in the output location given by the pygac
    config file. The three files contain the avhrr data, quality flags,
    and sunsatangles.

    Args:
        filename (str): Path to GAC/LAC file
        start_line (int): First scanline to be processed (0-based)
        end_line (int): Last scanline to be processed (0-based),
                        set to 0 for the last available scanline
        fileobj: An open file object to read from. (optional)
    """
    started = datetime.datetime.now()
    LOG.info("Process file: %s", str(filename))

    # Keep the file open while searching for the reader class and later
    # creation of the instance.
    with file_opener(fileobj or filename) as open_file:
        reader = get_reader_class(filename, fileobj=open_file)()
        # NOTE(review): read() is handed the caller's fileobj rather than
        # open_file - confirm this is intended.
        reader.read(filename, fileobj=fileobj)
        reader.save(start_line, end_line)
    elapsed = datetime.datetime.now() - started
    LOG.info("Processing took: %s", str(elapsed))
def test_file_opener_1(self):
    """Test that reading through file_opener yields the file's content."""
    expected = 'file content'
    with file_opener('path/to/file') as opened:
        received = opened.read()
    self.assertEqual(received, expected)