def test_compare_fileinfo(self):
    """Check the equality semantics of FileInfo objects.

    Two objects compare equal only when path and times agree; a plain
    string holding the same path must not compare equal.
    """

    def make(path, year):
        # Every fixture covers 00:00-02:00 on January 1st of the given year.
        return FileInfo(
            path=path,
            times=[
                datetime.datetime(year, 1, 1, 0),
                datetime.datetime(year, 1, 1, 2),
            ],
            attr={},
        )

    f1 = make("fake/path", 1900)
    f2 = make("fake/path", 1900)          # identical twin of f1
    f3 = make("other/fake/path", 1900)    # differs in path only
    f4 = make("fake/path", 1910)          # differs in times only

    assert f1 == f2

    assert f1 != f3
    assert f1 != f4
    assert f2 != f3
    assert f2 != f4
    assert f3 != f4

    # Comparison against a foreign type:
    assert f1 != "fake/path"
def test_sequence_placeholder(self):
    """Test find on the "sequence-placeholder" fileset.

    Covers an empty period, a plain search and a search bundled into
    six-hour bins.

    Returns:
        None
    """
    fileset = self.init_filesets()["sequence-placeholder"]

    def expected(name, ident, start, end):
        # Build the reference FileInfo for one file of day 2018/001.
        return FileInfo(
            join(self.refdir, 'sequence', '2018', '001', name),
            [start, end],
            {'id': ident},
        )

    def first_file():
        return expected(
            'sequence0001.txt', 1,
            datetime.datetime(2018, 1, 1, 0, 0),
            datetime.datetime(2018, 1, 1, 12, 0),
        )

    def second_file():
        return expected(
            'sequence0002.txt', 2,
            datetime.datetime(2018, 1, 1, 12, 0),
            datetime.datetime(2018, 1, 2, 0, 0),
        )

    # Nothing is expected in this period:
    nothing = list(
        fileset.find("2016-12-31", "2018-01-01", no_files_error=False)
    )
    assert not nothing

    # A plain search yields both files as a flat list:
    flat = list(fileset.find("2018-01-01", "2018-01-02"))
    assert flat == [first_file(), second_file()]

    # With bundle="6h" each file ends up in a bin of its own:
    bundled = list(fileset.find("2018-01-01", "2018-01-02", bundle="6h"))
    assert bundled == [[first_file()], [second_file()]]
def test_complicated_subdirs(self):
    """Check that FileSet finds files below directories whose names mix
    literal text with placeholders.
    """
    # The Pinocchio fileset from the cloud toolbox: the folder name is a
    # literal "t" followed by date placeholders.
    pattern = join(
        self.refdir,
        "pinocchio",
        "t{year2}{month}{day}",
        "tm{year2}{month}{day}{hour}{minute}{second}{millisecond}.jpg",
    )
    pinocchio = FileSet(pattern)

    # Iterating over the fileset yields every file it contains:
    files = list(pinocchio)

    moment = datetime.datetime(2017, 11, 2, 13, 28, 55, 573000)
    expected_path = join(
        self.refdir, 'pinocchio', 't171102', 'tm171102132855573.jpg')
    # Start and end of the coverage are the same instant.
    check = [
        FileInfo(
            expected_path,
            [moment, datetime.datetime(2017, 11, 2, 13, 28, 55, 573000)],
            {},
        ),
    ]

    assert files == check
def test_regex(self):
    """Test find and find_closest on the "regex-HIRS" fileset.

    The parsed attributes of the closest file are compared as well.
    """
    hirs = self.init_filesets()["regex-HIRS"]

    expected = FileInfo(
        join(self.refdir, 'regex',
             'NSS.HIRX.NJ.D99127.S0632.E0820.B2241718.WI.gz'),
        [
            datetime.datetime(1999, 5, 7, 6, 32),
            datetime.datetime(1999, 5, 7, 8, 20),
        ],
        {'satcode': 'NJ', 'B': '2241718', 'station': 'WI'},
    )

    # The closest file to the requested date, including its attributes:
    closest = hirs.find_closest("1999-05-08")
    assert closest == expected
    assert closest.attr == expected.attr

    # A period search must return exactly that one file:
    in_period = list(hirs.find("1999-05-07", "1999-05-09"))
    assert in_period == [expected]
def get_info(self, filename, **kwargs):
    """Build a FileInfo whose start and end time are the file's timestamp.

    Args:
        filename: Path and name of file or FileInfo object.
            NOTE(review): the body reads ``filename.path``, so a plain
            string would fail here - confirm what callers pass.
        **kwargs: Accepted but not used by this method.

    Returns:
        A FileInfo object.
    """
    # One timestamp serves as both bounds of the time coverage.
    return FileInfo(filename.path, [self._get_timestamp(filename)] * 2)
def test_files_overlap_subdirectory(self):
    """Test a file whose time coverage extends beyond its sub directory.
    """
    tutorial = self.init_filesets()["tutorial"]
    tutorial.set_placeholders(satellite="SatelliteA")

    closest = tutorial.find_closest("2018-01-03")

    # The file sits in the 2018-01-02 folder but ends on 2018-01-03.
    expected_path = join(
        self.refdir, 'tutorial', 'SatelliteA', '2018-01-02',
        '200000-000000.nc')
    expected_times = [
        datetime.datetime(2018, 1, 2, 20, 0),
        datetime.datetime(2018, 1, 3, 0, 0),
    ]
    expected = FileInfo(
        expected_path, expected_times, {'satellite': 'SatelliteA'})

    assert closest == expected
def test_single(self, file_system):
    """Test find on the single fileset.

    Args:
        file_system: Passed on to ``init_filesets`` and
            ``_refdir_for_fs``.

    Returns:
        None
    """
    single = self.init_filesets(file_system)["single"]

    # Nothing is expected in this period:
    nothing = list(
        single.find("2016-12-31", "2018-01-01", no_files_error=False)
    )
    assert not nothing

    check = [
        FileInfo(
            join(self._refdir_for_fs(file_system), 'single_file.nc'),
            [
                datetime.datetime(2018, 1, 1, 0, 0),
                datetime.datetime(2018, 1, 3, 0, 0),
            ],
            {},
        ),
    ]

    # The same result is expected without bundling, with a time-based
    # bundle and with a count-based bundle.
    for options in ({}, {"bundle": "12h"}, {"bundle": 3}):
        found_files = list(
            single.find("2018-01-01", "2018-01-02", **options)
        )
        assert found_files == check
def get_info(self, filename, **kwargs):
    """Get the time coverage from a Pinocchio JPG image.

    The EXIF ``DateTimeOriginal`` entry of the image is parsed and used as
    both start and end of the time coverage.

    Args:
        filename: Path and name of file or FileInfo object.
        **kwargs: Accepted but not used by this method.

    Returns:
        A FileInfo object.
    """
    # TAGS maps numeric tag ids to names; invert it to look up by name.
    name2tagnum = {name: num for num, name in TAGS.items()}

    # Open the image in a context manager so the file handle is released
    # even when reading the EXIF data fails.
    with PIL.Image.open(filename, 'r') as image:
        # _getexif() is a private Pillow helper; kept from the original.
        time_string = image._getexif()[name2tagnum["DateTimeOriginal"]]

    timestamp = datetime.datetime.strptime(time_string, "%Y:%m:%d %H:%M:%S")

    # Path and time coverage are two separate arguments; the original
    # subscripted the filename with the (time, time) tuple instead.
    return FileInfo(filename, [timestamp, timestamp])
def test_tutorial(self):
    """Test the fileset examples of the tutorial.

    Covers find (plain and bundled), find_closest with a filter, permanent
    placeholders, and the map/imap methods.

    Returns:
        None
    """
    filesets = self.init_filesets()
    tutorial = filesets["tutorial"]

    def sat_b(name, start, end):
        # Reference FileInfo for a SatelliteB file in the 2018-01-01 folder.
        return FileInfo(
            join(self.refdir, 'tutorial', 'SatelliteB', '2018-01-01', name),
            [start, end],
            {'satellite': 'SatelliteB'},
        )

    def morning():
        # The first three SatelliteB files of 2018-01-01.
        return [
            sat_b('000000-050000.nc',
                  datetime.datetime(2018, 1, 1, 0, 0),
                  datetime.datetime(2018, 1, 1, 5, 0)),
            sat_b('050000-100000.nc',
                  datetime.datetime(2018, 1, 1, 5, 0),
                  datetime.datetime(2018, 1, 1, 10, 0)),
            sat_b('100000-150000.nc',
                  datetime.datetime(2018, 1, 1, 10, 0),
                  datetime.datetime(2018, 1, 1, 15, 0)),
        ]

    def evening():
        # The remaining two files; the last one ends on 2018-01-02.
        return [
            sat_b('150000-200000.nc',
                  datetime.datetime(2018, 1, 1, 15, 0),
                  datetime.datetime(2018, 1, 1, 20, 0)),
            sat_b('200000-010000.nc',
                  datetime.datetime(2018, 1, 1, 20, 0),
                  datetime.datetime(2018, 1, 2, 1, 0)),
        ]

    # Nothing is expected in this period:
    nothing = list(
        tutorial.find("2017-12-31", "2018-01-01", no_files_error=False)
    )
    assert not nothing

    # Closest file to 2018-01-01 03:00, restricted to SatelliteB for this
    # call only by excluding the other satellites:
    closest = tutorial.find_closest(
        "2018-01-01 03:00",
        filters={"!satellite": ("SatelliteA", "SatelliteC")},
    )
    assert closest == morning()[0]

    # Restrict the fileset to SatelliteB permanently:
    tutorial.set_placeholders(satellite="SatelliteB")

    # A plain search yields five files as a flat list:
    flat = list(tutorial.find("2018-01-01", "2018-01-02"))
    assert flat == morning() + evening()

    # Bundled by 12 hours: the same five files in two bins.
    by_time = list(tutorial.find("2018-01-01", "2018-01-02", bundle="12h"))
    assert by_time == [morning(), evening()]

    # Bundled by three files per bin: again two bins.
    by_count = list(tutorial.find("2018-01-01", "2018-01-02", bundle=3))
    assert by_count == [morning(), evening()]

    expected_names = ['SatelliteB'] * 10
    expected_values = [
        111.92121062601221, 24.438060320121387, -98.80775640366036,
        -75.84330354813459, 59.41297628327247, 106.80513550614192,
        -3.999061608822918, -108.68523313569861, -51.82441769876156,
        66.33842832792985
    ]

    for test_method in [FileSet.map, FileSet.imap]:
        # Apply the mapping function without on_content:
        results = list(
            test_method(
                filesets["tutorial"],
                TestFileSet._tutorial_map,
                start="2018-01-01",
                end="2018-01-03",
            )
        )
        assert results == expected_names

        # Apply the mapping function with on_content=True:
        results = list(
            test_method(
                filesets["tutorial"],
                TestFileSet._tutorial_map_content,
                start="2018-01-01",
                end="2018-01-03",
                on_content=True,
            )
        )
        assert np.allclose(results, expected_values)
def test_glob(self):
    """Test a FileSet whose path mixes a placeholder with glob wildcards.

    The expected files all carry the widest representable time coverage,
    so both sides are ordered by path before they are compared.
    """
    files = FileSet(
        join(self.refdir, "tutorial", "{satellite}", "*", "*.nc"),
        placeholder={"satellite": 'SatelliteA'},
    )

    self._print_files(list(files))

    day_folders = ('2018-01-02', '2018-01-01')
    file_names = (
        '000000-040000.nc',
        '080000-120000.nc',
        '200000-000000.nc',
        '040000-080000.nc',
        '120000-160000.nc',
        '160000-200000.nc',
    )

    # datetime.min / datetime.max equal 0001-01-01 00:00 and
    # 9999-12-31 23:59:59.999999.
    expected = [
        FileInfo(
            join(self.refdir, 'tutorial', 'SatelliteA', folder, name),
            [datetime.datetime.min, datetime.datetime.max],
            {'satellite': 'SatelliteA'},
        )
        for folder in day_folders
        for name in file_names
    ]

    # Sort by path rather than by time (because the times are all equal).
    def by_path(info):
        return info.path

    check = sorted(expected, key=by_path)

    assert sorted(files, key=by_path) == check