def test_consecutive_fiilters(self, triage_setup):
    # NOTE(review): the "fiilters" typo is in the original test name; it is
    # kept so the pytest node id does not change.
    collection = ImageFileCollection(
        location=triage_setup.test_dir,
        keywords=['imagetyp', 'filter', 'object'])
    # A filter value that matches no file yields an empty result.
    empty_result = collection.files_filtered(object='fdsafs')
    assert len(empty_result) == 0
    # A second, independent filter still sees the full collection.
    light_matches = collection.files_filtered(object=None, imagetyp='light')
    assert len(light_matches) == triage_setup.n_test['light']
def test_filter_does_not_not_permanently_change_file_mask(self, triage_setup):
    ic = ImageFileCollection(location=triage_setup.test_dir,
                             keywords=['imagetyp'])
    # No file should be masked before we do anything.
    assert not ic.summary['file'].mask.any()
    # Run a filter whose value cannot match any file...
    ic.files_filtered(imagetyp='foisajfoisaj')
    # ...and verify the file mask is unchanged afterwards.
    assert not ic.summary['file'].mask.any()
def test_filtered_files_have_proper_path(self, triage_setup):
    ic = ImageFileCollection(location=triage_setup.test_dir, keywords='*')
    # Grab the bias frames twice: once as bare names (forced into a
    # list so a later call cannot change it)...
    bare_names = list(ic.files_filtered(imagetyp='bias'))
    # ...and once with the full path included.
    with_path = ic.files_filtered(imagetyp='bias', include_path=True)
    # Stripping the directory from each qualified name must recover the
    # corresponding bare name.
    for qualified, bare in zip(with_path, bare_names):
        assert os.path.basename(qualified) == bare
def test_filter_files(self, triage_setup):
    ic = ImageFileCollection(
        location=triage_setup.test_dir, keywords=['imagetyp', 'filter'])
    # Filtering on image type returns exactly the bias frames.
    n_bias = len(ic.files_filtered(imagetyp='bias'))
    assert n_bias == triage_setup.n_test['bias']
    # The unfiltered file list is not affected by the filter above.
    assert len(ic.files) == triage_setup.n_test['files']
    # Keyword membership checks: one real keyword, one nonsense one.
    assert 'filter' in ic.keywords
    assert 'flying monkeys' not in ic.keywords
    # The test data contains exactly two distinct image types.
    assert len(ic.values('imagetyp', unique=True)) == 2
def test_fits_summary_when_keywords_are_not_subset(self, triage_setup):
    """
    Catch case when there is overlap between keyword list passed to the
    ImageFileCollection and to files_filtered but the latter is not a
    subset of the former.
    """
    ic = ImageFileCollection(triage_setup.test_dir,
                             keywords=['imagetyp', 'exposure'])
    total_files = len(ic.files)
    # 'monkeys' is not among the summary keywords; requesting files
    # where it is absent (None) should match every file.
    missing_key_files = ic.files_filtered(imagetyp='*', monkeys=None)
    assert total_files > 0
    assert total_files == len(missing_key_files)
def test_regex_match_for_search(self, triage_setup):
    # Exercise regex matching in searches.
    ic = ImageFileCollection(triage_setup.test_dir)

    bias_files = ic.files_filtered(regex_match=True, imagetyp='b.*s')
    assert len(bias_files) == triage_setup.n_test['bias']

    # An alternation covering both image types should return every file
    # in the test set.
    all_files = ic.files_filtered(regex_match=True, imagetyp='bias|light')
    assert len(all_files) == triage_setup.n_test['files']

    # Add a column with more interesting content and see whether we
    # match that.
    ic.summary['match_me'] = [
        'hello',
        'goodbye',
        'bye',
        'byte',
        'good bye hello',
        'dog'
    ]
    # 'hello' anywhere in the value...
    assert len(ic.files_filtered(regex_match=True, match_me='hello')) == 2
    # ...anchored at the start of the value...
    assert len(ic.files_filtered(regex_match=True, match_me='^hello')) == 1
    # ...and matching should be case-insensitive.
    assert len(ic.files_filtered(regex_match=True, match_me='^HeLlo')) == 1
    # A pattern that spans several of the values.
    assert len(ic.files_filtered(regex_match=True, match_me='by.*e')) == 4
def test_filter_files_whitespace_keys(self, triage_setup):
    # Create a file whose header has a keyword containing a space.
    header = fits.Header([('HIERARCH a b', 2)])
    hdu_list = fits.HDUList(
        [fits.PrimaryHDU(np.ones((10, 10)), header=header)])
    hdu_list.writeto(
        os.path.join(triage_setup.test_dir, 'hdr_with_whitespace.fits'))

    ic = ImageFileCollection(location=triage_setup.test_dir)
    # Using a dictionary and unpacking it should work
    matches = ic.files_filtered(**{'a b': 2})
    assert len(matches) == 1
    assert 'hdr_with_whitespace.fits' in matches

    # Also check it's working with generators:
    for _, filename in ic.data(a_b=2, replace_='_', return_fname=True):
        assert filename == 'hdr_with_whitespace.fits'
def test_force_detect_fits_files_finds_fits_files(self, triage_setup):
    # Tests for new feature
    #
    # https://github.com/astropy/ccdproc/issues/620
    #
    # which supports adding all of the FITS files in a location based on
    # their *contents* instead of their *extension*.

    # Grab files from the default collection and make a copy with a new
    # name (and with no fits-like extension)
    #
    # Making a copy of *every* file means we can just double the expected
    # number of files as part of the tests.
    path = Path(triage_setup.test_dir)

    # BUGFIX: snapshot the directory listing *before* copying. The
    # original code iterated path.iterdir() while writing new files into
    # the same directory; whether freshly created entries show up in an
    # in-progress iteration is filesystem-dependent, which could double
    # up the copies or skip files.
    originals = list(path.iterdir())
    for idx, p in enumerate(originals):
        new_name = 'no_extension{}'.format(idx)
        new_path = path / new_name
        new_path.write_bytes(p.read_bytes())

    ic = ImageFileCollection(location=str(path), find_fits_by_reading=True)

    # Compressed files won't be automagically detected by reading the
    # first few bytes.
    expected_number = (2 * triage_setup.n_test['files'] -
                       triage_setup.n_test['compressed'])
    assert len(ic.summary) == expected_number

    # Every bias frame was copied once, so the count doubles.
    n_bias = (ic.summary['imagetyp'] == 'BIAS').sum()
    assert n_bias == 2 * triage_setup.n_test['bias']

    # Only one file in the original set of test files has exposure time
    # 15, so there should be two now.
    assert len(ic.files_filtered(exposure=15.0)) == 2

    # Try one of the generators
    expected_heads = (2 * triage_setup.n_test['light'] -
                      triage_setup.n_test['compressed'])
    n_heads = 0
    for h in ic.headers(imagetyp='light'):
        assert h['imagetyp'].lower() == 'light'
        n_heads += 1

    assert n_heads == expected_heads
def test_image_collection_with_no_location(self, triage_setup):
    # Test for a feature requested in
    #
    # https://github.com/astropy/ccdproc/issues/374
    #
    # and fix the bug reported in
    #
    # https://github.com/astropy/ccdproc/issues/662
    #
    # Create a collection from a list of file names (which can include
    # path as needed)
    source_path = Path(triage_setup.test_dir)

    # Put the first three files in source_path into temp_path below
    # then create the image collection out of the three in temp_path and
    # the rest in source_path.
    source_files = [p for p in source_path.iterdir()]
    move_to_temp = source_files[:3]
    keep_in_source = source_files[3:]

    with TemporaryDirectory() as td:
        temp_dir = Path(td)
        file_paths = []
        for source in move_to_temp:
            temp_path = temp_dir / source.name
            temp_path.write_bytes(source.read_bytes())
            file_paths.append(str(temp_path))
        file_paths.extend(str(p) for p in keep_in_source)

        # Move up a level to make sure we are not accidentally
        # pulling in files from the current working directory,
        # which includes everything in source.
        #
        # BUGFIX: remember the current working directory and restore it in
        # the finally block below; the original test changed the cwd
        # permanently, leaking that change into every test run after it.
        original_dir = os.getcwd()
        os.chdir('..')
        try:
            ic = ImageFileCollection(filenames=file_paths)
            assert len(ic.summary) == len(file_paths)

            expected_name = \
                get_pkg_data_filename('data/expected_ifc_file_properties.csv')
            expected = Table.read(expected_name)
            # Make the comparison more reliable by sorting
            expected.sort('file')

            actual = ic.summary

            # Write the actual IFC summary out to disk to turn bool into
            # strings of "True" and "False", and any other non-essential
            # differences between the tables.
            #
            # BUGFIX: write the scratch file into the temporary directory
            # (auto-removed when the with block exits) instead of the
            # changed working directory, where it used to be left behind.
            tmp_file = str(temp_dir / 'actual.csv')
            actual.write(tmp_file)
            actual = Table.read(tmp_file)

            # Make the comparison more reliable by sorting...but the
            # actual in this case includes paths, so we really want to
            # sort by the base name of the file.
            bases = np.array([Path(f).name for f in actual['file']])
            sort_order = np.argsort(bases)
            actual = actual[sort_order]
            bases = bases[sort_order]
            assert all(Path(f).exists() for f in actual['file'])
            for column in expected.colnames:
                if column == 'file':
                    assert np.all(bases == expected[column])
                else:
                    assert np.all(actual[column] == expected[column])

            # Set comparisons don't care about order :)
            # Check several of the ways we can get file names from the
            # collection and ensure all of them include the path.
            assert set(file_paths) == set(ic.summary['file'])
            assert set(file_paths) == set(ic.files)
            assert set(file_paths) == set(ic.files_filtered(include_path=True))

            # Spot check a couple of dtypes as a test for
            # https://github.com/astropy/ccdproc/issues/662
            assert ic.summary['extend'].dtype == 'bool'

            # Of course, default dtypes on Windows are different. So
            # instead of comparing to something sensible like int64,
            # compare to the default int dtype.
            assert ic.summary['naxis1'].dtype == np.array([5]).dtype

            # and the default float dtype
            assert ic.summary['exposure'].dtype == np.array([5.0]).dtype

            expected_heads = (actual['imagetyp'] == 'LIGHT').sum()

            n_heads = 0
            # Try one of the generators
            for h in ic.headers(imagetyp='light'):
                assert h['imagetyp'].lower() == 'light'
                n_heads += 1

            assert n_heads == expected_heads
        finally:
            # Restore the working directory even if an assertion fails.
            os.chdir(original_dir)
def test_files_filtered_with_full_path(self, triage_setup):
    ic = ImageFileCollection(triage_setup.test_dir, keywords=['naxis'])
    # Every name returned with include_path=True should start with the
    # collection's directory.
    for full_name in ic.files_filtered(naxis=1, include_path=True):
        assert full_name.startswith(triage_setup.test_dir)
def test_filter_by_numerical_value(self, triage_setup):
    ic = ImageFileCollection(triage_setup.test_dir, keywords=['naxis'])
    # naxis=2 matches none of the test files...
    assert len(ic.files_filtered(naxis=2)) == 0
    # ...while naxis=1 matches all of them.
    assert len(ic.files_filtered(naxis=1)) == triage_setup.n_test['files']
def test_filter_fz_files(self, triage_setup):
    # Build a collection holding only the fpack-compressed test file.
    fz_name = 'test.fits.fz'
    ic = ImageFileCollection(location=triage_setup.test_dir,
                             filenames=fz_name)
    # Get a subset of files with a specific header value
    matched = ic.files_filtered(exposure=15.0)
    assert len(matched) == 1
def test_filter_files_with_str_on_nonstr_column(self, triage_setup):
    ic = ImageFileCollection(location=triage_setup.test_dir)
    # Filtering an integer column with a string
    no_matches = ic.files_filtered(naxis='2')
    assert len(no_matches) == 0