def test_random_access():
    """Test basic random access

    TODO: test random access when time info is not provided directly
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Hack to enable testing if only required chunks are loaded.
        # Save and restore the class attribute in a try/finally so this
        # test does not leak the changed setting into other tests.
        prev_rechunk = Peaks.rechunk_on_save
        Peaks.rechunk_on_save = False
        try:
            st = strax.Context(storage=strax.DataDirectory(temp_dir),
                               register=[Records, Peaks])

            with pytest.raises(strax.DataNotAvailable):
                # Time range selection requires data already available
                st.get_df(run_id, 'peaks', time_range=(3, 5))

            st.make(run_id=run_id, targets='peaks')

            # Second part of hack: corrupt data by removing one chunk
            dirname = str(st._key_for(run_id, 'peaks'))
            os.remove(os.path.join(
                temp_dir,
                dirname,
                strax.dirname_to_prefix(dirname) + '-000000'))

            # Without a time range, the missing chunk must be noticed
            with pytest.raises(FileNotFoundError):
                st.get_array(run_id, 'peaks')

            # With a time range that avoids the removed chunk, loading works
            df = st.get_array(run_id, 'peaks', time_range=(3, 5))
            assert len(df) == 2 * recs_per_chunk
            assert df['time'].min() == 3
            assert df['time'].max() == 4
        finally:
            Peaks.rechunk_on_save = prev_rechunk
def test_filestore():
    """End-to-end check of the DataDirectory and ZipDirectory storage
    frontends: make data, inspect the on-disk layout and metadata, verify
    cached loading, then zip everything and read it back."""
    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks])

        # Nothing has been made yet
        assert not st.is_stored(run_id, 'peaks')
        st.scan_runs()
        assert st.list_available('peaks') == []

        st.make(run_id=run_id, targets='peaks')

        # After making, the data is stored and discoverable
        assert st.is_stored(run_id, 'peaks')
        st.scan_runs()
        assert st.list_available('peaks') == [run_id]
        assert st.scan_runs()['name'].values.tolist() == [run_id]

        # We should have two directories
        data_dirs = sorted(glob.glob(osp.join(temp_dir, '*/')))
        assert len(data_dirs) == 2

        # The first dir contains peaks.
        # It should have one data chunk (rechunk is on) and a metadata file
        prefix = strax.dirname_to_prefix(data_dirs[0])
        expected_files = [f'{prefix}-000000', f'{prefix}-metadata.json']
        assert sorted(os.listdir(data_dirs[0])) == expected_files

        # Check metadata got written correctly.
        metadata = st.get_meta(run_id, 'peaks')
        assert len(metadata)
        assert 'writing_ended' in metadata
        assert 'exception' not in metadata
        assert len(metadata['chunks']) == 1

        # Check data gets loaded from cache, not rebuilt
        md_filename = osp.join(data_dirs[0], f'{prefix}-metadata.json')
        mtime_before = osp.getmtime(md_filename)
        peaks = st.get_array(run_id=run_id, targets='peaks')
        assert len(peaks) == recs_per_chunk * n_chunks
        assert mtime_before == osp.getmtime(md_filename)

        # Test the zipfile store. Zipping is still awkward...
        zf = osp.join(temp_dir, f'{run_id}.zip')
        strax.ZipDirectory.zip_dir(temp_dir, zf, delete=True)
        assert osp.exists(zf)

        st = strax.Context(storage=strax.ZipDirectory(temp_dir),
                           register=[Records, Peaks])
        metadata_2 = st.get_meta(run_id, 'peaks')
        assert metadata == metadata_2
def _find(self, key, write, allow_incomplete, fuzzy_for, fuzzy_for_options):
    """Return the backend key for *key* if its metadata file exists inside
    the corresponding zip archive.

    Only read access is supported (write must be falsy); fuzzy matching
    inside zipfiles is not implemented.
    """
    assert not write

    # Check exact match / write case
    backend_key = self._backend_key(key)
    dirname = str(key)
    prefix = strax.dirname_to_prefix(dirname)
    with zipfile.ZipFile(self._zipname(key)) as zp:
        try:
            # getinfo raises KeyError when the member is absent
            zp.getinfo(f'{dirname}/{prefix}-metadata.json')
        except KeyError:
            pass
        else:
            return backend_key

    if not len(fuzzy_for) and not len(fuzzy_for_options):
        raise strax.DataNotAvailable
    raise NotImplementedError("Fuzzy matching within zipfiles not yet "
                              "implemented")
def test_raise_corruption(self):
    """Corrupt stored metadata and check that strax.DataCorrupted is raised,
    both when loading the corrupted target itself and when building a
    target that depends on it."""
    self.set_save_when('ALWAYS')
    self.st.make(self.test_run_id, self.target)
    assert self.is_stored()

    # Locate the data directory and its metadata file
    storage = self.st.storage[0]
    data_key = self.st.key_for(self.test_run_id, self.target)
    data_path = os.path.join(storage.path, str(data_key))
    assert os.path.exists(data_path)
    metadata = storage.backends[0].get_metadata(data_path)
    assert isinstance(metadata, dict)

    # copied from FileSystemBackend (maybe abstractify the method separately?)
    prefix = strax.dirname_to_prefix(data_path)
    metadata_json = f'{prefix}-metadata.json'
    md_path = os.path.join(data_path, metadata_json)
    assert os.path.exists(md_path)

    # Corrupt the metadata (making it non-JSON parsable) by appending a
    # non-JSON line; use a context manager so the file is always closed,
    # even if the write fails.
    with open(md_path, 'a') as md_file:
        md_file.write(
            'Adding a non-JSON line to the file to corrupt the metadata')

    # Now we should get an error since the metadata data is corrupted
    with self.assertRaises(strax.DataCorrupted):
        self.st.get_array(self.test_run_id, self.target)

    # Also test the error is raised if we build a target that depends on
    # corrupted data
    self.st.register(Peaks)
    with self.assertRaises(strax.DataCorrupted):
        self.st.get_array(self.test_run_id, 'peaks')

    # Cleanup if someone wants to re-use this self.st
    del self.st._plugin_class_registry['peaks']
def _get_metadata(self, zipn_and_dirn):
    """Load and parse the metadata JSON for one data directory stored
    inside a zip archive.

    :param zipn_and_dirn: tuple of (zip file name, data dir name)
    :return: parsed metadata as a dict
    """
    zip_name, dir_name = zipn_and_dirn
    prefix = strax.dirname_to_prefix(dir_name)
    md_member = f'{dir_name}/{prefix}-metadata.json'
    with zipfile.ZipFile(zip_name) as zp, zp.open(md_member) as f:
        return json.loads(f.read())