def find_results(self, output=None, output_prefix=None, **kwds):
    """ Yield :class:`HDF5ResultsStore` results
    """
    output = output or self.results['output']
    output_prefix = output_prefix or self.results['output_prefix']

    # Build a filename regex from the prefix and search the output location
    pattern = _pattern_to_regex(output_prefix)
    results = _find(output, pattern, regex=True)
    for result in results:
        yield HDF5ResultsStore(result, **kwds)
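# Usage sketch (assumption): `cfg` is a hypothetical stand-in for whatever
# object provides `find_results()` and the `results` mapping it reads its
# defaults from. The loop relies only on behavior shown above: the generator
# yields HDF5ResultsStore objects, which act as context managers and expose
# a `filename` attribute.
def print_result_files(cfg):
    for store in cfg.find_results():
        with store as h5:
            print(h5.filename)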
def test_write_tags(self, test_data_1):
    tags = {
        'much': 'so',
        'data': 'wow'
    }
    test_data_1.close()
    with HDF5ResultsStore(test_data_1.filename, 'r+') as h5:
        h5.update_tags(**tags)
        for tagname, value in tags.items():
            assert tagname in h5.tags
            assert h5.tags[tagname] == value
def batch_block(config, readers, window, overwrite=False):
    import logging
    from collections import defaultdict
    from itertools import product

    import numpy as np

    from yatsm import io
    from yatsm.results import HDF5ResultsStore
    from yatsm.pipeline import Pipe

    logger = logging.getLogger('yatsm')

    def sel_pix(pipe, y, x):
        # Select a single pixel's data (and any existing record) from the pipe
        return Pipe(data=pipe['data'].sel(y=y, x=x),
                    record=pipe.get('record', None))

    logger.info('Working on window: {}'.format(window))
    data = io.read_and_preprocess(config['data']['datasets'],
                                  readers,
                                  window,
                                  out=None)

    store_kwds = {
        'window': window,
        'reader': config.primary_reader,
        'root': config['results']['output'],
        'pattern': config['results']['output_prefix'],
    }

    # TODO: guess for number of records to store
    with HDF5ResultsStore.from_window(**store_kwds) as store:
        # TODO: read this from pre-existing results
        pipe = Pipe(data=data)
        pipeline = config.get_pipeline(pipe, overwrite=overwrite)

        # TODO: finish checking for resume
        if store.completed(pipeline) and not overwrite:
            logger.info('Already completed: {}'.format(store.filename))
            return

        # Run "eager" (whole-window) tasks once, then per-pixel tasks below
        pipe = pipeline.run_eager(pipe)

        record_results = defaultdict(list)
        n_ = data.y.shape[0] * data.x.shape[0]
        for i, (y, x) in enumerate(product(data.y.values, data.x.values)):
            logger.debug('Processing pixel {pct:>4.2f}%: y/x {y}/{x}'.format(
                pct=i / n_ * 100, y=y, x=x))
            pix_pipe = sel_pix(pipe, y, x)
            result = pipeline.run(pix_pipe, check_eager=False)

            # TODO: figure out what to do with 'data' results
            for k, v in result['record'].items():
                record_results[k].append(v)

        # Stack per-pixel records into one array per result name
        for name, result in record_results.items():
            record_results[name] = np.concatenate(result)

        if record_results:
            store.write_result(pipeline, record_results, overwrite=overwrite)
        # TODO: write out cached data
        return store.filename
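# Usage sketch (assumption): `config`, `readers`, and `window` below are
# hypothetical stand-ins for what batch_block expects per its signature above
# -- a configuration object (subscriptable, with `primary_reader` and
# `get_pipeline`), the dataset readers, and a read window. As shown above,
# batch_block returns the HDF5 results filename, or None when the window was
# already completed and overwrite is False.
def run_window(config, readers, window):
    result_file = batch_block(config, readers, window, overwrite=False)
    if result_file:
        print('Wrote results to {}'.format(result_file))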
def test_create_no_georef(self, tmpdir):
    with pytest.raises(TypeError) as te:
        HDF5ResultsStore(str(tmpdir.join('1.nc')), georef=None)
    assert 'Must specify `georef` as `Georeference`' in str(te.value)
@pytest.fixture
def test_data_1(tmpdir):
    with HDF5ResultsStore(str(tmpdir.join('1.nc')), georef=_GEOREF) as store:
        return store