def test_ascii_spec(self):
    """Check ObjectMapper.convert_dtype against 'ascii' and 'bytes' dataset specs.

    Exercised inputs: str and bytes scalars, a list of str, unicode and byte
    numpy arrays, an empty list, a non-string scalar (must raise ValueError)
    and DataChunkIterator wrappers (must be returned as the same object).
    """
    for kind in ('ascii', 'bytes'):
        with self.subTest(spec_type=kind):
            spec = DatasetSpec('an example dataset', kind, name='data')

            # str scalar -> bytes
            raw = 'a'
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertEqual(converted, b'a')
            self.assertIs(type(converted), bytes)
            self.assertEqual(dtype_out, 'ascii')

            # bytes scalar stays bytes
            raw = b'a'
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertEqual(converted, b'a')
            self.assertIs(type(converted), bytes)
            self.assertEqual(dtype_out, 'ascii')

            # list of str -> list of bytes
            raw = ['a', 'b']
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertListEqual(converted, [b'a', b'b'])
            self.assertIs(type(converted[0]), bytes)
            self.assertEqual(dtype_out, 'ascii')

            # unicode numpy array -> byte-string numpy array
            raw = np.array(['a', 'b'])
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            np.testing.assert_array_equal(converted, np.array(['a', 'b'], dtype='S1'))
            self.assertEqual(dtype_out, 'ascii')

            # byte-string numpy array compares equal to the input
            raw = np.array(['a', 'b'], dtype='S1')
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            np.testing.assert_array_equal(converted, raw)
            self.assertEqual(dtype_out, 'ascii')

            # empty list
            raw = []
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertListEqual(converted, raw)
            self.assertEqual(dtype_out, 'ascii')

            # non-string scalar is rejected
            raw = 1
            expected_error = "Expected unicode or ascii string, got <class 'int'>"
            with self.assertRaisesWith(ValueError, expected_error):
                ObjectMapper.convert_dtype(spec, raw)

            # DataChunkIterator over unicode data: same object comes back
            raw = DataChunkIterator(np.array(['a', 'b']))
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertIs(converted, raw)
            self.assertEqual(dtype_out, 'ascii')

            # DataChunkIterator over byte-string data: same object comes back
            raw = DataChunkIterator(np.array(['a', 'b'], dtype='S1'))
            converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
            self.assertIs(converted, raw)
            self.assertEqual(dtype_out, 'ascii')
def test_list_none_dtype(self):
    """Test that DataChunkIterator has the passed-in dtype and no chunks when given a list of None.
    """
    a = [None, None, None]
    dci = DataChunkIterator(a, dtype=np.dtype('int'))
    self.assertTupleEqual(dci.maxshape, (3,))
    self.assertEqual(dci.dtype, np.dtype('int'))
    # Actually count the chunks that are yielded so that the zero-chunk
    # assertion below can fail if the iterator ever produces data.
    count = sum(1 for _ in dci)
    self.assertEqual(count, 0)
    self.assertTupleEqual(dci.recommended_data_shape(), (3,))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_set_maxshape(self):
    """Check that an explicitly passed maxshape is kept and shapes the chunk selection."""
    requested_maxshape = (5, 2, 3)
    dci = DataChunkIterator(np.array([3]), maxshape=requested_maxshape, buffer_size=2)
    self.assertTupleEqual(dci.maxshape, (5, 2, 3))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertListEqual(chunk.data.tolist(), [3])
        # The selection has one slice per maxshape dimension.
        expected_selection = (slice(0, 1), slice(None), slice(None))
        self.assertTupleEqual(chunk.selection, expected_selection)
    self.assertEqual(n_chunks, 1)

    self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_DataChunkIterator_error_on_undetermined_axis(self):
    """Compare two DataChunkIterators where one has an undetermined axis and
    undetermined axes are NOT ignored: the comparison must report an axis-length error.
    """
    with_open_axis = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5))
    fully_determined = DataChunkIterator(data=np.arange(10).reshape(2, 5))

    outcome = assertEqualShape(with_open_axis, fully_determined, ignore_undetermined=False)

    self.assertFalse(outcome.result)
    self.assertEqual(outcome.error, 'AXIS_LEN_ERROR')
    self.assertTupleEqual(outcome.ignored, ())
    self.assertTupleEqual(outcome.unmatched, ((0, 0),))
    self.assertTupleEqual(outcome.shape1, (None, 5))
    self.assertTupleEqual(outcome.shape2, (2, 5))
    self.assertTupleEqual(outcome.axes1, (0, 1))
    self.assertTupleEqual(outcome.axes2, (0, 1))
def test_DataChunkIterator_ignore_undetermined_axis(self):
    """Compare two DataChunkIterators where one has an undetermined axis and
    undetermined axes are ignored: the comparison must succeed and list the ignored axis pair.
    """
    with_open_axis = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5))
    fully_determined = DataChunkIterator(data=np.arange(10).reshape(2, 5))

    outcome = assertEqualShape(with_open_axis, fully_determined, ignore_undetermined=True)

    self.assertTrue(outcome.result)
    self.assertIsNone(outcome.error)
    self.assertTupleEqual(outcome.ignored, ((0, 0),))
    self.assertTupleEqual(outcome.unmatched, ())
    self.assertTupleEqual(outcome.shape1, (None, 5))
    self.assertTupleEqual(outcome.shape2, (2, 5))
    self.assertTupleEqual(outcome.axes1, (0, 1))
    self.assertTupleEqual(outcome.axes2, (0, 1))
def test_DataChunkIterators_match(self):
    """Compare two DataChunkIterators built from identically shaped data: shapes must match."""
    first = DataChunkIterator(data=np.arange(10).reshape(2, 5))
    second = DataChunkIterator(data=np.arange(10).reshape(2, 5))

    outcome = assertEqualShape(first, second)

    self.assertTrue(outcome.result)
    self.assertIsNone(outcome.error)
    self.assertTupleEqual(outcome.ignored, ())
    self.assertTupleEqual(outcome.unmatched, ())
    self.assertTupleEqual(outcome.shape1, (2, 5))
    self.assertTupleEqual(outcome.shape2, (2, 5))
    self.assertTupleEqual(outcome.axes1, (0, 1))
    self.assertTupleEqual(outcome.axes2, (0, 1))
def test_write_dataset_datachunkiterator_data_and_time(self):
    """Write a TimeSeries whose data and timestamps are both DataChunkIterators,
    then read the data dataset back and compare it with the source array.
    """
    expected = np.arange(30).reshape(5, 2, 3)
    data_dci = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    timestamps_dci = DataChunkIterator.from_iterable(np.arange(5))

    series = TimeSeries('ts_name', data_dci, 'A', timestamps=timestamps_dci)
    self.nwbfile.add_acquisition(series)

    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)

    with File(self.path, 'r') as f:
        stored = f['/acquisition/ts_name/data']
        self.assertListEqual(stored[:].tolist(), expected.tolist())
def test_standard_iterator_unbuffered(self):
    """Iterate over a range with buffer_size=1 and expect ten single-element chunks."""
    dci = DataChunkIterator(data=range(10), buffer_size=1)
    self.assertEqual(dci.dtype, np.dtype(int))
    self.assertTupleEqual(dci.maxshape, (10, ))
    # The recommended data shape is checked both before and after iteration.
    self.assertTupleEqual(dci.recommended_data_shape(), (10, ))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertEqual(chunk.data.shape[0], 1)
    self.assertEqual(n_chunks, 10)

    self.assertTupleEqual(dci.recommended_data_shape(), (10, ))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_none_iter(self):
    """Test that DataChunkIterator __init__ sets defaults correctly and all chunks and recommended shapes are None.
    """
    dci = DataChunkIterator(dtype=np.dtype('int'))
    self.assertIsNone(dci.maxshape)
    self.assertEqual(dci.dtype, np.dtype('int'))
    self.assertEqual(dci.buffer_size, 1)
    self.assertEqual(dci.iter_axis, 0)
    # Actually count the chunks that are yielded so that the zero-chunk
    # assertion below can fail if the iterator ever produces data.
    count = sum(1 for _ in dci)
    self.assertEqual(count, 0)
    self.assertIsNone(dci.recommended_data_shape())
    self.assertIsNone(dci.recommended_chunk_shape())
def test_start_with_none(self):
    """Iterate over a list that starts with None entries; only the trailing
    value is expected to be yielded, selected at its original position.
    """
    values = [None, None, 3]
    dci = DataChunkIterator(values, buffer_size=2)
    self.assertTupleEqual(dci.maxshape, (3,))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertListEqual(chunk.data.tolist(), [3])
        self.assertEqual(len(chunk.selection), 1)
        self.assertEqual(chunk.selection[0], slice(2, 3))
    self.assertEqual(n_chunks, 1)

    self.assertTupleEqual(dci.recommended_data_shape(), (3,))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_dci_input(self):
    """Check convert_dtype on DataChunkIterator input for a wider and a narrower spec dtype.

    In both cases the iterator itself must be returned unchanged; only the
    reported dtype (and the warning for the int64 spec) differ.
    """
    # Spec asks for int64 while the data is int32: a warning is expected.
    wide_spec = DatasetSpec('an example dataset', 'int64', name='data')
    dci = DataChunkIterator(np.array([1, 2, 3], dtype=np.int32))
    expected_warning = (
        "Spec 'data': Value with data type int32 is being converted to data type int64 as specified."
    )
    with self.assertWarnsWith(UserWarning, expected_warning):
        converted, dtype_out = ObjectMapper.convert_dtype(wide_spec, dci)
    self.assertIs(converted, dci)  # the iterator is not converted
    self.assertEqual(dtype_out, np.int64)

    # Spec asks for int16 while the data is int32: the higher precision is reported.
    narrow_spec = DatasetSpec('an example dataset', 'int16', name='data')
    dci = DataChunkIterator(np.array([1, 2, 3], dtype=np.int32))
    converted, dtype_out = ObjectMapper.convert_dtype(narrow_spec, dci)
    self.assertIs(converted, dci)  # the iterator is not converted
    self.assertEqual(dtype_out, np.int32)
def test_sparse_data_buffer_aligned(self):
    """Iterate over a list with None gaps that line up with buffer_size=2
    and expect three full two-element chunks.
    """
    values = [1, 2, 3, 4, None, None, 7, 8, None, None]
    dci = DataChunkIterator(values, buffer_size=2)
    self.assertTupleEqual(dci.maxshape, (10,))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertTupleEqual(chunk.data.shape, (2,))
        self.assertEqual(len(chunk.selection), 1)
        # Values are 1-based positions, so they determine the expected slice.
        expected_slice = slice(chunk.data[0] - 1, chunk.data[1])
        self.assertEqual(chunk.selection[0], expected_slice)
    self.assertEqual(n_chunks, 3)

    self.assertTupleEqual(dci.recommended_data_shape(), (10,))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_list_numpy_scalar(self):
    """Iterate over a one-element numpy array and expect a single chunk."""
    dci = DataChunkIterator(np.array([3]), buffer_size=2)
    self.assertTupleEqual(dci.maxshape, (1,))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertListEqual(chunk.data.tolist(), [3])
        self.assertEqual(len(chunk.selection), 1)
        self.assertEqual(chunk.selection[0], slice(0, 1))
    self.assertEqual(n_chunks, 1)

    self.assertTupleEqual(dci.recommended_data_shape(), (1,))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_multidimensional_list_first_axis(self):
    """Test DataChunkIterator on nested-list data with the default buffer size,
    iterating over the first dimension.
    """
    nested = np.arange(30).reshape(5, 2, 3).tolist()
    dci = DataChunkIterator(nested)
    self.assertTupleEqual(dci.maxshape, (5, 2, 3))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertTupleEqual(chunk.data.shape, (1, 2, 3))
    self.assertEqual(n_chunks, 5)

    self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_numpy_iter_buffered_middle_axis(self):
    """Test DataChunkIterator on numpy data with buffer_size=2, iterating over the middle dimension.
    """
    source = np.arange(45).reshape(5, 3, 3)
    dci = DataChunkIterator(data=source, buffer_size=2, iter_axis=1)

    n_chunks = 0
    for idx, chunk in enumerate(dci):
        # Three entries along axis 1 with a buffer of two: one full chunk, then a remainder.
        expected_shape = (5, 2, 3) if idx < 1 else (5, 1, 3)
        self.assertTupleEqual(chunk.shape, expected_shape)
        n_chunks = idx + 1
    self.assertEqual(n_chunks, 2)

    self.assertTupleEqual(dci.recommended_data_shape(), source.shape)
    self.assertIsNone(dci.recommended_chunk_shape())
def test_numpy_iter_unmatched_buffer_size(self):
    """Iterate over ten numpy values with buffer_size=3: three full chunks and one remainder."""
    source = np.arange(10)
    dci = DataChunkIterator(data=source, buffer_size=3)
    self.assertTupleEqual(dci.maxshape, source.shape)
    self.assertEqual(dci.dtype, source.dtype)

    n_chunks = 0
    for idx, chunk in enumerate(dci):
        expected_shape = (3,) if idx < 3 else (1,)
        self.assertTupleEqual(chunk.data.shape, expected_shape)
        n_chunks = idx + 1
    self.assertEqual(n_chunks, 4)

    self.assertTupleEqual(dci.recommended_data_shape(), source.shape)
    self.assertIsNone(dci.recommended_chunk_shape())
def test_numpy_iter_buffered_first_axis(self):
    """Test DataChunkIterator on numpy data with buffer_size=2, iterating over the first dimension.
    """
    source = np.arange(30).reshape(5, 2, 3)
    dci = DataChunkIterator(data=source, buffer_size=2)

    n_chunks = 0
    for idx, chunk in enumerate(dci):
        # Five entries along axis 0 with a buffer of two: two full chunks, then a remainder.
        expected_shape = (2, 2, 3) if idx < 2 else (1, 2, 3)
        self.assertTupleEqual(chunk.shape, expected_shape)
        n_chunks = idx + 1
    self.assertEqual(n_chunks, 3)

    self.assertTupleEqual(dci.recommended_data_shape(), source.shape)
    self.assertIsNone(dci.recommended_chunk_shape())
def test_standard_iterator_unmatched_buffersized(self):
    """Iterate over a range of ten with buffer_size=3: three full chunks and one remainder."""
    dci = DataChunkIterator(data=range(10), buffer_size=3)
    self.assertEqual(dci.dtype, np.dtype(int))
    self.assertTupleEqual(dci.maxshape, (10,))
    self.assertIsNone(dci.recommended_chunk_shape())
    # The recommended data shape is checked both before and after iteration.
    self.assertTupleEqual(dci.recommended_data_shape(), (10,))

    n_chunks = 0
    for idx, chunk in enumerate(dci):
        expected_shape = (3,) if idx < 3 else (1,)
        self.assertTupleEqual(chunk.data.shape, expected_shape)
        n_chunks = idx + 1
    self.assertEqual(n_chunks, 4)

    self.assertTupleEqual(dci.recommended_data_shape(), (10,))
def test_list_none(self):
    """Test that constructing a DataChunkIterator from a list of None without
    a dtype raises, because the data type cannot be determined.
    """
    expected_msg = ('Data type could not be determined. Please specify dtype in '
                    'DataChunkIterator init.')
    all_none = [None, None, None]
    with self.assertRaisesWith(Exception, expected_msg):
        DataChunkIterator(all_none)
def test_write_dataset_iterable_multidimensional_array(self):
    """Write a 3-D array through a DataChunkIterator built from a plain iterator
    and verify the stored dataset matches the source array.
    """
    expected = np.arange(30).reshape(5, 2, 3)
    chunked = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    builder = DatasetBuilder('test_dataset', chunked, attributes={})

    self.io.write_dataset(self.f, builder)

    stored = self.f['test_dataset']
    self.assertListEqual(stored[:].tolist(), expected.tolist())
def test_numeric_spec(self):
    """Check ObjectMapper.convert_dtype against a 'numeric' dataset spec.

    Numeric scalars and DataChunkIterators are passed through; string data
    and empty lists must raise ValueError with the messages asserted below.
    """
    spec = DatasetSpec('an example dataset', 'numeric', name='data')

    # numpy scalar keeps its value and type
    raw = np.uint64(4)
    converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
    self.assertEqual(converted, raw)
    self.assertIs(type(converted), np.uint64)
    self.assertEqual(dtype_out, np.uint64)

    # DataChunkIterator over python ints
    raw = DataChunkIterator(data=[1, 2, 3])
    converted, dtype_out = ObjectMapper.convert_dtype(spec, raw)
    self.assertEqual(converted, raw)
    self.assertIs(converted.dtype.type, np.dtype(int).type)
    self.assertIs(type(converted.data[0]), int)
    self.assertEqual(dtype_out, np.dtype(int).type)

    # list of str is rejected
    raw = ['a', 'b']
    expected_error = "Cannot convert from <class 'str'> to 'numeric' specification dtype."
    with self.assertRaisesWith(ValueError, expected_error):
        ObjectMapper.convert_dtype(spec, raw)

    # numpy array of str is rejected
    raw = np.array(['a', 'b'])
    expected_error = "Cannot convert from <class 'numpy.str_'> to 'numeric' specification dtype."
    with self.assertRaisesWith(ValueError, expected_error):
        ObjectMapper.convert_dtype(spec, raw)

    # empty list gives no dtype to infer
    raw = []
    expected_error = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype."
    with self.assertRaisesWith(ValueError, expected_error):
        ObjectMapper.convert_dtype(spec, raw)
def write_lfp(
    nwbfile: NWBFile,
    data: ArrayLike,
    fs: float,
    electrode_inds: Optional[List[int]] = None,
    name: Optional[str] = "LFP",
    description: Optional[str] = "local field potential signal",
):
    """
    Add LFP from neuroscope to a "ecephys" processing module of an NWBFile.

    The data is wrapped in a gzip-compressed H5DataIO around a
    DataChunkIterator (buffer of ``int(fs * 3600)`` rows) and stored as an
    ElectricalSeries inside the "LFP" data interface of the "ecephys"
    module, which is created if it does not exist yet.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    data: array-like
        Must expose ``shape`` when electrode_inds is not given; the second
        dimension is read as the number of channels.
    fs: float
        Passed as the ``rate`` of the ElectricalSeries.
    electrode_inds: list(int), optional
        Indices into the electrodes table. If None, the first
        ``min(data.shape[1], number of electrodes in the table)`` are used.
    name: str, optional
        Name of the ElectricalSeries.
    description: str, optional

    Returns
    -------
    LFP pynwb.ecephys.ElectricalSeries

    Raises
    ------
    ValueError
        If electrode_inds is None and the NWBFile has no electrodes table.
    """
    if electrode_inds is None:
        if nwbfile.electrodes is None:
            # Without a table there is nothing to index; fail with a clear
            # message instead of an AttributeError on None.
            raise ValueError(
                "Cannot infer electrode_inds: nwbfile has no electrodes table. "
                "Add electrodes to the NWBFile or pass electrode_inds explicitly."
            )
        n_table_electrodes = len(nwbfile.electrodes.id.data[:])
        # Never reference more electrodes than the table holds.
        electrode_inds = list(range(min(data.shape[1], n_table_electrodes)))

    table_region = nwbfile.create_electrode_table_region(
        electrode_inds, "electrode table reference")

    data = H5DataIO(
        DataChunkIterator(tqdm(data, desc="writing lfp data"),
                          buffer_size=int(fs * 3600)),
        compression="gzip",
    )

    lfp_electrical_series = ElectricalSeries(
        name=name,
        description=description,
        data=data,
        electrodes=table_region,
        conversion=1e-6,
        rate=fs,
        resolution=np.nan,
    )

    ecephys_mod = check_module(
        nwbfile,
        "ecephys",
        "intermediate data from extracellular electrophysiology recordings, e.g., LFP",
    )

    if "LFP" not in ecephys_mod.data_interfaces:
        ecephys_mod.add_data_interface(LFP(name="LFP"))

    ecephys_mod.data_interfaces["LFP"].add_electrical_series(
        lfp_electrical_series)

    return lfp_electrical_series
def test_build_dataio_datachunkiterator(self):  # hdmf#512
    """Test building a dataset that has no dtype and no data_type when its value
    is a DataIO wrapping a DataChunkIterator."""
    wrapped = H5DataIO(DataChunkIterator(['a', 'b', 'c', 'd']), chunks=True)
    container = Baz('my_baz', wrapped, 'value1')

    builder = self.type_map.build(container)

    built_data = builder.get('data')
    self.assertIsInstance(built_data, H5DataIO)
    self.assertIsInstance(built_data.data, DataChunkIterator)
def test_dci(self):
    """Test get_data_shape on DataChunkIterators with different data shapes and maxshape settings."""
    # No data at all: no shape can be reported.
    empty = DataChunkIterator(dtype=np.dtype(int))
    self.assertIsNone(get_data_shape(empty))

    # One-dimensional data
    flat = DataChunkIterator(data=[1, 2])
    self.assertTupleEqual(get_data_shape(flat), (2, ))

    # Two-dimensional data
    nested = DataChunkIterator(data=[[1, 2], [3, 4], [5, 6]])
    self.assertTupleEqual(get_data_shape(nested), (3, 2))

    # An explicit maxshape takes priority over the shape of the data.
    with_maxshape = DataChunkIterator(data=[[1, 2], [3, 4], [5, 6]], maxshape=(None, 100))
    self.assertTupleEqual(get_data_shape(with_maxshape), (None, 100))
def test_multidimensional_list_last_axis(self):
    """Test DataChunkIterator on nested-list data with the default buffer size,
    iterating over the last dimension (which must emit a conversion warning).
    """
    nested = np.arange(30).reshape(5, 2, 3).tolist()
    expected_warning = ('Iterating over an axis other than the first dimension of list or tuple data '
                        'involves converting the data object to a numpy ndarray, which may incur a computational '
                        'cost.')
    with self.assertWarnsWith(UserWarning, expected_warning):
        dci = DataChunkIterator(nested, iter_axis=2)
    self.assertTupleEqual(dci.maxshape, (5, 2, 3))
    self.assertEqual(dci.dtype, np.dtype(int))

    n_chunks = 0
    for n_chunks, chunk in enumerate(dci, start=1):
        self.assertTupleEqual(chunk.data.shape, (5, 2, 1))
    self.assertEqual(n_chunks, 3)

    self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_custom_iter_last_axis(self):
    """Feed DataChunkIterator from a generator that yields slices along the last
    axis and iterate with buffer_size=2 and iter_axis=2.
    """
    def my_iter():
        # Yield the (5, 2) slices of a (5, 2, 3) array along its last axis.
        source = np.arange(30).reshape(5, 2, 3)
        for k in range(source.shape[2]):
            yield source[:, :, k]

    dci = DataChunkIterator(data=my_iter(), buffer_size=2, iter_axis=2)

    n_chunks = 0
    for idx, chunk in enumerate(dci):
        # Three slices with a buffer of two: one full chunk, then a remainder.
        expected_shape = (5, 2, 2) if idx < 1 else (5, 2, 1)
        self.assertTupleEqual(chunk.shape, expected_shape)
        n_chunks = idx + 1
    self.assertEqual(n_chunks, 2)

    # NOTE: recommended_data_shape() is not asserted in this test.
    self.assertIsNone(dci.recommended_chunk_shape())
def write_lfp(nwbfile, data, fs, name='LFP', description='local field potential signal', electrode_inds=None):
    """
    Store an LFP signal as an ElectricalSeries in the "ecephys" processing module.

    The series is added to the module's "LFP" data interface, which is
    created first if the module does not contain one.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    data: array-like
        When electrode_inds is None, ``data.shape[1]`` is read as the channel count.
    fs: float
        Used as the series rate and to size the chunk buffer (``int(fs * 3600)``).
    name: str
    description: str
    electrode_inds: list(int)
        Defaults to ``range(data.shape[1])`` when None.

    Returns
    -------
    LFP pynwb.ecephys.ElectricalSeries
    """
    if electrode_inds is None:
        n_channels = data.shape[1]
        electrode_inds = list(range(n_channels))

    electrodes_region = nwbfile.create_electrode_table_region(
        electrode_inds, 'electrode table reference'
    )

    # Stream the data in buffered chunks with a progress bar, gzip-compressed.
    chunked = DataChunkIterator(
        tqdm(data, desc='writing lfp data'),
        buffer_size=int(fs * 3600),
    )
    data = H5DataIO(chunked, compression='gzip')

    # NOTE(review): conversion is NaN here - confirm this is intended.
    lfp_electrical_series = ElectricalSeries(
        name=name,
        description=description,
        data=data,
        electrodes=electrodes_region,
        conversion=np.nan,
        rate=fs,
        resolution=np.nan,
    )

    ecephys_mod = check_module(
        nwbfile,
        'ecephys',
        'intermediate data from extracellular electrophysiology recordings, e.g., LFP',
    )

    interfaces = ecephys_mod.data_interfaces
    if 'LFP' not in interfaces:
        ecephys_mod.add_data_interface(LFP(name='LFP'))

    ecephys_mod.data_interfaces['LFP'].add_electrical_series(lfp_electrical_series)

    return lfp_electrical_series
def _create_ophys_raw(self, nwbfile: NWBFile, metadata: dict, link_ophys_raw: bool):
    """Add the raw green-channel two-photon series to ``nwbfile`` as an acquisition.

    When ``link_ophys_raw`` is truthy the series references external tiff
    files with per-file starting frames; otherwise the tiff images are
    wrapped in a DataChunkIterator and passed as data. The imaging rate is
    computed as ``1 / dto[0]`` from the processed ophys HDF5 file.
    """
    print('Converting raw ophys data...')

    def tiff_iterator(paths_tiff):
        # Lazily yield every image of every tiff file, one file at a time.
        for tiff_path in paths_tiff:
            handle = TIFF.open(tiff_path)
            for frame in handle.iter_images():
                yield frame
            handle.close()

    # Imaging rate from the processed data file
    with h5py.File(self.source_data['path_ophys_processed'], 'r') as processed:
        imaging_rate = 1 / processed['dto'][0]

    imaging_plane = self._get_imaging_plane(
        nwbfile=nwbfile,
        metadata_imgplane=metadata['Ophys']['ImagingPlane'],
    )
    metadata_twops = metadata['Ophys']['TwoPhotonSeries_green']

    if not link_ophys_raw:
        # Store the raw frames in the NWB file itself.
        # NOTE(review): the path entry is wrapped in a list here but sliced like a
        # sequence of paths in the linking branch - confirm which form is expected.
        raw_data_iterator = DataChunkIterator(
            data=tiff_iterator([self.source_data['path_tiff_green_channel']])
        )
        two_photon_series = pynwb.ophys.TwoPhotonSeries(
            name=metadata_twops['name'],
            imaging_plane=imaging_plane,
            data=raw_data_iterator,
            starting_time=0.,
            rate=imaging_rate,
            unit='no unit',
        )
    else:
        # Link to the raw files: each file starts where the previous ones ended,
        # so the last file's frame count is not needed.
        starting_frames = [0]
        for tiff_path in self.source_data['path_tiff_green_channel'][0:-1]:
            n_frames = pImage.open(tiff_path).n_frames
            starting_frames.append(starting_frames[-1] + n_frames)

        # NOTE(review): external_file reads 'paths_tiff' while the frame counts come
        # from 'path_tiff_green_channel' - verify both keys refer to the same files.
        two_photon_series = pynwb.ophys.TwoPhotonSeries(
            name=metadata_twops['name'],
            imaging_plane=imaging_plane,
            format='tiff',
            external_file=self.source_data['paths_tiff'],
            starting_frame=starting_frames,
            starting_time=0.,
            rate=imaging_rate,
            unit='no unit',
        )

    nwbfile.add_acquisition(two_photon_series)
def add_raw_nlx_data(nwbfile, raw_nlx_path, electrode_table_region):
    """Add raw acquisition data from Neuralynx CSC .ncs files to an NWB file using a data chunk iterator.

    Parameters
    ----------
    nwbfile : NWBFile
        The NWBFile object that receives the raw data.
    raw_nlx_path : Path
        Directory containing the raw NLX CSC files.
    electrode_table_region : DynamicTableRegion
        Electrodes corresponding to the acquisition time series. The number of
        matching .ncs data files must equal the length of this region.
    """
    print('Adding raw NLX data using data chunk iterator')
    n_channels = len(electrode_table_region)

    # Collect the CSC data files in natural sort order, skipping the
    # 16 kB header files that carry '_' in their name.
    csc_names = [p.name for p in raw_nlx_path.glob('CSC*.ncs') if '_' not in p.stem]
    data_paths = [raw_nlx_path / fname for fname in natsorted(csc_names)]
    assert (len(data_paths) == n_channels)

    # The first file is read fully to obtain the header, timestamps and data.
    # NOTE: the starting time is set to 0 because the neuralynx starting time is arbitrary.
    # TODO: store neuralynx starting time in case it is useful for alignment
    raw_header, raw_ts, raw_data = read_csc_file(data_paths[0])
    starting_time = 0.
    rate = float(raw_header['SamplingFrequency'])
    conversion_factor = raw_header['ADBitVolts']

    # TODO put header data into NWBFile under Neuralynx device

    channel_stream = raw_generator(raw_nlx_path, first_raw_ts=raw_ts, first_raw_data=raw_data)
    ephys_data = DataChunkIterator(
        data=channel_stream,
        iter_axis=1,
        maxshape=(len(raw_ts), n_channels),
        dtype=np.dtype('int16'),
    )

    # NOTE: starting time and rate are given instead of timestamps. The rate may be
    # 32000.012966 Hz rather than 32000 Hz, but the reported 32000 Hz is used anyway.
    ephys_ts = ElectricalSeries(
        name='ElectricalSeries',
        data=ephys_data,
        electrodes=electrode_table_region,
        starting_time=starting_time,
        rate=rate,
        conversion=conversion_factor,
        description='This is a recording from the hippocampus',
    )
    nwbfile.add_acquisition(ephys_ts)
def test_dci_data_arr(self):
    """Build a TimeSeries on a generator-backed DataChunkIterator: num_samples
    must be None (with a UserWarning) and the chunks must match the generator output.
    """
    def generator_factory():
        # A fresh generator of 100 two-element arrays on every call.
        return (np.array([i, i+1]) for i in range(100))

    chunked = DataChunkIterator(data=generator_factory())
    series = TimeSeries('test_ts1', chunked, 'grams', starting_time=0.0, rate=0.1)

    # Only the warning category is checked, not the warning message.
    with self.assertWarns(UserWarning):
        self.assertIs(series.num_samples, None)

    for chunk, expected in zip(chunked, generator_factory()):
        assert np.allclose(chunk, expected)