def test_correct_data(self, filename):
    """Concatenating two chimera files yields their data back-to-back at the shared sample rate."""
    path_a = tf.get_abs_path('chimera_1event_2levels.log')
    path_b = tf.get_abs_path('chimera_1event.log')

    concat_files([path_a, path_b], output_filename=filename)

    reader_a = get_reader_from_filename(path_a)
    reader_b = get_reader_from_filename(path_b)
    reader_cat = get_reader_from_filename(filename)

    rate_a = reader_a.get_sample_rate()
    rate_b = reader_b.get_sample_rate()
    rate_cat = reader_cat.get_sample_rate()

    self.assertEqual(rate_cat, rate_a,
                     "Unexpected sample rate. Should be {0}, was {1}.".format(rate_a, rate_cat))
    self.assertEqual(rate_cat, rate_b,
                     "Unexpected sample rate. Should be {0}, was {1}.".format(rate_b, rate_cat))

    data_a = reader_a.get_all_data()[0]
    data_b = reader_b.get_all_data()[0]

    # Expected output is simply the two channels concatenated in order.
    expected = np.zeros(data_a.size + data_b.size)
    expected[:data_a.size] = data_a[:]
    expected[data_a.size:] = data_b[:]

    np.testing.assert_array_equal(reader_cat.get_all_data()[0], expected)

    reader_a.close()
    reader_b.close()
    reader_cat.close()
def test_filtered_baseline(self, filename):
    """
    Tests that the filtered baseline is the same as the unfiltered.
    """
    data_filename = tf.get_abs_path('chimera_1event.log')

    reader = get_reader_from_filename(data_filename)
    data_all = reader.get_all_data()
    data = data_all[0]
    reader.close()

    baseline = np.mean(data[:150])

    # Check at different filter frequencies and re-sample rates
    for rates in ([1.e4, 1.e6], [1.e4, 0], [5.e5, 4.e6], [7.7e4, 1.e6]):
        filter_freq = rates[0]
        re_sample_rate = rates[1]
        out_filename = filter_file(data_filename, filter_frequency=filter_freq,
                                   out_sample_rate=re_sample_rate, output_filename=filename)

        reader = get_reader_from_filename(out_filename)
        data2 = reader.get_all_data()[0]
        reader.close()

        # Note we re-sampled, which is why we only take the first 20 data points.
        # (Fix: the old comment claimed 30 points, but the slice below uses 20.)
        baseline2 = np.mean(data2[:20])
        ratio = abs((baseline - baseline2) / baseline)
        # Parenthesized single argument so this works as a Py2 print statement
        # and as a Py3 function call.
        print("ratio: {0}".format(ratio))
        self.assertLessEqual(ratio, 0.05,
                             "Filtered baseline different from original. "
                             "Should be {0}, got {1}.".format(baseline, baseline2))

        os.remove(out_filename)
def test_noise(self, filename):
    """
    Tests that the noise is added correctly(its mean and std_dev are correct).
    """
    seconds = 1.
    sample_rate = 1.e6
    baseline = 1.
    noise_loc = 1.
    noise_scale = 0.6
    gaussian = stats.norm(loc=noise_loc, scale=noise_scale)

    create_random_data(filename, seconds=seconds, sample_rate=sample_rate,
                       baseline=baseline, noise=gaussian)

    reader = get_reader_from_filename(filename)
    trace = reader.get_all_data()[0]

    # The noise mean shifts the baseline, so the data mean is their sum.
    expected_mean = baseline + noise_loc
    observed_mean = np.mean(trace)
    self.assertAlmostEqual(observed_mean, expected_mean, 1,
                           "Unexpected mean. Wanted {0}, got {1}.".format(expected_mean, observed_mean))

    observed_std = np.std(trace)
    self.assertAlmostEqual(noise_scale, observed_std, 1,
                           "Unexpected standard deviation. "
                           "Wanted {0}, got {1}".format(noise_scale, observed_std))

    reader.close()
def test_convert_chimera_file_equality(self, filename):
    """
    Test that the original/converted matrices and sample rates are the same for one-channel data.
    """
    data_filename = tf.get_abs_path('chimera_1event.log')
    output_filename = convert_file(data_filename, output_filename=filename)

    orig_reader = get_reader_from_filename(data_filename)
    orig_channels = orig_reader.get_all_data()
    orig_sample_rate = orig_reader.get_sample_rate()
    self.assertEqual(len(orig_channels), 1)

    out_reader = DataFileReader(output_filename)
    out_channels = out_reader.get_all_data()
    out_sample_rate = out_reader.get_sample_rate()
    self.assertEqual(len(out_channels), 1)

    # assert sample rates are equal (their ratio is 1 to 4 decimal places)
    self.assertAlmostEqual(1.0 * orig_sample_rate / out_sample_rate, 1, 4)

    # assert the two arrays are equal
    np.testing.assert_array_equal(orig_channels[0], out_channels[0])

    orig_reader.close()
    out_reader.close()
def open_data_files(self, file_names=None):
    """
    Analyzes the files for correctness, then adds them to the list widget.

    :param ListType<StringType> file_names: The file names to be included in the list widget. If not included,
        this function will use a QtGui.QFileDialog.getOpenFileNames to open files.
    :returns: BooleanType -- **True** if files were opened, **False** otherwise.
    """
    if file_names is None:
        file_names = QtGui.QFileDialog.getOpenFileNames(
            self, 'Open data file', '.',
            "All types(*.h5 *.hkd *.log *.mat);;"
            "Pypore data files *.h5(*.h5);;"
            "Heka files *.hkd(*.hkd);;"
            "Chimera files *.log(*.log);;Gabys files *.mat(*.mat)")[0]

    if len(file_names) > 0:
        self.file_list_widget.clear()
    else:
        # Fix: the docstring promises a boolean, but this path previously
        # fell through with a bare 'return' (implicitly None).
        return False

    are_files_opened = False
    for w in file_names:
        # Opening the reader validates the file; errors are not yet surfaced.
        reader = get_reader_from_filename(w)
        # if 'error' in params:  # TODO implement error handling in readers
        #     pass
        # else:
        reader.close()
        are_files_opened = True
        item = FileListItem(w)
        self.file_list_widget.addItem(item)

    return are_files_opened
def test_noise(self, filename):
    """
    Tests that the noise is added correctly(its mean and std_dev are correct).
    """
    seconds, sample_rate = 1., 1.e6
    baseline = 1.
    noise_loc, noise_scale = 1., 0.6

    create_random_data(filename,
                       seconds=seconds,
                       sample_rate=sample_rate,
                       baseline=baseline,
                       noise=stats.norm(loc=noise_loc, scale=noise_scale))

    reader = get_reader_from_filename(filename)
    data = reader.get_all_data()[0]

    # Mean of the trace should be baseline plus the noise distribution's mean.
    mean_should_be = baseline + noise_loc
    mean = np.mean(data)
    self.assertAlmostEqual(mean, mean_should_be, 1,
                           "Unexpected mean. Wanted {0}, got {1}.".format(mean_should_be, mean))

    std_dev = np.std(data)
    self.assertAlmostEqual(noise_scale, std_dev, 1,
                           "Unexpected standard deviation. "
                           "Wanted {0}, got {1}".format(noise_scale, std_dev))

    reader.close()
def convert_file(filename, output_filename=None):
    """
    Convert a file to the pypore .h5 file format. Returns the new file's name.

    :param StringType filename: Name of the data file to convert.
    :param StringType output_filename: (Optional) Name for the converted file. If omitted,
        the input filename's extension is replaced with '.h5'.
    :returns: StringType -- the name of the newly written .h5 file.
    """
    reader = get_reader_from_filename(filename)

    sample_rate = reader.get_sample_rate()
    n_points = reader.get_points_per_channel_total()

    if output_filename is None:
        # Fix: splitext keeps everything but the extension, whereas
        # filename.split('.')[0] truncated dotted paths like './data/run.v2.log'.
        import os
        output_filename = os.path.splitext(filename)[0] + '.h5'

    save_file = data_file.open_file(output_filename, mode='w', sample_rate=sample_rate, n_points=n_points)

    blocks_to_get = 1
    data = reader.get_next_blocks(blocks_to_get)[0]
    n = data.size
    i = 0
    # Copy block-by-block until the reader returns an empty block.
    while n > 0:
        save_file.root.data[i:n + i] = data[:]
        i += n
        data = reader.get_next_blocks(blocks_to_get)[0]
        n = data.size

    reader.close()
    save_file.flush()
    save_file.close()
    return output_filename
def test_noise_no_events(self, filename):
    """
    Tests that noise is added correctly.
    """
    sample_rate = 1.e5
    baseline = 1.0
    n = 100000
    noise_scale = 1.

    create_specified_data(filename=filename, n=n, sample_rate=sample_rate,
                          baseline=baseline, noise_scale=noise_scale, events=[])

    reader = get_reader_from_filename(filename)
    data = reader.get_all_data()[0]
    reader.close()

    self.assertEqual(n, data.size,
                     "Unexpected array size. Wanted {0}, got {1}.".format(n, data.size))

    # Statistical checks only hold loosely, so test to a single decimal place.
    decimal = 1

    mean = np.mean(data)
    self.assertAlmostEqual(baseline, mean, decimal,
                           "Unexpected baseline. Wanted {0}, got {1}. Tested to {2} decimals.".format(
                               baseline, mean, decimal))

    std_dev = np.std(data)
    self.assertAlmostEqual(noise_scale, std_dev, decimal,
                           msg="Unexpected stddev. Wanted{0}, got {1}. Tested to {2} decimals.".format(
                               noise_scale, std_dev, decimal))
def filter_file(filename, filter_frequency, out_sample_rate, output_filename=None):
    """
    Reads data from the filename file and uses a Butterworth low-pass filter with cutoff at
    filter_frequency. Outputs the filtered waveform to a new
    :py:class:`pypore.filetypes.data_file.DataFile`.

    :param StringType filename: Filename containing data to be filtered.
    :param DoubleType filter_frequency: Cutoff frequency for the low-pass Butterworth filter.
    :param DoubleType out_sample_rate: After the data is filtered, it can be resampled to roughly out_sample_rate. If \
    out_sample_rate <= 0, the data will not be resampled.
    :param StringType output_filename: (Optional) Filename for the filtered data. If not specified, for an example
        filename='test.mat', the default output_filename would be 'test_filtered.h5'
    :returns: StringType -- The output filename of the filtered data.

    Usage:

    >>> import pypore.file_converter as fc
    >>> fc.filter_file("filename", 1.e4, 1.e5, "output.h5") # filter at 10kHz, resample at 100kHz
    """
    reader = get_reader_from_filename(filename)
    data = reader.get_all_data()[0]
    sample_rate = reader.get_sample_rate()
    # Fix: close the reader once the data is in memory — it was never closed
    # before (resource leak).
    reader.close()

    final_sample_rate = sample_rate
    n_points = len(data)

    if output_filename is None:
        output_filename = os.path.splitext(filename)[0] + '_filtered.h5'

    # wn is a fraction of the Nyquist frequency (half the sampling frequency).
    wn = filter_frequency / (0.5 * sample_rate)
    b, a = sig.butter(6, wn)
    filtered = sig.filtfilt(b, a, data)[:]

    # resample the data, if requested.
    if 0 < out_sample_rate < sample_rate:
        n_out = int(np.ceil(n_points * out_sample_rate / sample_rate))
        filtered = sig.resample(filtered, num=n_out)
        # The achieved rate is re-derived from the rounded point count.
        final_sample_rate = sample_rate * (1.0 * n_out / n_points)

    save_file = data_file.open_file(output_filename, mode='w', sample_rate=final_sample_rate,
                                    n_points=filtered.size)
    save_file.root.data[:] = filtered[:]
    save_file.flush()
    save_file.close()
    return output_filename
def run(self):
    """Load plot data from the file (unless cancelled) and emit it to listeners."""
    # NOTE(review): this uses 'or', so data is (re)read whenever the filename is
    # non-empty OR 'datadict' is empty — confirm 'or' vs 'and' is intended.
    needs_load = self.filename != '' or self.plot_options['datadict'] == ''
    if needs_load:
        reader = get_reader_from_filename(self.filename)
        self.plot_options['data'] = reader.get_all_data(self.decimate)
        # Only adopt the file's rate when the caller did not supply one.
        if self.sample_rate == 0.0:
            self.sample_rate = reader.get_sample_rate()
        reader.close()
    if self.cancelled:
        return
    self.plot_options['sample_rate'] = self.sample_rate
    payload = {'plot_options': self.plot_options, 'status_text': '', 'thread': self}
    self.dataReady.emit(payload)
def test_original_files_unmodified(self, filename):
    """Verifies concat_files leaves both input files' data and sample rates untouched."""
    path_a = tf.get_abs_path('chimera_1event_2levels.log')
    path_b = tf.get_abs_path('chimera_1event.log')

    # Snapshot both inputs before concatenating.
    reader_a = get_reader_from_filename(path_a)
    reader_b = get_reader_from_filename(path_b)
    rate_a_before = reader_a.get_sample_rate()
    rate_b_before = reader_b.get_sample_rate()
    data_a_before = reader_a.get_all_data()[0]
    data_b_before = reader_b.get_all_data()[0]
    reader_a.close()
    reader_b.close()

    concat_files([path_a, path_b], output_filename=filename)

    # Re-open and compare against the snapshots.
    reader_a = get_reader_from_filename(path_a)
    reader_b = get_reader_from_filename(path_b)
    reader_cat = get_reader_from_filename(filename)

    rate_a_after = reader_a.get_sample_rate()
    rate_b_after = reader_b.get_sample_rate()
    self.assertEqual(rate_a_after, rate_a_before,
                     "Sample rate changed. Was {0}, now {1}.".format(rate_a_before, rate_a_after))
    self.assertEqual(rate_b_after, rate_b_before,
                     "Sample rate changed. Was {0}, now {1}.".format(rate_b_before, rate_b_after))

    np.testing.assert_array_equal(reader_a.get_all_data()[0], data_a_before)
    np.testing.assert_array_equal(reader_b.get_all_data()[0], data_b_before)

    reader_a.close()
    reader_b.close()
    reader_cat.close()
def test_set_output_sample_rate(self, filename):
    """
    Tests that we can successfully set the output sample rate, and the number of data points changes correctly.
    """
    data_file_names = [tf.get_abs_path('chimera_1event.log'),
                       tf.get_abs_path('chimera_1event_2levels.log')]

    for data_filename in data_file_names:
        # Open a reader and read the original sample rate
        orig_reader = get_reader_from_filename(data_filename)
        orig_sample_rate = orig_reader.get_sample_rate()
        n_orig = orig_reader.get_all_data()[0].size
        orig_reader.close()

        for requested_rate in (100.e4, 1.e6, 5.e5):
            out_filename = filter_file(data_filename, 10.e4, requested_rate, output_filename=filename)

            # The output point count should be ceil(n_orig * rate_ratio), and the
            # achieved sample rate is re-derived from that rounded point count.
            n_expected = int(np.ceil(n_orig * requested_rate / orig_sample_rate))
            expected_rate = orig_sample_rate * (1.0 * n_expected) / n_orig

            # Get the params from the output file
            out_reader = get_reader_from_filename(out_filename)
            actual_rate = out_reader.get_sample_rate()
            n_actual = out_reader.get_all_data()[0].size
            out_reader.close()

            self.assertEqual(n_expected, n_actual,
                             "Number of re-sample points not correct. "
                             "Original data {0}, output {1}, should be {2}.".format(n_orig, n_actual, n_expected))
            self.assertAlmostEqual(actual_rate, expected_rate, 2,
                                   "Sample rate not set correctly. Was {0}, should be {1}".format(
                                       actual_rate, expected_rate))

            os.remove(out_filename)
def _test_params_equality(self, filename, data_should_be, sample_rate):
    """
    Tests that the data and sample_rate in filename equal data_should_be and sample_rate, respectively.
    """
    reader = get_reader_from_filename(filename)
    actual_data = reader.get_all_data()[0]
    actual_rate = reader.get_sample_rate()
    reader.close()

    self.assertEqual(sample_rate, actual_rate,
                     "Sample rates not equal. Wanted {0}, got {1}.".format(sample_rate, actual_rate))
    np.testing.assert_array_equal(actual_data, data_should_be)
def run(self):
    """Read the requested data file if needed, then emit the plot options."""
    # NOTE(review): the 'or' means data is read whenever a filename is set OR
    # 'datadict' is empty; confirm that is the intended trigger.
    if (not self.filename == '') or (self.plot_options['datadict'] == ''):
        reader = get_reader_from_filename(self.filename)
        self.plot_options['data'] = reader.get_all_data(self.decimate)
        if self.sample_rate == 0.0:
            # Fall back to the file's own sample rate.
            self.sample_rate = reader.get_sample_rate()
        reader.close()

    if self.cancelled:
        return

    self.plot_options['sample_rate'] = self.sample_rate
    self.dataReady.emit({'plot_options': self.plot_options,
                         'status_text': '',
                         'thread': self})
def filter_file(filename, filter_frequency, out_sample_rate, output_filename=None):
    """
    Reads data from the filename file and uses a Butterworth low-pass filter with cutoff at
    filter_frequency. Outputs the filtered waveform to a new
    :py:class:`pypore.filetypes.data_file.DataFile`.

    :param StringType filename: Filename containing data to be filtered.
    :param DoubleType filter_frequency: Cutoff frequency for the low-pass Butterworth filter.
    :param DoubleType out_sample_rate: After the data is filtered, it can be resampled to roughly out_sample_rate. If \
    out_sample_rate <= 0, the data will not be resampled.
    :param StringType output_filename: (Optional) Filename for the filtered data. If not specified, for an example
        filename='test.mat', the default output_filename would be 'test_filtered.h5'
    :returns: StringType -- The output filename of the filtered data.

    Usage:

    >>> import pypore.file_converter as fC
    >>> fC.filter_file("filename", 1.e4, 1.e5, "output.h5")  # filter at 10kHz, resample at 100kHz
    """
    reader = get_reader_from_filename(filename)
    data = reader.get_all_data()[0]
    sample_rate = reader.get_sample_rate()
    # Fix: close the reader once the data is in memory — it was never closed
    # before (resource leak).
    reader.close()

    final_sample_rate = sample_rate
    n_points = len(data)

    if output_filename is None:
        # Fix: splitext handles dotted paths correctly, unlike filename.split('.')[0].
        import os
        output_filename = os.path.splitext(filename)[0] + '_filtered.h5'

    # wn is a fraction of the Nyquist frequency (half the sampling frequency).
    wn = filter_frequency / (0.5 * sample_rate)
    b, a = sig.butter(6, wn)
    filtered = sig.filtfilt(b, a, data)[:]

    # resample the data, if requested.
    if 0 < out_sample_rate < sample_rate:
        n_out = int(np.ceil(n_points * out_sample_rate / sample_rate))
        filtered = sig.resample(filtered, num=n_out)
        # The achieved rate is re-derived from the rounded point count.
        final_sample_rate = sample_rate * (1.0 * n_out / n_points)

    save_file = data_file.open_file(output_filename, mode='w', sample_rate=final_sample_rate,
                                    n_points=filtered.size)
    save_file.root.data[:] = filtered[:]
    save_file.flush()
    save_file.close()
    return output_filename
def _test_out_sample_rate_data_len_equality(self, orig_data, orig_sample_rate, out_filename, sample_rate):
    """
    Checks that the filtered file keeps the original sample rate, channel count,
    and per-channel data length. Returns the output data channels.
    """
    out_reader = get_reader_from_filename(out_filename)
    out_sample_rate = out_reader.get_sample_rate()
    out_data = out_reader.get_all_data()
    out_reader.close()

    self.assertAlmostEqual(orig_sample_rate, out_sample_rate, 2,
                           msg="Sampling rate changed during filter_file, "
                               "when it should not. Was {0}, wanted {1}, got {2}".format(
                                   orig_sample_rate, sample_rate, out_sample_rate))
    # Fix: previously compared len(orig_data) against itself, so the channel-count
    # check could never fail. Compare against the output instead.
    self.assertEqual(len(orig_data), len(out_data),
                     msg="Filtering changed the number of channels. Was {0}, output {1}".format(
                         len(orig_data), len(out_data)))
    self.assertEqual(orig_data[0].size, out_data[0].size,
                     msg="Output data size doesn't match the original. Was {0}, output {1}.".format(
                         orig_data[0].size, out_data[0].size))
    return out_data
def _test_params_equality(self, filename, data_should_be, sample_rate):
    """
    Tests that the data and sample_rate stored in filename match data_should_be
    and sample_rate, respectively.
    """
    reader = get_reader_from_filename(filename)
    channels = reader.get_all_data()
    found_data = channels[0]
    found_rate = reader.get_sample_rate()
    reader.close()

    self.assertEqual(sample_rate, found_rate,
                     "Sample rates not equal. Wanted {0}, got {1}.".format(sample_rate, found_rate))
    np.testing.assert_array_equal(found_data, data_should_be)
def test_same_sample_rate_no_change(self, filename):
    """
    Tests that if we set the output sample rate to < 0, the sampling doesn't change, but the file is filtered.
    """
    data_filename = tf.get_abs_path('chimera_1event.log')

    # Open a reader and read the original sample rate
    source_reader = get_reader_from_filename(data_filename)
    source_rate = source_reader.get_sample_rate()
    source_data = source_reader.get_all_data()
    source_reader.close()

    # Non-positive rates and rates at/above the original should leave sampling unchanged.
    for requested_rate in (-1, 0., source_rate, source_rate + 100.):
        out_filename = filter_file(data_filename, 10.e4, requested_rate, output_filename=filename)
        self._test_out_sample_rate_data_len_equality(source_data, source_rate, out_filename, requested_rate)
        os.remove(out_filename)
def test_noise_no_events(self, filename):
    """
    Tests that noise is added correctly.
    """
    sample_rate, baseline = 1.e5, 1.0
    n, noise_scale = 100000, 1.

    create_specified_data(filename=filename, n=n, sample_rate=sample_rate,
                          baseline=baseline, noise_scale=noise_scale, events=[])

    reader = get_reader_from_filename(filename)
    trace = reader.get_all_data()[0]
    reader.close()

    self.assertEqual(n, trace.size,
                     "Unexpected array size. Wanted {0}, got {1}.".format(n, trace.size))

    # Statistical comparisons only hold loosely; one decimal place.
    decimal = 1
    sample_mean = np.mean(trace)
    self.assertAlmostEqual(baseline, sample_mean, decimal,
                           "Unexpected baseline. Wanted {0}, got {1}. Tested to {2} decimals."
                           .format(baseline, sample_mean, decimal))

    sample_std = np.std(trace)
    self.assertAlmostEqual(noise_scale, sample_std, decimal,
                           msg="Unexpected stddev. Wanted{0}, got {1}. Tested to {2} decimals."
                           .format(noise_scale, sample_std, decimal))
def open_data_files(self, file_names=None):
    """
    Analyzes the files for correctness, then adds them to the list widget.

    :param ListType<StringType> file_names: The file names to be included in the list widget. If not included,
        this function will use a QtGui.QFileDialog.getOpenFileNames to open files.
    :returns: BooleanType -- **True** if files were opened, **False** otherwise.
    """
    if file_names is None:
        file_names = QtGui.QFileDialog.getOpenFileNames(self, 'Open data file', '.',
                                                        "All types(*.h5 *.hkd *.log *.mat);;"
                                                        "Pypore data files *.h5(*.h5);;"
                                                        "Heka files *.hkd(*.hkd);;"
                                                        "Chimera files *.log(*.log);;Gabys files *.mat(*.mat)")[0]

    if len(file_names) > 0:
        self.file_list_widget.clear()
    else:
        # Fix: this path previously did a bare 'return' (None), contradicting
        # the documented boolean return value.
        return False

    are_files_opened = False
    for w in file_names:
        # Opening the reader validates the file; reader-level errors are TODO.
        reader = get_reader_from_filename(w)
        # if 'error' in params:  # TODO implement error handling in readers
        #     pass
        # else:
        reader.close()
        are_files_opened = True
        item = FileListItem(w)
        self.file_list_widget.addItem(item)

    return are_files_opened
def concat_files(files, output_filename=None):
    """
    This function concatenates multiple files into one data file. All of the sampling rates of the
    original files must be the same.

    :param list files: List of string file names OR
        :py:class:`Readers <pypore.i_o.abstract_reader.AbstractReader>`.
    :param output_filename: Optional file name for the resulting file.
    :raises: :py:exc:`ValueError` -- if the length of the files list is < 2.
    :raises: :py:exc:`SamplingRatesMismatchError <pypore.file_converter.SamplingRatesMismatchError>` --
        if the sampling rates do not match in all of the files.

    >>> from pypore.i_o.data_file_reader import DataFileReader
    >>> concat_files(['file1.log', DataFileReader('dataFile.h5')], output_filename='concatenated.h5')  # can pass strings or Readers
    """
    if len(files) < 2:
        raise ValueError("Minimum length of files list is 2.")

    # Get the first sample rate (and a default output name) from the first entry.
    should_close_reader = False
    reader = files[0]
    if not isinstance(reader, AbstractReader):
        reader = get_reader_from_filename(reader)
        should_close_reader = True
    sample_rate = reader.get_sample_rate()
    if output_filename is None:
        basename = os.path.basename(reader.get_filename())
        # Fix: splitext keeps dotted basenames intact, unlike basename.split('.')[0].
        output_filename = os.path.splitext(basename)[0] + '_concatenated_' + \
            datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + '.h5'
    if should_close_reader:
        reader.close()

    n = 0
    # Get the total number of data points, and check that the sampling rates are equal.
    for i, reader in enumerate(files):
        should_close_reader = False
        # If it's not already a reader
        if not isinstance(reader, AbstractReader):
            reader = get_reader_from_filename(reader)
            should_close_reader = True
        curr_sample_rate = reader.get_sample_rate()
        if curr_sample_rate != sample_rate:
            raise SamplingRatesMismatchError(
                "Sampling rates differ in files. Found {0} and {1}.".format(curr_sample_rate, sample_rate))
        n += reader.get_all_data()[0].size
        if should_close_reader:
            reader.close()

    # Open a new data file sized to hold every input's points.
    new_data_file = df.open_file(output_filename, mode='w', n_points=n, sample_rate=sample_rate)

    # Second pass: copy each input's channel-0 data into the output, in order.
    curr_i = 0
    for i, reader in enumerate(files):
        should_close_reader = False
        # If it's not already a reader
        if not isinstance(reader, AbstractReader):
            reader = get_reader_from_filename(reader)
            should_close_reader = True
        n_i = reader.get_points_per_channel_total()
        new_data_file.root.data[curr_i:curr_i + n_i] = reader.get_all_data()[0]
        curr_i += n_i
        if should_close_reader:
            reader.close()

    new_data_file.close()
    return output_filename