def test_002_events_only(self):
    # Scenario: the segmentation summary is expressed purely in event
    # indices. The test writes an event table plus that summary, then checks
    # that SegmentationTools.get_results() reports both the event indices and
    # the matching first-sample / duration values.
    channel_info = {
        'channel_number': 1,
        'sampling_rate': 4000,
        'digitisation': 8192,
        'range': 819.2,
        'offset': 0,
    }
    event_dtype = [('start', int), ('length', int),
                   ('mean', float), ('stdv', float)]
    # (field, event index, value) for the only events that carry data; every
    # other entry of the table stays zero.
    event_values = (
        ('start', 2, 10010),
        ('start', 46, 10470),
        ('length', 46, 10),
        ('start', 53, 10520),
        ('start', 97, 10960),
        ('length', 97, 20),
    )
    expected = {
        'has_template': True,
        'has_complement': True,
        'start_event_template': 2,
        'end_event_template': 47,
        'start_event_complement': 53,
        'end_event_complement': 98,
        'first_sample_template': 10,
        'duration_template': 470,
        'first_sample_complement': 520,
        'duration_complement': 460,
    }

    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info(channel_info)
        fh.add_read(12, 'unique_snowflake', 10000, 1000, 0, 120.75)

        # Write the event table the segmentation will refer to.
        with EventDetectionTools(fh, group_name='EventDetection_000',
                                 meta={'name': 'test'}) as evdet:
            data = np.zeros(100, dtype=event_dtype)
            for field, index, value in event_values:
                data[field][index] = value
            read_attrs = {'read_number': 12}
            evdet.set_event_data(data, read_attrs)

        # Register the segmentation analysis, linked to the event detection
        # group written above.
        attrs = {
            'name': 'test',
            'version': 0,
            'time_stamp': 'just now',
            'event_detection': 'Analyses/EventDetection_000',
        }
        fh.add_analysis('segmentation', 'Segmentation_000', attrs)
        segment_data = {
            'has_template': 1,
            'has_complement': 1,
            'start_event_template': 2,
            'end_event_template': 47,
            'start_event_complement': 53,
            'end_event_complement': 98,
        }
        fh.set_summary_data('Segmentation_000', 'segmentation', segment_data)

        with SegmentationTools(fh, group_name='Segmentation_000') as segment:
            results = segment.get_results()
            self.assertDictEqual(expected, results)
def test_001_raw_only(self):
    # Scenario: the segmentation summary is expressed directly in raw-sample
    # coordinates. The test checks get_results() and then the raw data
    # returned for each strand, both unscaled and scaled.
    template_window = slice(10, 480)
    complement_window = slice(520, 980)
    # Factor the test expects scaled samples to be multiplied by, given the
    # channel info written below.
    scale_factor = 0.1

    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info({
            'channel_number': 1,
            'sampling_rate': 4000,
            'digitisation': 8192,
            'range': 819.2,
            'offset': 0,
        })
        fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)

        # A simple ramp 0..999 makes every slice trivially identifiable.
        raw = np.arange(1000, dtype=np.int16)
        fh.add_raw_data(raw)

        attrs = {'name': 'test', 'version': 0, 'time_stamp': 'just now'}
        fh.add_analysis('segmentation', 'Segmentation_000', attrs)
        segment_data = {
            'has_template': 1,
            'has_complement': 1,
            'first_sample_template': 10,
            'duration_template': 470,
            'first_sample_complement': 520,
            'duration_complement': 460,
        }
        fh.set_summary_data('Segmentation_000', 'segmentation', segment_data)

        with SegmentationTools(fh, group_name='Segmentation_000') as segment:
            results = segment.get_results()
            expected = {
                'has_template': True,
                'has_complement': True,
                'first_sample_template': 10,
                'duration_template': 470,
                'first_sample_complement': 520,
                'duration_complement': 460,
            }
            self.assertDictEqual(expected, results)

            # Each strand requested on its own, unscaled.
            temp_raw = segment.get_raw_data('template', scale=False)
            np.testing.assert_array_equal(temp_raw, raw[template_window])
            comp_raw = segment.get_raw_data('complement', scale=False)
            np.testing.assert_array_equal(comp_raw, raw[complement_window])

            # Both strands in one call, unscaled.
            temp_raw, comp_raw = segment.get_raw_data('both', scale=False)
            np.testing.assert_array_equal(temp_raw, raw[template_window])
            np.testing.assert_array_equal(comp_raw, raw[complement_window])

            # Both strands in one call, scaled.
            temp_raw, comp_raw = segment.get_raw_data('both', scale=True)
            scaled_temp = raw[template_window] * scale_factor
            scaled_comp = raw[complement_window] * scale_factor
            np.testing.assert_array_almost_equal(temp_raw, scaled_temp,
                                                 decimal=5)
            np.testing.assert_array_almost_equal(comp_raw, scaled_comp,
                                                 decimal=5)
def calculate_speed(self, section, alignment_results=None):
    """ Calculate speed using alignment information.

    :param section: The section (template or complement) we're calculating
        speed for.
    :param alignment_results: Optional dictionary of the alignment summary,
        so that speed can be calculated without having to write the summary
        out to the fast5 file first. When given (and non-empty) it is passed
        through ``self._get_results``; otherwise the results for ``section``
        are read via ``self.get_results()``.
    :return: Speed in bases per second or zero if the speed could not be
        calculated. Zero is returned when the alignment status is not
        'match found', when the aligned or total sequence length is zero,
        when no segmentation group is chained to this analysis, or when the
        segmentation results hold no (non-zero) duration for ``section``.

    The only reliable way we have of finding out how many bases have gone
    through the pore is by looking at how much of the reference the sequence
    aligned to. This takes that information and uses it to calculate speed
    in reference-bases-per-second.
    """
    if alignment_results:
        results = self._get_results(alignment_results)
    else:
        results = self.get_results()[section]
    if results['status'] != 'match found':
        return 0.0

    ref_span = results['ref_span']
    ref_len = ref_span[1] - ref_span[0]
    seq_span = results['seq_span']
    seq_len = seq_span[1] - seq_span[0]
    total_len = results['seq_len']
    if seq_len == 0 or total_len == 0:
        # The duration is scaled by seq_len / total_len below; with either
        # length at zero that scaled duration is zero or undefined, so the
        # speed cannot be calculated.
        return 0.0

    sample_rate = self.handle.get_channel_info()['sampling_rate']

    # We need the duration from the segmentation results
    chain = self.handle.get_chain(self.group_name)
    segmentation_group = None
    if chain is not None:
        segmentation_group = dict(chain).get('segmentation')
    if segmentation_group is None:
        return 0.0

    duration = 0
    with SegmentationTools(self.handle,
                           group_name=segmentation_group) as seg:
        summary = seg.get_results()
        if summary is not None:
            # A summary without a duration for this section is treated the
            # same as "no duration available".
            duration = summary.get('duration_{}'.format(section), 0)
    if duration == 0:
        return 0.0

    # Only part of the sequence aligned, so attribute the matching fraction
    # of the section's duration (in samples) to the aligned reference span.
    normalized_duration = duration * seq_len / float(total_len)
    return sample_rate * ref_len / normalized_duration