Пример #1
0
    def __init__(self,
                 basename,
                 filename,
                 N_t,
                 n_channels=None,
                 n_total_channels=None,
                 prb_file=None,
                 dtype=None,
                 sample_rate=None,
                 offset=0,
                 gain=0.01
                 ):
        """Load traces, spikes, templates and amplitudes for the conversion.

        Parameters
        ----------
        basename
            Prefix of the result files. ``'.kwik'`` is appended to build
            ``self.kwik_path`` and it is forwarded to the ``_read_*`` helpers.
        filename
            Forwarded to ``_read_filtered`` to open the traces.
        N_t
            Only used to derive the number of samples extracted on each side
            of a spike, ``int(N_t - 1) // 2``.
        n_channels
            Stored as ``self.n_channels``; the waveform and mask arrays are
            asserted to have this size along their last dimension.
        n_total_channels
            Channel count given to ``_read_filtered``; must equal the second
            dimension of the loaded traces.
        prb_file
            Probe file given to ``load_probe``.
        dtype
            Forwarded to ``_read_filtered``.
        sample_rate
            Sampling rate. Must be a number: the band-pass upper cutoff is
            computed as ``0.95 * 0.5 * sample_rate``.
        offset, gain
            Forwarded to ``WaveformLoader`` as ``dc_offset`` and
            ``scale_factor`` (only used when features are extracted).
        """
        self.n_features_per_channel = 3
        self.n_total_channels = n_total_channels

        # The same number of samples is taken before and after each spike.
        half_width = int(N_t - 1) // 2
        self.extract_s_before = half_width
        self.extract_s_after = half_width

        # Set to True if your data is already pre-filtered (much quicker).
        filtered_datfile = False

        # Filtering parameters for PCA (ignored if filtered_datfile is True).
        filter_low = 500.
        filter_high = 0.95 * .5 * sample_rate
        filter_butter_order = 3

        self.basename = basename
        self.kwik_path = basename + '.kwik'
        self.dtype = dtype
        self.prb_file = prb_file
        self.probe = load_probe(prb_file)

        self.sample_rate = sample_rate
        self.filtered_datfile = filtered_datfile

        self._sd = SpikeDetekt(probe=self.probe,
                               n_features_per_channel=self.n_features_per_channel,
                               pca_n_waveforms_max=10000,
                               extract_s_before=half_width,
                               extract_s_after=half_width,
                               sample_rate=sample_rate,
                               )
        self.n_samples_w = 2 * half_width

        # A xxx.filtered.trunc file may be created if needed.
        self.file, self.traces_f = _read_filtered(filename,
                                                  n_channels=n_total_channels,
                                                  dtype=dtype,
                                                  )
        self.n_samples, self.n_total_channels = self.traces_f.shape
        self.n_channels = n_channels
        assert n_total_channels == self.n_total_channels
        info("Loaded traces: {}.".format(self.traces_f.shape))

        # Load spikes.
        self.spike_samples, self.spike_clusters = _read_spikes(basename)
        self.n_spikes = len(self.spike_samples)
        assert len(self.spike_clusters) == self.n_spikes
        info("Loaded {} spikes.".format(self.n_spikes))

        # Chunks when computing features. Integer ceiling division: unlike
        # ceil(a / b) it does not lose the last partial chunk when `/` is
        # integer division (Python 2).
        self.chunk_size = 2500
        self.n_chunks = (self.n_spikes + self.chunk_size - 1) // self.chunk_size

        # Load templates and masks.
        self.templates, self.template_masks = _read_templates(
            basename, self.probe, self.n_total_channels, self.n_channels)
        self.n_templates = len(self.templates)
        info("Loaded templates: {}.".format(self.templates.shape))

        # Load amplitudes.
        self.amplitudes = _read_amplitudes(
            basename, self.n_templates, self.n_spikes, self.spike_clusters)

        # NOTE(review): `extract_features` is not defined in this method; it
        # has to come from an enclosing or module-level scope -- confirm.
        if extract_features:
            # The WaveformLoader fetches and filters waveforms from the raw
            # traces dynamically.
            b_filter = bandpass_filter(rate=self.sample_rate,
                                       low=filter_low,
                                       high=filter_high,
                                       order=filter_butter_order)

            # Named so that it does not shadow the builtin `filter`.
            def apply_bandpass(x):
                return apply_filter(x, b_filter)

            filter_margin = filter_butter_order * 3

            # Concatenate the channels of every channel group of the probe.
            channel_groups = self.probe['channel_groups']
            nodes = []
            for key in channel_groups.keys():
                nodes += channel_groups[key]['channels']
            nodes = np.array(nodes, dtype=np.int32)

            loader_kwargs = dict(traces=self.traces_f,
                                 n_samples=self.n_samples_w,
                                 dc_offset=offset,
                                 scale_factor=gain,
                                 channels=nodes,
                                 )
            if not filtered_datfile:
                # Raw data: filter the waveforms when they are fetched.
                loader_kwargs.update(filter=apply_bandpass,
                                     filter_margin=filter_margin)
            self._wl = WaveformLoader(**loader_kwargs)

            # A virtual (n_spikes, n_samples, n_channels) array that is
            # memmapped to the filtered data file.
            self.waveforms = SpikeLoader(self._wl, self.spike_samples)

            assert self.waveforms.shape == (self.n_spikes,
                                            self.n_samples_w,
                                            self.n_channels)
            assert self.template_masks.shape == (self.n_templates, self.n_channels)
Пример #2
0
class Converter(object):
    """Convert spike-sorting results (spikes, templates, amplitudes) to Kwik.

    The constructor loads everything from disk; ``create_kwik`` writes the
    Kwik file and ``template_explorer`` opens a small viewer.
    """

    def __init__(self,
                 basename,
                 filename,
                 N_t,
                 n_channels=None,
                 n_total_channels=None,
                 prb_file=None,
                 dtype=None,
                 sample_rate=None,
                 offset=0,
                 gain=0.01
                 ):
        """Load traces, spikes, templates and amplitudes for the conversion.

        Parameters
        ----------
        basename
            Prefix of the result files. ``'.kwik'`` is appended to build
            ``self.kwik_path`` and it is forwarded to the ``_read_*`` helpers.
        filename
            Forwarded to ``_read_filtered`` to open the traces.
        N_t
            Only used to derive the number of samples extracted on each side
            of a spike, ``int(N_t - 1) // 2``.
        n_channels
            Stored as ``self.n_channels``; the waveform and mask arrays are
            asserted to have this size along their last dimension.
        n_total_channels
            Channel count given to ``_read_filtered``; must equal the second
            dimension of the loaded traces.
        prb_file
            Probe file given to ``load_probe`` and later to ``create_kwik``.
        dtype
            Forwarded to ``_read_filtered`` and later to ``create_kwik``.
        sample_rate
            Sampling rate. Must be a number: the band-pass upper cutoff is
            computed as ``0.95 * 0.5 * sample_rate``.
        offset, gain
            Forwarded to ``WaveformLoader`` as ``dc_offset`` and
            ``scale_factor`` (only used when features are extracted).
        """
        self.n_features_per_channel = 3
        self.n_total_channels = n_total_channels

        # The same number of samples is taken before and after each spike.
        half_width = int(N_t - 1) // 2
        self.extract_s_before = half_width
        self.extract_s_after = half_width

        # Set to True if your data is already pre-filtered (much quicker).
        filtered_datfile = False

        # Filtering parameters for PCA (ignored if filtered_datfile is True).
        filter_low = 500.
        filter_high = 0.95 * .5 * sample_rate
        filter_butter_order = 3

        self.basename = basename
        self.kwik_path = basename + '.kwik'
        self.dtype = dtype
        self.prb_file = prb_file
        self.probe = load_probe(prb_file)

        self.sample_rate = sample_rate
        self.filtered_datfile = filtered_datfile

        self._sd = SpikeDetekt(probe=self.probe,
                               n_features_per_channel=self.n_features_per_channel,
                               pca_n_waveforms_max=10000,
                               extract_s_before=half_width,
                               extract_s_after=half_width,
                               sample_rate=sample_rate,
                               )
        self.n_samples_w = 2 * half_width

        # A xxx.filtered.trunc file may be created if needed.
        self.file, self.traces_f = _read_filtered(filename,
                                                  n_channels=n_total_channels,
                                                  dtype=dtype,
                                                  )
        self.n_samples, self.n_total_channels = self.traces_f.shape
        self.n_channels = n_channels
        assert n_total_channels == self.n_total_channels
        info("Loaded traces: {}.".format(self.traces_f.shape))

        # Load spikes.
        self.spike_samples, self.spike_clusters = _read_spikes(basename)
        self.n_spikes = len(self.spike_samples)
        assert len(self.spike_clusters) == self.n_spikes
        info("Loaded {} spikes.".format(self.n_spikes))

        # Chunks when computing features. Integer ceiling division: unlike
        # ceil(a / b) it does not lose the last partial chunk when `/` is
        # integer division (Python 2).
        self.chunk_size = 2500
        self.n_chunks = (self.n_spikes + self.chunk_size - 1) // self.chunk_size

        # Load templates and masks.
        self.templates, self.template_masks = _read_templates(
            basename, self.probe, self.n_total_channels, self.n_channels)
        self.n_templates = len(self.templates)
        info("Loaded templates: {}.".format(self.templates.shape))

        # Load amplitudes.
        self.amplitudes = _read_amplitudes(
            basename, self.n_templates, self.n_spikes, self.spike_clusters)

        # NOTE(review): `extract_features` is not defined in this class; it
        # has to come from an enclosing or module-level scope -- confirm.
        if extract_features:
            # The WaveformLoader fetches and filters waveforms from the raw
            # traces dynamically.
            b_filter = bandpass_filter(rate=self.sample_rate,
                                       low=filter_low,
                                       high=filter_high,
                                       order=filter_butter_order)

            # Named so that it does not shadow the builtin `filter`.
            def apply_bandpass(x):
                return apply_filter(x, b_filter)

            filter_margin = filter_butter_order * 3

            # Concatenate the channels of every channel group of the probe.
            channel_groups = self.probe['channel_groups']
            nodes = []
            for key in channel_groups.keys():
                nodes += channel_groups[key]['channels']
            nodes = np.array(nodes, dtype=np.int32)

            loader_kwargs = dict(traces=self.traces_f,
                                 n_samples=self.n_samples_w,
                                 dc_offset=offset,
                                 scale_factor=gain,
                                 channels=nodes,
                                 )
            if not filtered_datfile:
                # Raw data: filter the waveforms when they are fetched.
                loader_kwargs.update(filter=apply_bandpass,
                                     filter_margin=filter_margin)
            self._wl = WaveformLoader(**loader_kwargs)

            # A virtual (n_spikes, n_samples, n_channels) array that is
            # memmapped to the filtered data file.
            self.waveforms = SpikeLoader(self._wl, self.spike_samples)

            assert self.waveforms.shape == (self.n_spikes,
                                            self.n_samples_w,
                                            self.n_channels)
            assert self.template_masks.shape == (self.n_templates, self.n_channels)

    def iter_spikes(self):
        """Yield ``(start, stop)`` spike-index bounds, one pair per chunk.

        ``stop`` is clipped to ``self.n_spikes`` so the last chunk may be
        shorter than ``self.chunk_size``.
        """
        for idx in range(self.n_chunks):
            start = idx * self.chunk_size
            stop = min(start + self.chunk_size, self.n_spikes)
            yield (start, stop)

    def compute_pcs(self):
        """Compute the principal components on a regular subset of spikes.

        Every ``k``-th spike is used, ``k`` being the number of spikes divided
        by the ``pca_n_waveforms_max`` setting of the detector. The result is
        stored in ``self.pcs`` and returned.
        """
        # With fewer spikes than `pca_n_waveforms_max` the integer division
        # gives 0, and a slice step of 0 raises ValueError; use every spike.
        k = max(1, self.n_spikes // self._sd._kwargs['pca_n_waveforms_max'])

        # Find the masks of the selection of spikes.
        clu = self.spike_clusters[::k]
        masks = self.template_masks[clu]

        self.pcs = self._sd.waveform_pcs(self.waveforms[::k], masks)
        return self.pcs

    def compute_features(self):
        """Yield the features of the spikes, one array per chunk.

        Each array has shape ``(n, n_channels, n_features_per_channel)`` where
        ``n`` is the number of spikes in the chunk. Reads ``self.pcs``, which
        is set by ``compute_pcs``.
        """
        pr = ProgressReporter()
        pr.set_progress_message('Computing features: {progress:.1f}%.')
        pr.set_complete_message('All features computed.')
        pr.value_max = self.n_chunks

        for i, j in self.iter_spikes():
            n = j - i

            w = self.waveforms[i:j]
            assert w.shape == (n, self.n_samples_w, self.n_channels)

            f = self._sd.features(w, self.pcs)
            assert f.shape == (n, self.n_channels, self.n_features_per_channel)

            yield f

            pr.increment()

    def compute_masks(self):
        """Yield the masks of the spikes, one ``(n, n_channels)`` array per chunk.

        A spike's mask is the mask of the template it is assigned to.
        """
        for i, j in self.iter_spikes():
            n = j - i

            clu = self.spike_clusters[i:j]
            m = self.template_masks[clu]
            assert m.shape == (n, self.n_channels)

            yield m

    def create_kwik(self):
        """Create the Kwik file and fill it with the clustering and spikes.

        When features are not extracted, ``self.n_features_per_channel`` is
        reset to ``None`` after the file has been created.
        """
        # Create an empty Kwik file.
        info("Starting the conversion to Kwik...")
        create_kwik(kwik_path=self.kwik_path,
                    raw_data_files=[self.file],
                    prb_file=self.prb_file,
                    n_channels=self.n_total_channels,
                    sample_rate=self.sample_rate,
                    dtype=self.dtype,
                    nfeatures_per_channel=self.n_features_per_channel,
                    extract_s_after=self.extract_s_after,
                    extract_s_before=self.extract_s_before,
                    overwrite=True,
                    )

        # Compute PCs and features.
        # NOTE(review): `extract_features` is not defined in this class; it
        # has to come from an enclosing or module-level scope -- confirm.
        if extract_features:
            info("Computing PCs...")
            self.compute_pcs()

            info("Computing features of all spikes...")
            # Features and masks are handed over as generators producing one
            # chunk at a time.
            features = self.compute_features()
            masks = self.compute_masks()
        else:
            info("Skipping PCA...")
            features = None
            masks = None
            self.n_features_per_channel = None

        # Add clusters.
        creator = KwikCreator(self.kwik_path)

        info("Adding the clusters in the kwik file.")
        creator.add_clustering(group=1,
                               name='main',
                               spike_clusters=self.spike_clusters,
                               template_waveforms=self.templates,
                               template_masks=self.template_masks,
                               template_amplitudes=self.amplitudes,
                               )

        # Add spikes.
        info("Adding the spikes in the kwik file.")
        creator.add_spikes(group=1,
                           spike_samples=self.spike_samples,
                           masks=masks,
                           features=features,
                           n_channels=self.n_channels,
                           n_features=self.n_features_per_channel
                           )

        info("Kwik file successfully created!")

    def template_explorer(self, name='templates'):
        """Mini GUI to explore the templates.

        ``name`` selects what is shown: ``'templates'`` for the templates, or
        ``'waveforms'`` for each spike waveform together with its template.
        With any other value key presses display nothing. Space moves to the
        next item and Shift+Space to the previous one.
        """

        from phy.plot.waveforms import plot_waveforms
        from vispy.app import run

        # Channel positions, ordered by channel index. The probe is read from
        # this instance rather than from a global `c` object.
        channel_groups = self.probe['channel_groups']
        geometry = {}
        positions = []
        nodes = []
        for key in channel_groups.keys():
            channels = channel_groups[key]['channels']
            geometry.update(channel_groups[key]['geometry'])
            nodes += channels
            positions += [geometry[channel] for channel in channels]
        idx = np.argsort(nodes)
        positions = np.array(positions)[idx]

        self._n = 2
        wave = np.zeros((0, self.n_samples_w, self.n_channels))
        w = plot_waveforms(channel_positions=positions,
                           waveforms=wave,
                           overlap=True,
                           alpha=1.,
                           probe_scale=(1.9, 1.0),
                           box_scale=(0.066, 0.01),
                           )

        # Show templates.
        if name == 'templates':
            templates = self.templates
            masks = self.template_masks

        # Show waveforms.
        elif name == 'waveforms':
            templates = self.waveforms
            masks = self.template_masks[self.spike_clusters]

        @w.connect
        def on_key_press(e):
            if e.key == 'space':
                self._n += 1 if ('Shift' not in e.modifiers) else -1
                if name == 'templates':
                    info("Template {}.".format(self._n))
                    w.set_data(waveforms=templates[self._n],
                               masks=masks[self._n],
                               )
                elif name == 'waveforms':
                    sample = self.spike_samples[self._n]
                    cluster = self.spike_clusters[self._n]
                    info("Waveform {}, template={}, sample={}.".format(self._n,
                         cluster, sample))

                    # The last sample of the template is dropped before
                    # stacking, presumably so its length matches the
                    # waveform -- TODO confirm.
                    wav = np.vstack((templates[self._n],
                                     self.templates[cluster][:-1][None, ...]))

                    m = np.vstack((masks[self._n],
                                   self.template_masks[cluster][None, ...]))
                    w.set_data(waveforms=wav,
                               masks=m,
                               spike_clusters=[0, 1],
                               )
        run()