Example No. 1
def test_project_pcs():
    x1 = np.random.randn(20000, 2) * np.array([[10., 1.]])
    x2 = np.random.randn(20000, 2) * np.array([[1., 10.]])
    x = np.dstack((x1, x2))
    # Compute the PCs.
    pcs = compute_pcs(x)
    # Project the first spike's waveform onto the PCs.
    x_proj = project_pcs(x[0, ...], pcs)
    assert x_proj.shape == (2, 2)
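
This test only checks the output shape. As context, here is a minimal NumPy-only sketch of the behaviour the tests appear to assume for compute_pcs and project_pcs: per-channel PCA on an (nspikes, nsamples, nchannels) array returning PCs of shape (npcs, nsamples, nchannels), and projection of a single (nsamples, nchannels) waveform onto those PCs. The _sketch helpers below are hypothetical illustrations, not the package's implementation.

import numpy as np

def compute_pcs_sketch(x, npcs=None):
    # Per-channel PCA on x of shape (nspikes, nsamples, nchannels).
    # ASSUMPTION: the returned shape (npcs, nsamples, nchannels) is consistent
    # with the shapes asserted in these tests, but this is a sketch, not the
    # package's compute_pcs.
    nspikes, nsamples, nchannels = x.shape
    npcs = npcs or nsamples
    pcs = np.empty((npcs, nsamples, nchannels))
    for c in range(nchannels):
        cov = np.cov(x[:, :, c], rowvar=False)      # (nsamples, nsamples) covariance
        evals, evecs = np.linalg.eigh(cov)          # eigenvalues in ascending order
        order = np.argsort(evals)[::-1][:npcs]      # largest-variance components first
        pcs[..., c] = evecs[:, order].T             # one row per principal direction
    return pcs

def project_pcs_sketch(waveform, pcs):
    # Project one (nsamples, nchannels) waveform onto per-channel PCs.
    # Returns an (nchannels, npcs) feature array, i.e. the (2, 2) shape
    # asserted in test_project_pcs above.
    return np.einsum('ksc,sc->ck', pcs, waveform)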
Example No. 2
def save_features(experiment, **prm):
    """Compute the features from the waveforms and save them in the experiment
    dataset."""
    nwaveforms_max = prm['pca_nwaveforms_max']
    npcs = prm['nfeatures_per_channel']
    kwik = experiment._files['kwik']

    for chgrp in iterkeys(experiment.channel_groups):
        spikes = experiment.channel_groups[chgrp].spikes
        # Extract a subset of the waveforms.
        nspikes = len(spikes)

        # We convert the extendable features_masks array to a
        # contiguous array.
        if prm.get('features_contiguous', True):
            # Make sure to update the PyTables node after the recreation,
            # to avoid ClosedNodeError.
            spikes.features_masks = to_contiguous(spikes.features_masks,
                                                  nspikes=nspikes)
        else:
            warn(("The features array has not been converted to a contiguous "
                  "array."))

        # Skip the channel group if there are no spikes.
        if nspikes == 0:
            continue
        nwaveforms = min(nspikes, nwaveforms_max)
        step = excerpt_step(nspikes, nexcerpts=nwaveforms, excerpt_size=1)
        waveforms_subset = spikes.waveforms_filtered[::step]

        # With this option, the PCs are provided directly in the PRM file
        # as a NumPy array.
        if prm.get('canonical_pcs', None) is not None:
            pcs = prm['canonical_pcs']
            assert isinstance(pcs, np.ndarray)
        else:
            # We take the masks in order to compute the PCs only on
            # the unmasked spikes, for each channel.
            masks = spikes.features_masks[::step, ::npcs,
                                          1]  # (nspikes, nchannels)
            # Compute the PCs.
            pcs = compute_pcs(waveforms_subset, npcs=npcs, masks=masks)

        # Add PCs to the KWIK file
        kwik.createArray(experiment.channel_groups[chgrp]._node,
                         'pca_waveforms', pcs)

        # Project the waveforms on the PCs and compute the features.
        # WARNING: optimization: we could load and project waveforms by chunks.
        for i, waveform in enumerate(spikes.waveforms_filtered):
            # Convert waveforms from int16 to float32 with scaling
            # before computing PCA so as to avoid getting huge numbers.
            waveform = convert_dtype(waveform, np.float32)
            features = project_pcs(waveform, pcs)
            spikes.features_masks[i, :, 0] = features.ravel()
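
The projection loop above reads and writes one spike at a time; as the WARNING comment notes, loading and projecting waveforms by chunks would amortize the PyTables round trips. A hedged sketch of such a chunked variant, meant as a drop-in for the per-spike loop inside the channel-group loop (it reuses convert_dtype, project_pcs, spikes, nspikes and pcs from save_features; chunk_size is an illustrative value, not an actual PRM parameter):

# Hypothetical chunked replacement for the per-spike projection loop.
chunk_size = 1000                                    # illustrative only
nfeatures = spikes.features_masks.shape[1]
for start in range(0, nspikes, chunk_size):
    stop = min(start + chunk_size, nspikes)
    chunk = spikes.waveforms_filtered[start:stop]    # one HDF5 read per chunk
    features_chunk = np.empty((stop - start, nfeatures), dtype=np.float32)
    for j, waveform in enumerate(chunk):
        waveform = convert_dtype(waveform, np.float32)
        features_chunk[j] = project_pcs(waveform, pcs).ravel()
    spikes.features_masks[start:stop, :, 0] = features_chunk  # one HDF5 write per chunk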
Example No. 3
def test_compute_pcs_3d():
    """Test PCA on a 3D array."""
    x1 = np.random.randn(20000, 2) * np.array([[10., 1.]])
    x2 = np.random.randn(20000, 2) * np.array([[1., 10.]])
    x = np.dstack((x1, x2))
    # Compute the PCs.
    pcs = compute_pcs(x)
    assert pcs.ndim == 3
    assert np.linalg.norm(pcs[0, :, 0] - np.array([-1., 0.])) < 1e-2
    assert np.linalg.norm(pcs[1, :, 0] - np.array([0., -1.])) < 1e-2
    assert np.linalg.norm(pcs[0, :, 1] - np.array([0, 1.])) < 1e-2
    assert np.linalg.norm(pcs[1, :, 1] - np.array([-1., 0.])) < 1e-2
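
The asserted directions follow from how the data is built: each channel's covariance is close to diagonal (variances roughly 100 and 1), so its leading eigenvector is axis-aligned and the second one is orthogonal to it. The specific signs in the assertions reflect the implementation's eigenvector sign convention, since principal directions are only defined up to sign. A quick stand-alone check, independent of compute_pcs:

import numpy as np

x1 = np.random.randn(20000, 2) * np.array([[10., 1.]])
# The empirical covariance is roughly [[100, 0], [0, 1]], so the leading
# principal direction of this channel is [1, 0] up to sign.
print(np.cov(x1, rowvar=False).round(1))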
Example No. 4
def test_compute_pcs():
    """Test PCA on a 2D array."""
    # Horizontal ellipsoid.
    x = np.random.randn(20000, 2) * np.array([[10., 1.]])
    # Rotate the points by pi/4.
    a = 1. / np.sqrt(2.)
    rot = np.array([[a, -a], [a, a]])
    x = np.dot(x, rot)
    # Compute the PCs.
    pcs = compute_pcs(x)
    assert pcs.ndim == 2
    assert (np.abs(pcs) - a).max() < 1e-2
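
Why 1/sqrt(2): the original cloud has its principal axes along the coordinate axes, and rotating every point by pi/4 rotates the principal directions by the same angle, so both become (+-1, +-1)/sqrt(2). Every entry of pcs should therefore have magnitude close to a = 1/sqrt(2), which is what the final assertion checks. A quick independent check with a plain eigendecomposition:

import numpy as np

x = np.random.randn(20000, 2) * np.array([[10., 1.]])
a = 1. / np.sqrt(2.)
rot = np.array([[a, -a], [a, a]])
x = np.dot(x, rot)
# Eigenvectors of the rotated covariance: every component has magnitude ~0.707.
_, evecs = np.linalg.eigh(np.cov(x, rowvar=False))
print(np.abs(evecs))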