def test_bin_data(self):
    """Exercise ``bin_data`` over a grid of array shapes and bin sizes.

    For every combination of row count, column count and bin size the test
    checks that

    * ``bin_data`` runs on a ``uint8`` view of the data,
    * ``float64`` and ``int8`` inputs are rejected with ``ValueError``,
    * the output buffer keeps its ``(n_bins, ncols)`` shape and ``uint64``
      dtype.
    """
    binsizes = (np.arange(4) + 1).tolist()
    shapes = (11, 4), (4, 11), (10, 3), (3, 10), (2, 2)
    row_counts, col_counts = zip(*shapes)

    # itertools.product yields tuples in the order of its arguments, so the
    # iterables are passed in the same order the loop unpacks them.
    args = itertools.product(row_counts, col_counts, binsizes)

    for nrows, ncols, binsize in args:
        x = np.random.rand(nrows, ncols)

        # Bin edges walk over the rows of ``x`` in steps of ``binsize``,
        # mirroring how the ``bin`` method builds its edges.
        bins = cast(np.arange(0, nrows, binsize), np.uint64)
        binned = np.empty((bins.shape[0] - 1, x.shape[1]), np.uint64)

        # NOTE(review): viewing float64 data as uint8 widens the last axis
        # eightfold, so the input has 8 * ncols columns while ``binned`` has
        # ncols -- confirm this is the input ``bin_data`` is meant to see.
        bin_data(x.view(np.uint8), bins, binned)

        # Only uint8 buffers are accepted; other dtypes must be rejected.
        self.assertRaises(ValueError, bin_data, x, bins, binned)
        self.assertRaises(ValueError, bin_data, x.view(np.int8), bins,
                          binned)

        self.assertTupleEqual(binned.shape, (bins.size - 1, ncols))
        self.assertEqual(binned.dtype, np.uint64)
def bin(self, cleared, binsize, reject_count=100, dropna=False):
    """Bin spike data into `binsize` millisecond bins.

    Roughly, sum up the ones (and zeros) in the data using bins of size
    `binsize`. See :func:`span.utils.utils.bin_data` for the actual loop
    that executes this binning. This method is a wrapper around that
    function: it builds the bin edges, attaches a time index to the result
    and masks channels with too few spikes.

    Parameters
    ----------
    cleared : array_like
        The "refractory-period-cleared" array of booleans to bin. It must
        provide ``shape``, ``values`` and ``columns`` (i.e., behave like a
        ``DataFrame``).

    binsize : numbers.Real
        The size of the bins to use, in milliseconds.

    reject_count : numbers.Real, optional, default 100
        Assign ``NaN`` to channels whose mean firing rate corresponds to
        fewer than this many spikes over the whole recording.

    dropna : bool, optional
        Whether to drop the channels that were set to ``NaN``.

    Raises
    ------
    AssertionError
        * If `binsize` is not a positive number or if `reject_count` is not
          a nonnegative number
        * If `binsize` is shorter than a single sample at the sampling rate
          ``self.fs``

    Returns
    -------
    binned : SpikeGroupedDataFrame of float64

    See Also
    --------
    span.utils.utils.bin_data
    """
    # The type check comes first so that a non-numeric argument fails with
    # the documented AssertionError instead of a TypeError raised by the
    # comparison.  The error is raised explicitly (rather than via
    # ``assert``) so the validation is not stripped under ``python -O``.
    if not (isinstance(binsize, numbers.Real) and binsize > 0):
        raise AssertionError('"binsize" must be a positive number')

    if not (isinstance(reject_count, numbers.Real) and reject_count >= 0):
        raise AssertionError(
            '"reject_count" must be a nonnegative real number')

    ms_per_s = 1e3

    # ms * (samples / s) / (ms / s) == samples, rounded down to whole samples
    bin_samples = cast(np.floor(binsize * self.fs / ms_per_s), np.uint64)

    # A zero step would make np.arange fail with an unhelpful error.
    if not bin_samples > 0:
        raise AssertionError(
            '"binsize" must span at least one sample at the sampling rate')

    bins = np.arange(0, self.nsamples - 1, bin_samples, np.uint64)

    # one output row per pair of consecutive bin edges, one column per
    # column of ``cleared``
    shape = bins.shape[0] - 1, cleared.shape[1]
    btmp = np.empty(shape, np.uint64)

    # bin_data fills ``btmp`` in place; it is handed a uint8 view of the
    # values
    bin_data(cleared.values.view(np.uint8), bins, btmp)

    # make a datetime index of milliseconds; adding ``freq`` shifts every
    # label forward by one bin width
    freq = binsize * datetools.Milli()
    index = date_range(start=self.date, periods=btmp.shape[0], freq=freq,
                       name=r'$t\left(i\right)$', tz='US/Eastern') + freq
    binned = DataFrame(btmp, index=index, columns=cleared.columns,
                       dtype=np.float64)

    # samples / (samples / s) == s
    rec_len_s = self.nsamples / self.fs

    # spikes / s
    min_sp_per_s = reject_count / rec_len_s

    # spikes / s * ms / ms == spikes / s
    sp_per_s = binned.mean() * ms_per_s / binsize

    # get rid of channels that have fewer than "reject_count" spikes over
    # the whole recording
    # NOTE(review): ``.ix`` no longer exists in pandas >= 1.0; switch to
    # ``.loc`` once the minimum supported pandas version provides it.
    binned.ix[:, sp_per_s < min_sp_per_s] = np.nan

    if dropna:
        binned = binned.dropna(axis=1)

    return SpikeGroupedDataFrame(binned)