def _test_compare_to_old(self):
    """Compare the current wavelet mappers with the legacy ``wavelet_`` ones.

    Acts as a regression test: both implementations receive the same
    boxcar'ed data and must return element-wise identical results.
    """
    import mvpa.mappers.wavelet_ as wavelet_

    dataset = datasets['uni2medium']
    flat = dataset.samples
    width = 16  # size of timeline for wavelet
    onsets = np.arange(dataset.nsamples - width * 2) + width

    # create 3D instance (samples x timepoints x channels)
    boxed = BoxcarMapper(onsets, width).forward(flat)

    # every current mapper is paired with its legacy counterpart
    mapper_pairs = [
        (WaveletTransformationMapper(),
         wavelet_.WaveletTransformationMapper()),
        (WaveletPacketMapper(),
         wavelet_.WaveletPacketMapper()),
    ]
    for current, legacy in mapper_pairs:
        new_out = current(boxed)
        old_out = legacy(boxed)
        identical = (new_out == old_out).all()
        self.failUnless(
            identical,
            msg="We should have got same result with old and new code. "
                "Got %s and %s" % (new_out, old_out))
def test_simple_wdm(self):
    """Exercise forward/reverse of the wavelet mappers on 2D and 3D input.

    Verifies that bogus ``wavelet``/``mode`` arguments raise ValueError,
    that a mapper built with ``dim=2`` applied to axis-swapped data agrees
    with the default mapper, and that the DWT (but not the packet
    transform) can be reversed to within a tiny relative error.
    """
    dataset = datasets['uni2medium']
    flat = dataset.samples
    width = 15  # size of timeline for wavelet
    onsets = np.arange(dataset.nsamples - width * 2) + width

    # create 3D instance (samples x timepoints x channels)
    boxed = BoxcarMapper(onsets, width).forward(flat)

    # plain forward pass with a default mapper; the result is not inspected
    WaveletTransformationMapper().forward(boxed)
    boxed_swapped = boxed.swapaxes(1, 2)

    for bad_arg in ('wavelet', 'mode'):
        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              **{bad_arg: 'bogus'})

    mapper_pairs = (
        (WaveletTransformationMapper(), WaveletTransformationMapper(dim=2)),
        (WaveletPacketMapper(), WaveletPacketMapper(dim=2)),
    )
    inputs = ((boxed, boxed_swapped), (flat, None))

    for mapper, mapper_dim2 in mapper_pairs:
        # we can do reverse only for DWT
        invertible = not isinstance(mapper, WaveletPacketMapper)

        for arr, arr_swapped in inputs:
            coeffs = mapper.forward(arr)

            if arr_swapped is not None:
                coeffs_swapped = mapper_dim2.forward(arr_swapped)
                self.failUnless(
                    (coeffs == coeffs_swapped.swapaxes(1, 2)).all(),
                    msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (coeffs, coeffs_swapped))

            # some sanity checks
            self.failUnless(coeffs.shape[0] == arr.shape[0])

            if not invertible:
                continue

            restored = mapper.reverse(coeffs)
            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.failUnlessEqual(restored.shape, arr.shape,
                                 msg="Shape should be the same after iDWT")

            deviation = np.linalg.norm(arr - restored)
            reference = np.linalg.norm(arr)
            self.failUnless(deviation / reference < 1e-10)
def test_datasetmapping():
    """Push a small Dataset through a BoxcarMapper and back again.

    Checks the sample/feature attributes produced by ``forward()`` and what
    ``reverse()`` is able to recover from the boxcar'ed dataset.
    """
    # 6 samples, 4 features
    raw = np.arange(24).reshape(6, 4)
    sample_attrs = {'timepoints': np.arange(6), 'multidim': raw.copy()}
    feature_attrs = {'fid': np.arange(4)}
    ds = Dataset(raw, sa=sample_attrs, fa=feature_attrs)

    # with overlapping and non-overlapping boxcars
    onsets = [0, 1, 4]
    width = 2
    mapper = BoxcarMapper(onsets, width, inspace='boxy')
    # train is critical
    mapper.train(ds)
    boxed = mapper.forward(ds)

    n_boxes = len(onsets)
    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, width)

    # all sample attributes remain, but they can get rotated/compressed
    # into multidimensional attributes
    assert_equal(sorted(boxed.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(boxed.sa.multidim.shape, (n_boxes, width, ds.nfeatures))
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, width))
    expected_times = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_times)
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)

    # feature attributes also get rotated and broadcasted
    assert_array_equal(boxed.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    expected_offsets = np.repeat(np.arange(width), 4).reshape(2, -1)
    assert_array_equal(boxed.fa.boxy_offsetidx, expected_offsets)

    # now see how it works on reverse()
    restored = mapper.reverse(boxed)
    # we got at least something of all original attributes back
    assert_equal(sorted(restored.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(ds.fa.keys()))

    # it is not possible to reconstruct the full samples array;
    # some samples might even show up multiple times (when there are
    # overlapping boxcars)
    expected_samples = np.array([[0, 1, 2, 3],
                                 [4, 5, 6, 7],
                                 [4, 5, 6, 7],
                                 [8, 9, 10, 11],
                                 [16, 17, 18, 19],
                                 [20, 21, 22, 23]])
    assert_array_equal(restored.samples, expected_samples)
    assert_array_equal(restored.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(restored.sa.multidim,
                       ds.sa.multidim[restored.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(restored.fa.fid, ds.fa.fid)
def _test_compare_to_old(self):
    """Regression test: new and legacy wavelet mappers must agree exactly.

    The same boxcar'ed samples are run through each current mapper and its
    counterpart from ``mvpa.mappers.wavelet_``; outputs are compared
    element by element.
    """
    import mvpa.mappers.wavelet_ as wavelet_

    source = datasets['uni2medium']
    samples_2d = source.samples
    window = 16  # size of timeline for wavelet
    starts = np.arange(source.nsamples - window * 2) + window

    # create 3D instance (samples x timepoints x channels)
    boxcar = BoxcarMapper(starts, window)
    samples_3d = boxcar(samples_2d)

    # (current implementation, legacy implementation)
    candidates = [
        (WaveletTransformationMapper(),
         wavelet_.WaveletTransformationMapper()),
        (WaveletPacketMapper(),
         wavelet_.WaveletPacketMapper()),
    ]
    for fresh, old in candidates:
        fresh_result = fresh(samples_3d)
        old_result = old(samples_3d)
        matches = (fresh_result == old_result).all()
        self.failUnless(
            matches,
            msg="We should have got same result with old and new code. "
                "Got %s and %s" % (fresh_result, old_result))
def test_simple_wdm(self):
    """Run the wavelet mappers over 2D and boxcar'ed 3D samples.

    Invalid ``wavelet``/``mode`` arguments have to raise ValueError, a
    ``dim=2`` mapper on axis-swapped input has to match the default mapper,
    and reversing the DWT has to give back the input up to rounding noise.
    """
    source = datasets['uni2medium']
    samples_2d = source.samples
    window = 15  # size of timeline for wavelet
    starts = np.arange(source.nsamples - window * 2) + window

    # create 3D instance (samples x timepoints x channels)
    boxcar = BoxcarMapper(starts, window)
    samples_3d = boxcar(samples_2d)

    # one plain call of a default mapper; its output is not checked
    WaveletTransformationMapper()(samples_3d)
    samples_3d_swapped = samples_3d.swapaxes(1, 2)

    for bad_arg in ('wavelet', 'mode'):
        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              **{bad_arg: 'bogus'})

    mapper_pairs = (
        (WaveletTransformationMapper(), WaveletTransformationMapper(dim=2)),
        (WaveletPacketMapper(), WaveletPacketMapper(dim=2)),
    )
    inputs = ((samples_3d, samples_3d_swapped), (samples_2d, None))

    for plain, along_dim2 in mapper_pairs:
        # we can do reverse only for DWT
        can_reverse = not isinstance(plain, WaveletPacketMapper)

        for block, block_swapped in inputs:
            transformed = plain(block)

            if block_swapped is not None:
                transformed_swapped = along_dim2(block_swapped)
                self.failUnless(
                    (transformed == transformed_swapped.swapaxes(1, 2)).all(),
                    msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (transformed, transformed_swapped))

            # some sanity checks
            self.failUnless(transformed.shape[0] == block.shape[0])

            if not can_reverse:
                continue

            recovered = plain.reverse(transformed)
            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.failUnlessEqual(
                recovered.shape, block.shape,
                msg="Shape should be the same after iDWT")

            residual = np.linalg.norm(block - recovered)
            scale = np.linalg.norm(block)
            self.failUnless(residual / scale < 1e-10)
def test_simple_wp1_level(self):
    """Check shape and reversibility of a level-2 wavelet packet mapper.

    The forward transform has to add exactly one dimension while keeping
    the first and last axes. Reversal is compared against the input when
    the 'pywt wp reconstruct' external exists; otherwise ``reverse`` has to
    raise NotImplementedError.
    """
    dataset = datasets['uni2large']
    flat = dataset.samples
    width = 50  # size of timeline for wavelet
    # only the first four boxcar onsets are used
    onsets = (np.arange(dataset.nsamples - width * 2) + width)[:4]

    # create 3D instance (samples x timepoints x channels)
    boxed = BoxcarMapper(onsets, width).forward(flat)

    # use wavelet mapper
    packet_mapper = WaveletPacketMapper(level=2, wavelet='sym2')
    coeffs = packet_mapper.forward(boxed)

    # Check dimensionality
    out_shape = coeffs.shape
    in_shape = boxed.shape
    self.failUnless(len(out_shape) == len(in_shape) + 1)
    self.failUnless(out_shape[1] * out_shape[2] >= in_shape[1])
    self.failUnless(out_shape[0] == in_shape[0])
    self.failUnless(out_shape[-1] == in_shape[-1])

    if not externals.exists('pywt wp reconstruct'):
        self.failUnlessRaises(NotImplementedError,
                              packet_mapper.reverse, coeffs)
        return

    # Test reverse -- should be identical
    restored = packet_mapper.reverse(coeffs)

    # inverse transform might be not exactly as the
    # input... but should be very close ;-)
    self.failUnlessEqual(restored.shape, boxed.shape,
                         msg="Shape should be the same after iDWT")

    deviation = np.linalg.norm(boxed - restored)
    reference = np.linalg.norm(boxed)

    # the precision check below is gated on a second external
    skip_if_no_external('pywt wp reconstruct fixed')
    self.failUnless(deviation / reference < 1e-10)
def testIds(self):
    """Check which id lists a BoxcarMapper accepts as in/out ids."""
    mapper = BoxcarMapper([1, 4, 6], 3)
    # apply the mapper once to a (10 x 2) array before querying ids
    mapper(N.arange(20).reshape((10, 2)))

    in_cases = (
        ([1], True),
        ([0, 1], False),
    )
    out_cases = (
        ([1], True),
        ([3], False),
        ([0, 1], True),
        ([0, 1, 0], False),
    )

    for ids, expected in in_cases:
        self.failUnlessEqual(mapper.isValidInId(ids), expected)
    for ids, expected in out_cases:
        self.failUnlessEqual(mapper.isValidOutId(ids), expected)
def test_simple_wp1_level(self):
    """Level-2 wavelet packet transform: dimensionality and reversal.

    Forward mapping must yield one extra axis and preserve the first and
    last axis lengths. If the 'pywt wp reconstruct' external is available
    the reversed data is compared with the input; if not, ``reverse`` must
    raise NotImplementedError.
    """
    source = datasets['uni2large']
    samples_2d = source.samples
    window = 50  # size of timeline for wavelet
    # restrict to the first four boxcar onsets
    starts = (np.arange(source.nsamples - window * 2) + window)[:4]

    # create 3D instance (samples x timepoints x channels)
    boxcar = BoxcarMapper(starts, window)
    samples_3d = boxcar(samples_2d)

    # use wavelet mapper
    wp_mapper = WaveletPacketMapper(level=2, wavelet='sym2')
    transformed = wp_mapper(samples_3d)

    # Check dimensionality
    shape_after = transformed.shape
    shape_before = samples_3d.shape
    self.failUnless(len(shape_after) == len(shape_before) + 1)
    self.failUnless(shape_after[1] * shape_after[2] >= shape_before[1])
    self.failUnless(shape_after[0] == shape_before[0])
    self.failUnless(shape_after[-1] == shape_before[-1])

    if not externals.exists('pywt wp reconstruct'):
        self.failUnlessRaises(NotImplementedError,
                              wp_mapper.reverse, transformed)
        return

    # Test reverse -- should be identical
    recovered = wp_mapper.reverse(transformed)

    # inverse transform might be not exactly as the
    # input... but should be very close ;-)
    self.failUnlessEqual(recovered.shape, samples_3d.shape,
                         msg="Shape should be the same after iDWT")

    residual = np.linalg.norm(samples_3d - recovered)
    scale = np.linalg.norm(samples_3d)

    # the precision check below is gated on a second external
    skip_if_no_external('pywt wp reconstruct fixed')
    self.failUnless(residual / scale < 1e-10)
def test_datasetmapping():
    """Forward- and reverse-map a Dataset with a BoxcarMapper.

    Covers the attributes the mapper adds or reshapes on ``forward()`` and
    the samples/attributes that come back from ``reverse()``.
    """
    # 6 samples, 4 features
    values = np.arange(24).reshape(6, 4)
    ds = Dataset(values,
                 sa={'timepoints': np.arange(6), 'multidim': values.copy()},
                 fa={'fid': np.arange(4)})

    # with overlapping and non-overlapping boxcars
    starts = [0, 1, 4]
    box_len = 2
    boxcar = BoxcarMapper(starts, box_len, inspace='boxy')
    # train is critical
    boxcar.train(ds)
    mapped = boxcar.forward(ds)

    n_starts = len(starts)
    assert_equal(len(mapped), n_starts)
    assert_equal(mapped.nfeatures, box_len)

    # all sample attributes remain, but they can get rotated/compressed
    # into multidimensional attributes
    wanted_sa = ['boxy_onsetidx'] + sorted(ds.sa.keys())
    assert_equal(sorted(mapped.sa.keys()), wanted_sa)
    assert_equal(mapped.sa.multidim.shape, (n_starts, box_len, ds.nfeatures))
    assert_equal(mapped.sa.timepoints.shape, (n_starts, box_len))
    wanted_times = np.array([(s, s + 1) for s in starts]).flatten()
    assert_array_equal(mapped.sa.timepoints.flatten(), wanted_times)
    assert_array_equal(mapped.sa.boxy_onsetidx, starts)

    # feature attributes also get rotated and broadcasted
    assert_array_equal(mapped.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    wanted_offsets = np.repeat(np.arange(box_len), 4).reshape(2, -1)
    assert_array_equal(mapped.fa.boxy_offsetidx, wanted_offsets)

    # now see how it works on reverse()
    back = boxcar.reverse(mapped)
    # we got at least something of all original attributes back
    assert_equal(sorted(back.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(back.fa.keys()), sorted(ds.fa.keys()))

    # it is not possible to reconstruct the full samples array;
    # some samples might even show up multiple times (when there are
    # overlapping boxcars)
    wanted_samples = np.array([[0, 1, 2, 3],
                               [4, 5, 6, 7],
                               [4, 5, 6, 7],
                               [8, 9, 10, 11],
                               [16, 17, 18, 19],
                               [20, 21, 22, 23]])
    assert_array_equal(back.samples, wanted_samples)
    assert_array_equal(back.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(back.sa.multidim, ds.sa.multidim[back.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(back.fa.fid, ds.fa.fid)
def plot_erp(data, SR=500, onsets=None,
             pre=0.2, pre_onset=None, post=None, pre_mean=None,
             color='r', errcolor=None, errtype=None, ax=pl,
             ymult=1.0, *args, **kwargs):
    """Plot single ERP on existing canvas

    The input `data` is never modified; scaling and baseline correction
    are applied to a copy.

    Parameters
    ----------
    data : 1D or 2D ndarray
      The data array can either be 1D (samples over time) or 2D
      (trials x samples). In the first case a boxcar mapper is used to
      extract the respective trial timecourses given a list of trial onsets.
      In the latter case, each row of the data array is taken as the EEG
      signal timecourse of a particular trial.
    onsets : list(int)
      List of onsets (in samples not in seconds).
    SR : int, optional
      Sampling rate (1/s) of the signal.
    pre : float, optional
      Duration (in seconds) to be plotted prior to onset.
    pre_onset : float or None
      If data is already in epochs (2D) then pre_onset provides information
      on how many seconds pre-stimulus were used to generate them. If None,
      then pre_onset = pre
    post : float
      Duration (in seconds) to be plotted after the onset. Required when
      `onsets` are given.
    pre_mean : float
      Duration (in seconds) at the beginning of the window which is used
      for deriving the mean of the signal. If None, pre_mean = pre
    errtype : None or 'ste' or 'std' or 'ci95' or list of previous three
      Type of error value to be computed per datapoint.  'ste' --
      standard error of the mean, 'std' -- standard deviation 'ci95'
      -- 95% confidence interval (1.96 * ste), None -- no error margin
      is plotted (default)
      Optionally, multiple error types can be specified in a list. In that
      case all of them will be plotted.
    color : matplotlib color code, optional
      Color to be used for plotting the mean signal timecourse.
    errcolor : matplotlib color code
      Color to be used for plotting the error margin. If None, use main color
      but with weak alpha level
    ax :
      Target where to draw.
    ymult : float, optional
      Multiplier for the values. E.g. if negative-up ERP plot is needed:
      provide ymult=-1.0
    *args, **kwargs
      Additional arguments to `pylab.plot`.

    Returns
    -------
    array
      Mean ERP timeseries.

    Raises
    ------
    ValueError
      If `onsets` are given without `post`, or if an unknown error type
      is requested.
    RuntimeError
      If the (possibly boxcar'ed) data is not 2D.
    """
    if pre_mean is None:
        pre_mean = pre

    # set default
    pre_discard = 0

    if onsets is not None:
        # we need to extract ERPs from a full timeline
        if post is None:
            raise ValueError(
                "Duration post onsets must be provided if onsets are given")
        # trial timecourse duration
        duration = pre + post

        bcm = BoxcarMapper(onsets,
                           boxlength=int(SR * duration),
                           offset=-int(SR * pre))
        erp_data = bcm(data)

        # override values since we are using Boxcar
        pre_onset = pre
    else:
        if pre_onset is None:
            pre_onset = pre

        if pre_onset < pre:
            warning("Pre-stimulus interval to plot %g is smaller than provided "
                    "pre-stimulus captured interval %g, thus plot interval was "
                    "adjusted" % (pre, pre_onset))
            pre = pre_onset

        if post is None:
            # figure out post
            # NOTE(review): pre_discard still holds its default of 0 here,
            # it only gets its real value a few lines below
            duration = float(data.shape[1]) / SR - pre_discard
            post = duration - pre
        else:
            duration = pre + post

        erp_data = data
        pre_discard = pre_onset - pre

    # Scale the data appropriately. Deliberately NOT done in place: for
    # pre-epoched input `erp_data` is the caller's array, and an in-place
    # multiply would both alter it and fail for integer data scaled by a
    # float.
    erp_data = erp_data * ymult

    # validity check -- we should have 2D matrix (trials x samples)
    if len(erp_data.shape) != 2:
        # wrap the shape into a 1-tuple, otherwise `%` would try to unpack it
        raise RuntimeError(
            "plot_erp() supports either 1D data with onsets, or 2D data "
            "(trials x sample_points). Shape of the data at the point "
            "is %s" % (erp_data.shape,))

    if pre_mean is not None and pre_mean != 0:
        # mean of pre-onset signal across trials
        baseline_start = int((pre_onset - pre_mean) * SR)
        baseline_stop = int(pre_onset * SR)
        erp_baseline = np.mean(erp_data[:, baseline_start:baseline_stop])
        # center data on pre-onset mean (creates a new array)
        erp_data = erp_data - erp_baseline

    # generate timepoints and error ranges to plot filled error area
    # top ->
    # bottom <-
    time_points = np.arange(erp_data.shape[1]) * 1.0 / SR - pre_onset

    # if pre != pre_onset
    if pre_discard > 0:
        npoints = int(pre_discard * SR)
        time_points = time_points[npoints:]
        erp_data = erp_data[:, npoints:]

    # select only time points of interest (if post is provided)
    if post is not None:
        npoints = int(duration * SR)
        time_points = time_points[:npoints]
        erp_data = erp_data[:, :npoints]

    # compute mean signal timecourse across trials
    erp_mean = np.mean(erp_data, axis=0)

    # give sane default
    if errtype is None:
        errtype = []
    if not isinstance(errtype, list):
        errtype = [errtype]

    for et in errtype:
        # compute error per datapoint
        if et in ['ste', 'ci95']:
            erp_stderr = erp_data.std(axis=0) / np.sqrt(len(erp_data))
            if et == 'ci95':
                erp_stderr *= 1.96
        elif et == 'std':
            erp_stderr = erp_data.std(axis=0)
        else:
            # report the offending entry, not the whole list
            raise ValueError("Unknown error type '%s'" % (et,))

        # outline of the error band: forward along the top edge, then
        # backward along the bottom edge
        time_points2w = np.hstack((time_points, time_points[::-1]))
        error_top = erp_mean + erp_stderr
        error_bottom = erp_mean - erp_stderr
        error2w = np.hstack((error_top, error_bottom[::-1]))

        if errcolor is None:
            errcolor = color

        # plot error margin
        ax.fill(time_points2w, error2w,
                edgecolor=errcolor, facecolor=errcolor, alpha=0.2,
                zorder=3)

    # plot mean signal timecourse
    ax.plot(time_points, erp_mean, lw=2, color=color, zorder=4,
            *args, **kwargs)
    return erp_mean
def eventrelated_dataset(ds, events=None, time_attr=None, match="prev",
                         eprefix="event"):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be a time
    series, but could also be any other type of ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input
    samples, can be automatically extracted using
    :class:`~mvpa.misc.support.Event` definition lists. For each event all
    samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to-be-specified samples attribute
    in the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Must contain at least one event.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it
      is set to None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111, 1.66666667, 2.22222222, 2.77777778],
           [ 2.22222222, 2.77777778, 3.33333333, 3.88888889]])
    """
    # relabel argument
    conv_strategy = {"prev": "floor",
                     "next": "ceil",
                     "closest": "round"}[match]

    # fail early with a clear message; otherwise a missing or empty event
    # list would surface further down as an obscure TypeError/IndexError
    if events is None or len(events) == 0:
        raise ValueError("At least one event definition is required "
                         "(got %r)" % (events,))

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev["onset"], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev["orig_offset"] = ev["onset"] - tvec[idx]
            # rescue the real onset into a new attribute
            ev["orig_onset"] = ev["onset"]
            ev["orig_duration"] = ev["duration"]
            # figure out how many samples we need: all samples from the
            # matched one on that start before the event ends
            tail = tvec[idx:]
            ev["duration"] = len(tail[tail < ev["onset"] + ev["duration"]])
            # new onset is sample index
            ev["onset"] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)

    # checks
    for p in ["onset", "duration"]:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)

    boxlength = max(evvars["duration"])
    if __debug__:
        if not max(evvars["duration"]) == min(evvars["duration"]):
            warning("Boxcar mapper will use maximum boxlength (%i) of all "
                    "provided Events." % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))

    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got
            # mapped by BoxcarMapper (i.e. is multi-dimensional). We move it
            # aside under new `eprefix` name
            ds.sa[eprefix + "_" + a] = ds.sa[a]
        if a in ["onset", "duration"]:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
def test_simpleboxcar():
    """Check BoxcarMapper forward/reverse on plain arrays of several shapes."""
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a boxlength of zero must be rejected
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # now do an identity transformation
    identity = BoxcarMapper(onsets, 1)
    mapped = identity.forward(column)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(mapped[:,0], column)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(onsets, 2).train, column)

    # now something that should work
    nbox = 9
    boxlength = 2
    pairs = BoxcarMapper(np.arange(nbox), boxlength)
    mapped = pairs(column)
    # check that it properly upcasts the dimensionality
    assert_equal(mapped.shape, (nbox, boxlength) + column.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    expected_pairs = np.vstack((np.arange(9), np.arange(9)+1)).T
    assert_array_equal(mapped.squeeze(), expected_pairs)

    # now test for proper data shape
    ones = np.ones((10,3,4,2))
    mapped = BoxcarMapper([2, 4, 3, 5], 4)(ones)
    assert_equal(mapped.shape, (4,4,3,4,2))

    # test reverse
    volume = np.arange(240).reshape(10, 3, 4, 2)
    starts = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(starts, boxlength)
    m.train(volume)
    forward_out = m.forward(volume)
    assert_equal(forward_out.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    reverse_out = m.reverse(forward_out)
    # shape has to match
    assert_equal(reverse_out.shape,
                 (len(starts) * boxlength,) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((reverse_out >= 24).all())
    assert_true((reverse_out < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(reverse_out[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(reverse_out[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    single = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(single, m.reverse1(single))
    # NOTE: reversing a sample of mismatching shape (e.g. single[0]) does
    # not raise ValueError; that is useful when reverse mapping sample
    # attributes, so no assertion is made about it

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw = np.arange(24).reshape(3, 4, 2)
    broadcast = m.forward1(raw)
    assert_array_equal(broadcast,
                       np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def test_simpleboxcar():
    """Forward- and reverse-map plain ndarrays through BoxcarMapper."""
    series = np.atleast_2d(np.arange(10)).T
    startpoints = np.arange(10)

    # an empty box (boxlength 0) is not allowed
    assert_raises(ValueError, BoxcarMapper, startpoints, 0)

    # now do an identity transformation
    unit_mapper = BoxcarMapper(startpoints, 1)
    result = unit_mapper.forward(series)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(result[:, 0], series)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(startpoints, 2).train, series)

    # now something that should work
    nbox = 9
    boxlength = 2
    startpoints = np.arange(nbox)
    two_wide = BoxcarMapper(startpoints, boxlength)
    result = two_wide(series)
    # check that it properly upcasts the dimensionality
    wanted_shape = (nbox, boxlength) + series.shape[1:]
    assert_equal(result.shape, wanted_shape)
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(result.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    filled = np.ones((10, 3, 4, 2))
    startpoints = [2, 4, 3, 5]
    result = BoxcarMapper(startpoints, 4)(filled)
    assert_equal(result.shape, (4, 4, 3, 4, 2))

    # test reverse
    block = np.arange(240).reshape(10, 3, 4, 2)
    startpoints = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(startpoints, boxlength)
    m.train(block)
    boxed = m.forward(block)
    assert_equal(boxed.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    unboxed = m.reverse(boxed)
    # shape has to match
    assert_equal(unboxed.shape,
                 (len(startpoints) * boxlength, ) + block.shape[1:])
    # only known samples are part of the results
    assert_true((unboxed >= 24).all())
    assert_true((unboxed < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(unboxed[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(unboxed[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    one_box = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(one_box, m.reverse1(one_box))
    # NOTE: reversing a sample of mismatching shape (e.g. one_box[0]) does
    # not raise ValueError; that is useful when reverse mapping sample
    # attributes, so no assertion is made about it

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    plain = np.arange(24).reshape(3, 4, 2)
    replicated = m.forward1(plain)
    assert_array_equal(replicated,
                       np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def testSimple(self):
    """Just the same tests as for transformWithBoxcar.

    Mention that BoxcarMapper doesn't apply function with each boxcar
    """
    series = N.arange(10)
    onsets = N.arange(10)

    # a boxlength of zero must be rejected
    self.failUnlessRaises(ValueError, BoxcarMapper, onsets, 0)

    # now do an identity transformation
    identity = BoxcarMapper(onsets, 1)
    mapped = identity(series)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    first_column_matches = (mapped[:,0] == series).all()
    self.failUnless(first_column_matches)

    # now check for illegal boxes
    self.failUnlessRaises(ValueError, BoxcarMapper(onsets, 2), series)

    # now something that should work
    pairs = BoxcarMapper(N.arange(9), 2)
    mapped = pairs(series)
    expected_pairs = N.vstack((N.arange(9), N.arange(9)+1)).T
    self.failUnless((mapped == expected_pairs).all())

    # now test for proper data shape
    ones = N.ones((10,3,4,2))
    mapped = BoxcarMapper([2, 4, 3, 5], 4)(ones)
    self.failUnless(mapped.shape == (4,4,3,4,2))

    # test reverse
    volume = N.arange(240).reshape(10, 3, 4, 2)
    m = BoxcarMapper([2, 4, 3, 5], 2)
    forward_out = m.forward(volume)
    self.failUnless(forward_out.shape == (4, 2, 3, 4, 2))

    # try full reconstruct
    reverse_out = m.reverse(forward_out)
    # shape has to match
    self.failUnless(reverse_out.shape == volume.shape)

    # first two samples were not in any of the boxcars and cannot be
    # reconstructed
    self.failUnless(N.sum(reverse_out[:2]) == 0)
    # same for the last ones
    self.failUnless(N.sum(reverse_out[7:]) == 0)

    # check proper reconstruction of non-conflicting sample
    self.failUnless((reverse_out[2].ravel() == N.arange(48, 72)).all())

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    self.failUnless((reverse_out[3].ravel() == N.arange(72, 96)).all())

    # test reverse of a single sample
    single = N.arange(48).reshape(2, 3, 4, 2)
    self.failUnless((single == m.reverse(single)).all())
    # should not work for shape mismatch
    self.failUnlessRaises(ValueError, m.reverse, single[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    broadcast = m.forward(N.arange(24).reshape(3, 4, 2))
    expected_broadcast = N.array(2 * [N.arange(24).reshape(3, 4, 2)])
    self.failUnless((broadcast == expected_broadcast).all())
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be a time
    series, but could also be any other type of ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input
    samples, can be automatically extracted using
    :class:`~mvpa.misc.support.Event` definition lists. For each event all
    samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to-be-specified samples attribute
    in the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Must contain at least one event.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it
      is set to None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111, 1.66666667, 2.22222222, 2.77777778],
           [ 2.22222222, 2.77777778, 3.33333333, 3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    # fail early with a clear message; otherwise a missing or empty event
    # list would surface further down as an obscure TypeError/IndexError
    if events is None or len(events) == 0:
        raise ValueError('At least one event definition is required '
                         '(got %r)' % (events,))

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: all samples from the
            # matched one on that start before the event ends
            tail = tvec[idx:]
            ev['duration'] = len(tail[tail < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)

    # checks
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)

    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))

    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got
            # mapped by BoxcarMapper (i.e. is multi-dimensional). We move it
            # aside under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds