def test_datasetmapping():
    # Forward- and reverse-map a complete Dataset through a BoxcarMapper and
    # verify what happens to samples, sample attributes and feature attributes.

    # 6 samples, 4 features
    n_samples, n_features = 6, 4
    samples = np.arange(n_samples * n_features).reshape(n_samples, n_features)
    sample_attrs = {'timepoints': np.arange(n_samples),
                    'multidim': samples.copy()}
    feature_attrs = {'fid': np.arange(n_features)}
    orig = Dataset(samples, sa=sample_attrs, fa=feature_attrs)

    # boxcars starting at 0 and 1 overlap, the one starting at 4 does not
    onsets = [0, 1, 4]
    boxlength = 2
    n_boxes = len(onsets)
    mapper = BoxcarMapper(onsets, boxlength, inspace='boxy')
    # train is critical
    mapper.train(orig)
    boxed = mapper.forward(orig)

    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, boxlength)

    # every sample attribute survives, but gets rotated/compressed into a
    # multidimensional attribute; the mapper adds one attribute of its own
    expected_sa_keys = ['boxy_onsetidx'] + sorted(orig.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_keys)
    assert_equal(boxed.sa.multidim.shape,
                 (n_boxes, boxlength, orig.nfeatures))
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, boxlength))
    expected_timepoints = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)

    # feature attributes also get rotated and broadcasted
    assert_array_equal(boxed.fa.fid, [orig.fa.fid, orig.fa.fid])
    # and finally there is a new one
    expected_offsets = np.repeat(np.arange(boxlength), 4).reshape(2, -1)
    assert_array_equal(boxed.fa.boxy_offsetidx, expected_offsets)

    # now see how it works on reverse()
    restored = mapper.reverse(boxed)
    # we got at least something of all original attributes back
    assert_equal(sorted(restored.sa.keys()), sorted(orig.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(orig.fa.keys()))

    # it is not possible to reconstruct the full samples array; some samples
    # may even show up multiple times (when there are overlapping boxcars)
    expected_samples = np.array([[0, 1, 2, 3],
                                 [4, 5, 6, 7],
                                 [4, 5, 6, 7],
                                 [8, 9, 10, 11],
                                 [16, 17, 18, 19],
                                 [20, 21, 22, 23]])
    assert_array_equal(restored.samples, expected_samples)
    assert_array_equal(restored.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(restored.sa.multidim,
                       orig.sa.multidim[restored.sa.timepoints])

    # but feature attributes should be fully recovered
    assert_array_equal(restored.fa.fid, orig.fa.fid)
def test_datasetmapping():
    # Forward- and reverse-map a complete Dataset through a BoxcarMapper and
    # verify what happens to samples, sample attributes and feature attributes.

    # 6 samples, 4 features
    n_samples, n_features = 6, 4
    samples = np.arange(n_samples * n_features).reshape(n_samples, n_features)
    sample_attrs = {'timepoints': np.arange(n_samples),
                    'multidim': samples.copy()}
    feature_attrs = {'fid': np.arange(n_features)}
    orig = Dataset(samples, sa=sample_attrs, fa=feature_attrs)

    # boxcars starting at 0 and 1 overlap, the one starting at 4 does not
    onsets = [0, 1, 4]
    boxlength = 2
    n_boxes = len(onsets)
    mapper = BoxcarMapper(onsets, boxlength, inspace='boxy')
    # train is critical
    mapper.train(orig)
    boxed = mapper.forward(orig)

    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, boxlength)

    # every sample attribute survives, but gets rotated/compressed into a
    # multidimensional attribute; the mapper adds one attribute of its own
    expected_sa_keys = ['boxy_onsetidx'] + sorted(orig.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_keys)
    assert_equal(boxed.sa.multidim.shape,
                 (n_boxes, boxlength, orig.nfeatures))
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, boxlength))
    expected_timepoints = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)

    # feature attributes also get rotated and broadcasted
    assert_array_equal(boxed.fa.fid, [orig.fa.fid, orig.fa.fid])
    # and finally there is a new one
    expected_offsets = np.repeat(np.arange(boxlength), 4).reshape(2, -1)
    assert_array_equal(boxed.fa.boxy_offsetidx, expected_offsets)

    # now see how it works on reverse()
    restored = mapper.reverse(boxed)
    # we got at least something of all original attributes back
    assert_equal(sorted(restored.sa.keys()), sorted(orig.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(orig.fa.keys()))

    # it is not possible to reconstruct the full samples array; some samples
    # may even show up multiple times (when there are overlapping boxcars)
    expected_samples = np.array([[0, 1, 2, 3],
                                 [4, 5, 6, 7],
                                 [4, 5, 6, 7],
                                 [8, 9, 10, 11],
                                 [16, 17, 18, 19],
                                 [20, 21, 22, 23]])
    assert_array_equal(restored.samples, expected_samples)
    assert_array_equal(restored.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(restored.sa.multidim,
                       orig.sa.multidim[restored.sa.timepoints])

    # but feature attributes should be fully recovered
    assert_array_equal(restored.fa.fid, orig.fa.fid)
def eventrelated_dataset(ds, events=None, time_attr=None, match="prev",
                         eprefix="event"):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series,
    but could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Although the argument defaults to None for
      signature compatibility, at least one event has to be provided.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      defined for the events.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This
    is only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {"prev": "floor",
                     "next": "ceil",
                     "closest": "round"}[match]

    # `events` only defaults to None; without at least one event there is
    # nothing to take the set of event properties from, so fail early with a
    # clear message instead of an opaque TypeError/IndexError further down
    if events is None or len(events) == 0:
        raise ValueError("'events' must be a non-empty list of event "
                         "definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev["onset"], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev["orig_offset"] = ev["onset"] - tvec[idx]
            # rescue the real onset into a new attribute
            ev["orig_onset"] = ev["onset"]
            ev["orig_duration"] = ev["duration"]
            # figure out how many samples we need: count all samples from the
            # matched one onwards that lie before the real end of the event
            event_end = ev["onset"] + ev["duration"]
            following = tvec[idx:]
            ev["duration"] = len(following[following < event_end])
            # new onset is sample index
            ev["onset"] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ["onset", "duration"]:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars["duration"])
    if __debug__:
        if min(evvars["duration"]) != boxlength:
            warning("Boxcar mapper will use maximum boxlength (%i) of all "
                    "provided Events." % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + "_" + a] = ds.sa[a]
        if a in ("onset", "duration"):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
def test_simpleboxcar():
    # Exercise BoxcarMapper on plain arrays: argument checking, forward
    # mapping of 2D and higher-dimensional data, and reverse mapping.
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a box length of zero has to be refused by the constructor
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxes of length one: an identity transformation
    identity = BoxcarMapper(onsets, 1)
    boxed = identity.forward(column)
    # taking element 0 along the second axis gives back the explicit 2D
    # layout of the input
    assert_array_equal(boxed[:, 0], column)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        too_long = BoxcarMapper(onsets, 2)
        assert_raises(ValueError, too_long.train, column)

    # now something that should work
    nbox = 9
    boxlength = 2
    onsets = np.arange(nbox)
    mapper = BoxcarMapper(onsets, boxlength)
    boxed = mapper(column)
    # the box axis has to be inserted right behind the samples axis
    expected_shape = (nbox, boxlength) + column.shape[1:]
    assert_equal(boxed.shape, expected_shape)
    # check actual values, squeezing the last dim for simplicity
    expected_values = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(boxed.squeeze(), expected_values)

    # now test for proper data shape
    volume = np.ones((10, 3, 4, 2))
    onsets = [2, 4, 3, 5]
    boxed = BoxcarMapper(onsets, 4)(volume)
    assert_equal(boxed.shape, (4, 4, 3, 4, 2))

    # test reverse
    volume = np.arange(240).reshape(10, 3, 4, 2)
    onsets = [2, 4, 3, 5]
    boxlength = 2
    mapper = BoxcarMapper(onsets, boxlength)
    mapper.train(volume)
    forwarded = mapper.forward(volume)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    reversed_ = mapper.reverse(forwarded)
    # shape has to match
    assert_equal(reversed_.shape,
                 (len(onsets) * boxlength,) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((reversed_ >= 24).all())
    assert_true((reversed_ < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(reversed_[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(reversed_[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    single = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(single, mapper.reverse1(single))
    # reversing single[0] should not work because of the shape mismatch, but
    # it does work and is useful when reverse mapping sample attributes, so
    # no ValueError is asserted for it here

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw = np.arange(24).reshape(3, 4, 2)
    broadcasted = mapper.forward1(raw)
    assert_array_equal(broadcasted,
                       np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def test_simpleboxcar():
    # Exercise BoxcarMapper on plain arrays: argument checking, forward
    # mapping of 2D and higher-dimensional data, and reverse mapping.
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a box length of zero has to be refused by the constructor
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxes of length one: an identity transformation
    identity = BoxcarMapper(onsets, 1)
    boxed = identity.forward(column)
    # taking element 0 along the second axis gives back the explicit 2D
    # layout of the input
    assert_array_equal(boxed[:, 0], column)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        too_long = BoxcarMapper(onsets, 2)
        assert_raises(ValueError, too_long.train, column)

    # now something that should work
    nbox = 9
    boxlength = 2
    onsets = np.arange(nbox)
    mapper = BoxcarMapper(onsets, boxlength)
    boxed = mapper(column)
    # the box axis has to be inserted right behind the samples axis
    expected_shape = (nbox, boxlength) + column.shape[1:]
    assert_equal(boxed.shape, expected_shape)
    # check actual values, squeezing the last dim for simplicity
    expected_values = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(boxed.squeeze(), expected_values)

    # now test for proper data shape
    volume = np.ones((10, 3, 4, 2))
    onsets = [2, 4, 3, 5]
    boxed = BoxcarMapper(onsets, 4)(volume)
    assert_equal(boxed.shape, (4, 4, 3, 4, 2))

    # test reverse
    volume = np.arange(240).reshape(10, 3, 4, 2)
    onsets = [2, 4, 3, 5]
    boxlength = 2
    mapper = BoxcarMapper(onsets, boxlength)
    mapper.train(volume)
    forwarded = mapper.forward(volume)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    reversed_ = mapper.reverse(forwarded)
    # shape has to match
    assert_equal(reversed_.shape,
                 (len(onsets) * boxlength,) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((reversed_ >= 24).all())
    assert_true((reversed_ < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(reversed_[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(reversed_[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    single = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(single, mapper.reverse1(single))
    # reversing single[0] should not work because of the shape mismatch, but
    # it does work and is useful when reverse mapping sample attributes, so
    # no ValueError is asserted for it here

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw = np.arange(24).reshape(3, 4, 2)
    broadcasted = mapper.forward1(raw)
    assert_array_equal(broadcasted,
                       np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series,
    but could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Although the argument defaults to None for
      signature compatibility, at least one event has to be provided.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      defined for the events.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This
    is only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    # `events` only defaults to None; without at least one event there is
    # nothing to take the set of event properties from, so fail early with a
    # clear message instead of an opaque TypeError/IndexError further down
    if events is None or len(events) == 0:
        raise ValueError("'events' must be a non-empty list of event "
                         "definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: count all samples from the
            # matched one onwards that lie before the real end of the event
            event_end = ev['onset'] + ev['duration']
            following = tvec[idx:]
            ev['duration'] = len(following[following < event_end])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if min(evvars['duration']) != boxlength:
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ('onset', 'duration'):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds