示例#1
0
def test_datasetmapping():
    # Exercise BoxcarMapper.forward() and .reverse() on a Dataset that
    # carries both sample and feature attributes.

    # source dataset: 6 samples x 4 features
    samples = np.arange(24).reshape(6, 4)
    sample_attrs = {'timepoints': np.arange(6),
                    'multidim': samples.copy()}
    feature_attrs = {'fid': np.arange(4)}
    orig = Dataset(samples, sa=sample_attrs, fa=feature_attrs)

    # three boxcars of length two; the first two share sample 1, the third
    # one stands alone
    onsets = [0, 1, 4]
    width = 2
    n_boxes = len(onsets)
    mapper = BoxcarMapper(onsets, width, inspace='boxy')
    # the mapper has to be trained before it can be applied
    mapper.train(orig)
    boxed = mapper.forward(orig)

    # one output sample per boxcar, one "feature" per position in the box
    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, width)

    # every sample attribute survives (plus the onset index named after
    # `inspace`), but each is stacked into a per-box array
    expected_sa_keys = ['boxy_onsetidx'] + sorted(orig.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_keys)
    assert_equal(boxed.sa.multidim.shape, (n_boxes, width, orig.nfeatures))
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, width))
    expected_timepoints = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)

    # feature attributes are repeated once per position in the box
    assert_array_equal(boxed.fa.fid, [orig.fa.fid, orig.fa.fid])
    # and a new feature attribute holds the offset within the box
    expected_offsets = np.repeat(np.arange(width), 4).reshape(2, -1)
    assert_array_equal(boxed.fa.boxy_offsetidx, expected_offsets)

    # now the way back
    restored = mapper.reverse(boxed)
    # the attribute collections have the same keys as the original dataset
    assert_equal(sorted(restored.sa.keys()), sorted(orig.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(orig.fa.keys()))
    # the full samples array cannot be reconstructed: samples outside any
    # boxcar are missing, and samples inside overlapping boxcars show up
    # more than once
    expected_samples = np.array([
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
    ])
    assert_array_equal(restored.samples, expected_samples)
    assert_array_equal(restored.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(restored.sa.multidim,
                       orig.sa.multidim[restored.sa.timepoints])
    # feature attributes, in contrast, have to come back unchanged
    assert_array_equal(restored.fa.fid, orig.fa.fid)
def test_datasetmapping():
    # Exercise BoxcarMapper.forward() and .reverse() on a Dataset that
    # carries both sample and feature attributes.

    # source dataset: 6 samples x 4 features
    samples = np.arange(24).reshape(6, 4)
    sample_attrs = {
        'timepoints': np.arange(6),
        'multidim': samples.copy(),
    }
    feature_attrs = {'fid': np.arange(4)}
    orig = Dataset(samples, sa=sample_attrs, fa=feature_attrs)

    # three boxcars of length two; the first two share sample 1, the third
    # one stands alone
    onsets = [0, 1, 4]
    width = 2
    n_boxes = len(onsets)
    mapper = BoxcarMapper(onsets, width, inspace='boxy')
    # the mapper has to be trained before it can be applied
    mapper.train(orig)
    boxed = mapper.forward(orig)

    # one output sample per boxcar, one "feature" per position in the box
    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, width)

    # every sample attribute survives (plus the onset index named after
    # `inspace`), but each is stacked into a per-box array
    expected_sa_keys = ['boxy_onsetidx'] + sorted(orig.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_keys)
    assert_equal(boxed.sa.multidim.shape, (n_boxes, width, orig.nfeatures))
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, width))
    expected_timepoints = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)

    # feature attributes are repeated once per position in the box
    assert_array_equal(boxed.fa.fid, [orig.fa.fid, orig.fa.fid])
    # and a new feature attribute holds the offset within the box
    expected_offsets = np.repeat(np.arange(width), 4).reshape(2, -1)
    assert_array_equal(boxed.fa.boxy_offsetidx, expected_offsets)

    # now the way back
    restored = mapper.reverse(boxed)
    # the attribute collections have the same keys as the original dataset
    assert_equal(sorted(restored.sa.keys()), sorted(orig.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(orig.fa.keys()))
    # the full samples array cannot be reconstructed: samples outside any
    # boxcar are missing, and samples inside overlapping boxcars show up
    # more than once
    expected_samples = np.array([
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
    ])
    assert_array_equal(restored.samples, expected_samples)
    assert_array_equal(restored.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(restored.sa.multidim,
                       orig.sa.multidim[restored.sa.timepoints])
    # feature attributes, in contrast, have to come back unchanged
    assert_array_equal(restored.fa.fid, orig.fa.fid)
示例#3
0
def eventrelated_dataset(ds, events=None, time_attr=None, match="prev", eprefix="event"):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset. Although the argument defaults to None, a non-empty
      list is required. The set of properties is taken from the first event.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closest preceding sample, 'next' the closest following sample and
      'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If ``events`` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If ``match`` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {"prev": "floor", "next": "ceil", "closest": "round"}[match]

    # fail early and clearly: without this check a missing or empty event list
    # only surfaces further down as an unrelated TypeError/IndexError
    if events is None or len(events) == 0:
        raise ValueError("'events' must be a non-empty list of event definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev["onset"], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev["orig_offset"] = ev["onset"] - tvec[idx]
            # rescue the real onset and duration into new attributes
            ev["orig_onset"] = ev["onset"]
            ev["orig_duration"] = ev["duration"]
            # figure out how many samples we need: count those from the matched
            # sample onwards whose time stamp lies before the real end of the
            # event (real onset + real duration)
            ev["duration"] = len(tvec[idx:][tvec[idx:] < ev["onset"] + ev["duration"]])
            # new onset is sample index
            ev["onset"] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all " "events (could not find '%s')" % k)
    # checks
    for p in ["onset", "duration"]:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars["duration"])
    if __debug__:
        if not boxlength == min(evvars["duration"]):
            warning("Boxcar mapper will use maximum boxlength (%i) of all " "provided Events." % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + "_" + a] = ds.sa[a]
        if a in ["onset", "duration"]:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
示例#4
0
def test_simpleboxcar():
    # Run BoxcarMapper on plain arrays: construction checks, forward
    # mapping, reverse mapping and the single-sample variants.

    # ten samples with a single feature each (a column vector)
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a box length of zero has to be refused by the constructor
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxes of length one: an identity transformation
    identity = BoxcarMapper(onsets, 1)
    boxed = identity.forward(column)
    # picking index 0 on the second axis gives an explicit 2D array back
    assert_array_equal(boxed[:, 0], column)

    # illegal boxes; the mapper checks this condition only in __debug__
    # (presumably the last box of length two would run past the data)
    if __debug__:
        illegal = BoxcarMapper(onsets, 2)
        assert_raises(ValueError, illegal.train, column)

    # a configuration that is expected to work
    n_boxes = 9
    width = 2
    onsets = np.arange(n_boxes)
    mapper = BoxcarMapper(onsets, width)
    boxed = mapper(column)
    # the output gains one dimension for the position within the box
    assert_equal(boxed.shape, (n_boxes, width) + column.shape[1:])
    # compare the values with the trailing singleton dimension squeezed away
    expected_pairs = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(boxed.squeeze(), expected_pairs)

    # output shape for multi-dimensional samples
    ones = np.ones((10, 3, 4, 2))
    onsets = [2, 4, 3, 5]
    boxed = BoxcarMapper(onsets, 4)(ones)
    assert_equal(boxed.shape, (4, 4, 3, 4, 2))

    # reverse mapping
    volume = np.arange(240).reshape(10, 3, 4, 2)
    onsets = [2, 4, 3, 5]
    width = 2
    m = BoxcarMapper(onsets, width)
    m.train(volume)
    forwarded = m.forward(volume)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # attempt a full reconstruction
    reverted = m.reverse(forwarded)
    # one output sample per box position, sample shape as in the input
    assert_equal(reverted.shape, (len(onsets) * width,) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((reverted >= 24).all())
    assert_true((reverted < 168).all())

    # a sample that belongs to just one box is reconstructed properly
    assert_array_equal(reverted[0].ravel(), np.arange(48, 72))

    # and so is a sample that is part of several mapped samples
    assert_array_equal(reverted[1].ravel(), np.arange(72, 96))

    # reverse mapping of one single (boxed) sample
    one_box = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(one_box, m.reverse1(one_box))
    # Deliberately NOT checked: m.reverse(one_box[0]) raising ValueError.
    # A shape mismatch should not work, but it does work and is useful when
    # reverse mapping sample attributes.

    # forward1() broadcasts a 'raw' sample into a proper boxcar
    broadcasted = m.forward1(np.arange(24).reshape(3, 4, 2))
    expected_box = np.array([np.arange(24).reshape(3, 4, 2)] * 2)
    assert_array_equal(broadcasted, expected_box)
def test_simpleboxcar():
    # Run BoxcarMapper on plain arrays: construction checks, forward
    # mapping, reverse mapping and the single-sample variants.

    # ten samples with a single feature each (a column vector)
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a box length of zero has to be refused by the constructor
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxes of length one: an identity transformation
    identity = BoxcarMapper(onsets, 1)
    boxed = identity.forward(column)
    # picking index 0 on the second axis gives an explicit 2D array back
    assert_array_equal(boxed[:, 0], column)

    # illegal boxes; the mapper checks this condition only in __debug__
    # (presumably the last box of length two would run past the data)
    if __debug__:
        illegal = BoxcarMapper(onsets, 2)
        assert_raises(ValueError, illegal.train, column)

    # a configuration that is expected to work
    n_boxes = 9
    width = 2
    onsets = np.arange(n_boxes)
    mapper = BoxcarMapper(onsets, width)
    boxed = mapper(column)
    # the output gains one dimension for the position within the box
    assert_equal(boxed.shape, (n_boxes, width) + column.shape[1:])
    # compare the values with the trailing singleton dimension squeezed away
    expected_pairs = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(boxed.squeeze(), expected_pairs)

    # output shape for multi-dimensional samples
    ones = np.ones((10, 3, 4, 2))
    onsets = [2, 4, 3, 5]
    boxed = BoxcarMapper(onsets, 4)(ones)
    assert_equal(boxed.shape, (4, 4, 3, 4, 2))

    # reverse mapping
    volume = np.arange(240).reshape(10, 3, 4, 2)
    onsets = [2, 4, 3, 5]
    width = 2
    m = BoxcarMapper(onsets, width)
    m.train(volume)
    forwarded = m.forward(volume)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # attempt a full reconstruction
    reverted = m.reverse(forwarded)
    # one output sample per box position, sample shape as in the input
    assert_equal(reverted.shape, (len(onsets) * width, ) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((reverted >= 24).all())
    assert_true((reverted < 168).all())

    # a sample that belongs to just one box is reconstructed properly
    assert_array_equal(reverted[0].ravel(), np.arange(48, 72))

    # and so is a sample that is part of several mapped samples
    assert_array_equal(reverted[1].ravel(), np.arange(72, 96))

    # reverse mapping of one single (boxed) sample
    one_box = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(one_box, m.reverse1(one_box))
    # Deliberately NOT checked: m.reverse(one_box[0]) raising ValueError.
    # A shape mismatch should not work, but it does work and is useful when
    # reverse mapping sample attributes.

    # forward1() broadcasts a 'raw' sample into a proper boxcar
    broadcasted = m.forward1(np.arange(24).reshape(3, 4, 2))
    expected_box = np.array([np.arange(24).reshape(3, 4, 2)] * 2)
    assert_array_equal(broadcasted, expected_box)
示例#6
0
def eventrelated_dataset(ds,
                         events=None,
                         time_attr=None,
                         match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset. Although the argument defaults to None, a non-empty
      list is required. The set of properties is taken from the first event.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closest preceding sample, 'next' the closest following sample and
      'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If ``events`` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If ``match`` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    # fail early and clearly: without this check a missing or empty event list
    # only surfaces further down as an unrelated TypeError/IndexError
    if events is None or len(events) == 0:
        raise ValueError(
            "'events' must be a non-empty list of event definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset and duration into new attributes
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: count those from the matched
            # sample onwards whose time stamp lies before the real end of the
            # event (real onset + real duration)
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not boxlength == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds