Example #1
 def test_hpal_svd_combo(self):
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]] * n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     dss_rotated = [
          random_affine_transformation(dss_rotated[i]) for i in range(n)
     ]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [
         m.forward(ds_) for m, ds_ in zip(mappers_orig, dss_rotated)
     ]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             assert (sd.nfeatures == output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
         "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(np.corrcoef(sd.samples.T,
                                    ds_orig.samples.T)[nf_true:, :nf_true],
                        k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
                     msg="Expected original dimensions after "
                     "SVD. Got correlations %s." % sv_corrs_orig)
 def get_testdata(self):
     # get a dataset with some prominent trends in it
     ds4l = datasets['uni4large']
     # lets select for now only meaningful features
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     zscore(ds_orig, chunks_attr=None)
     n = 4  # # of datasets to generate
     Rs, dss_rotated, dss_rotated_clean = [], [], []
     # now lets compose derived datasets by using some random
     # rotation(s)
     while len(dss_rotated_clean) < n:
         ds_ = random_affine_transformation(ds_orig,
                                            scale_fac=1.0,
                                            shift_fac=0.)
         if ds_.a.random_scale <= 0:
             continue
         Rs.append(ds_.a.random_rotation)
         zscore(ds_, chunks_attr=None)
         dss_rotated_clean.append(ds_)
         i = len(dss_rotated_clean) - 1
         ds_2 = hstack(
             [ds_, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]])
         zscore(ds_2, chunks_attr=None)
         dss_rotated.append(ds_2)
     return ds_orig, dss_rotated, dss_rotated_clean, Rs
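The test above uses hstack purely column-wise: the shared (nonbogus) features and a slice of subject-specific bogus features are glued into one wider dataset over the same samples. A minimal sketch of that pattern follows; the import paths and the toy shapes are my assumptions, not part of the original test.

import numpy as np
from mvpa2.datasets.base import Dataset     # import paths assumed
from mvpa2.base.dataset import hstack

# stand-ins for ds_orig (shared signal) and per-subject bogus features
ds_shared = Dataset(np.random.randn(10, 4))   # 10 samples x 4 shared features
ds_extra = Dataset(np.random.randn(10, 3))    # 10 samples x 3 subject-only features

# column-wise concatenation: samples stay the same, features are appended
ds_subject = hstack((ds_shared, ds_extra))
assert ds_subject.nsamples == 10
assert ds_subject.nfeatures == 4 + 3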
Example #3
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_(merged.nfeatures == 5)
    l12 = [1]*5 + [2]*3
    l1 = [1]*8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1]*5 + [2]*3 + [3]*2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1]*5 + [0]*5)
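As a quick reference for the stacking rules exercised in this test: vstack concatenates samples and requires matching features, while hstack concatenates features and requires matching samples. A minimal sketch, assuming PyMVPA import paths (my assumption):

import numpy as np
from mvpa2.datasets.base import Dataset      # import paths assumed
from mvpa2.base.dataset import hstack, vstack

a = Dataset.from_wizard(np.zeros((5, 5)), targets=1, chunks=1)
b = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)

rows = vstack((a, b))            # (5 + 3) samples x 5 features
assert rows.shape == (8, 5)
assert list(rows.sa.targets) == [1] * 5 + [2] * 3

cols = hstack((a, a))            # 5 samples x (5 + 5) features
assert cols.shape == (5, 10)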
Example #4
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_(merged.nfeatures == 5)
    l12 = [1] * 5 + [2] * 3
    l1 = [1] * 8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1] * 5 + [2] * 3 + [3] * 2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1] * 5 + [0] * 5)
Example #5
def test_stack_add_dataset_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.a['one'] = np.ones(2)
    data0.a['two'] = 2
    data0.a['three'] = 'three'
    data0.a['common'] = range(10)
    data0.a['array'] = np.arange(10)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1.a['one'] = np.ones(3)
    data1.a['two'] = 3
    data1.a['four'] = 'four'
    data1.a['common'] = range(10)
    data1.a['array'] = np.arange(10)


    vstacker = lambda x: vstack((data0, data1), a=x)
    hstacker = lambda x: hstack((data0, data1), a=x)

    add_params = (1, None, 'unique', 'uniques', 'all', 'drop_nonunique')

    for stacker in (vstacker, hstacker):
        for add_param in add_params:
            if add_param == 'unique':
                assert_raises(DatasetError, stacker, add_param)
                continue

            r = stacker(add_param)

            if add_param == 1:
                assert_array_equal(data1.a.one, r.a.one)
                assert_equal(r.a.two, 3)
                assert_equal(r.a.four, 'four')
                assert_true('three' not in r.a.keys())
                assert_true('array' in r.a.keys())
            elif add_param == 'uniques':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.four, ('four',))
            elif add_param == 'all':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.three, ('three', None))
            elif add_param == 'drop_nonunique':
                assert_equal(set(r.a.keys()),
                             set(['common', 'three', 'four', 'array']))
                assert_equal(r.a.three, 'three')
                assert_equal(r.a.four, 'four')
                assert_equal(r.a.common, range(10))
                assert_array_equal(r.a.array, np.arange(10))
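The test above pins down how dataset-level attributes (.a) are merged during stacking via the a= argument. Below is a compact usage sketch of the two modes used most often, 'drop_nonunique' and 'all'; the attribute names and values are made up for illustration, and the import paths are my assumptions.

import numpy as np
from mvpa2.datasets.base import Dataset      # import paths assumed
from mvpa2.base.dataset import vstack

left = Dataset.from_wizard(np.zeros((2, 3)), targets=1)
right = Dataset.from_wizard(np.ones((2, 3)), targets=2)
left.a['scanner'] = '3T'      # identical in both inputs
right.a['scanner'] = '3T'
left.a['run'] = 1             # conflicting values
right.a['run'] = 2

kept = vstack((left, right), a='drop_nonunique')
assert 'scanner' in kept.a.keys() and 'run' not in kept.a.keys()

everything = vstack((left, right), a='all')
assert everything.a.run == (1, 2)   # conflicting values are collected into a tuple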
Example #6
def test_hstack():
    """Additional tests for hstacking of datasets
    """
    ds3d = datasets['3dsmall']
    nf1 = ds3d.nfeatures
    nf3 = 3 * nf1
    ds3dstacked = hstack((ds3d, ds3d, ds3d))
    ok_(ds3dstacked.nfeatures == nf3)
    for fav in ds3dstacked.fa.values():
        v = fav.value
        ok_(len(v) == nf3)
        assert_array_equal(v[:nf1], v[nf1:2 * nf1])
        assert_array_equal(v[2 * nf1:], v[nf1:2 * nf1])
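This test relies on feature attributes traveling with their columns when datasets are hstacked. A tiny sketch of that behavior with a made-up per-feature attribute; import paths are assumed.

import numpy as np
from mvpa2.datasets.base import Dataset      # import paths assumed
from mvpa2.base.dataset import hstack

ds = Dataset.from_wizard(np.zeros((4, 2)), targets=1)
ds.fa['roi'] = np.array([10, 11])            # one value per feature

stacked = hstack((ds, ds, ds))
assert stacked.nfeatures == 3 * 2
# the per-feature attribute is concatenated along with the features
assert list(stacked.fa.roi) == [10, 11] * 3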
Example #7
def test_hstack():
    """Additional tests for hstacking of datasets
    """
    ds3d = datasets['3dsmall']
    nf1 = ds3d.nfeatures
    nf3 = 3 * nf1
    ds3dstacked = hstack((ds3d, ds3d, ds3d))
    ok_(ds3dstacked.nfeatures == nf3)
    for fav in ds3dstacked.fa.values():
        v = fav.value
        ok_(len(v) == nf3)
        assert_array_equal(v[:nf1], v[nf1:2*nf1])
        assert_array_equal(v[2*nf1:], v[nf1:2*nf1])
Example #8
 def transform(self, ds):
     
     ds_ = SampleSlicer(self._selection).transform(ds)
     
     iterable = [np.unique(ds_.sa[a].value) for a in self._attr]
     
     ds_stack = []
     for attr in product(*iterable):
         
          mask = np.ones_like(ds_.targets, dtype=bool)
         
         for i, a in enumerate(attr):
             mask = np.logical_and(mask, ds_.sa[self._attr[i]].value == a)
         
         ds_stacked = hstack([d for d in ds_[mask]])
         ds_stacked = self.update_attribute(ds_stacked)
         ds_stack.append(ds_stacked)
     
     return vstack(ds_stack)
Example #9
    def transform(self, ds):

        ds_ = SampleSlicer(self._selection).transform(ds)

        iterable = [np.unique(ds_.sa[a].value) for a in self._attr]

        ds_stack = []
        for attr in product(*iterable):

            mask = np.ones_like(ds_.targets, dtype=bool)

            for i, a in enumerate(attr):
                mask = np.logical_and(mask, ds_.sa[self._attr[i]].value == a)

            ds_stacked = hstack([d for d in ds_[mask]])
            ds_stacked = self.update_attribute(ds_stacked)
            ds_stack.append(ds_stacked)

        return vstack(ds_stack)
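The key move in this transform is hstacking single-sample datasets: iterating over ds_[mask] yields one dataset per selected sample, so the inner hstack turns k samples into one wide sample of k * nfeatures features, and the final vstack reassembles the per-condition rows. A minimal sketch of that flattening step, assuming PyMVPA import paths and that integer indexing returns a single-sample dataset (the same behavior the code above relies on):

import numpy as np
from mvpa2.datasets.base import Dataset      # import paths assumed
from mvpa2.base.dataset import hstack

block = Dataset(np.arange(6).reshape(3, 2))  # 3 samples x 2 features

# lay the 3 samples side by side as one wide sample
wide = hstack([block[i] for i in range(block.nsamples)])
assert wide.shape == (1, 3 * 2)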
 def get_testdata(self):
     # get a dataset with some prominent trends in it
     ds4l = datasets['uni4large']
     # lets select for now only meaningful features
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     zscore(ds_orig, chunks_attr=None)
     n = 4  # # of datasets to generate
     Rs, dss_rotated, dss_rotated_clean = [], [], []
     # now lets compose derived datasets by using some random
     # rotation(s)
     while len(dss_rotated_clean) < n:
         ds_ = random_affine_transformation(ds_orig, scale_fac=1.0, shift_fac=0.)
         if ds_.a.random_scale <= 0:
             continue
         Rs.append(ds_.a.random_rotation)
         zscore(ds_, chunks_attr=None)
         dss_rotated_clean.append(ds_)
         i = len(dss_rotated_clean) - 1
         ds_2 = hstack([ds_, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]])
         zscore(ds_2, chunks_attr=None)
         dss_rotated.append(ds_2)
     return ds_orig, dss_rotated, dss_rotated_clean, Rs
Example #11
 def test_hpal_svd_combo(self):
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]]*n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     dss_rotated = [random_affine_transformation(dss_rotated[i])
                     for i in range(n)]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [m.forward(ds_)
                 for m, ds_ in zip(mappers_orig, dss_rotated)]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_)
                     for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             assert (sd.nfeatures == output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
             "reconstructed SVD dataset. Got correlations %s."
             % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(
             np.corrcoef(sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true],
             k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
         msg="Expected original dimensions after "
             "SVD. Got correlations %s."
             % sv_corrs_orig)
Example #12
    def _call(self, ds):
        # local binding
        generator = self._generator
        node = self._node
        ca = self.ca
        space = self.get_space()
        concat_as = self._concat_as

        if self.ca.is_enabled("stats") and (not 'stats' in node.ca or
                                            not node.ca.is_enabled("stats")):
            warning("'stats' conditional attribute was enabled, but "
                    "the assigned node '%s' either doesn't support it, "
                    "or it is disabled" % node)
        # precharge conditional attributes
        ca.datasets = []

        # run the node on all generated datasets
        results = []
        for i, sds in enumerate(generator.generate(ds)
                                if generator
                                else [ds]):
            if __debug__:
                debug('REPM', "%d-th iteration of %s on %s",
                      (i, self, sds))
            if ca.is_enabled("datasets"):
                # store dataset in ca
                ca.datasets.append(sds)
            # run the beast
            result = node(sds)
            # callback
            if self._callback is not None:
                self._callback(data=sds, node=node, result=result)
            # subclass postprocessing
            result = self._repetition_postcall(sds, node, result)
            if space:
                # XXX maybe try to get something more informative from the
                # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
                # to indicate what was trained and what was tested. Now it is
                # more tricky, because `node` could be anything
                result.set_attr(space, (i,))
            # store
            results.append(result)

            if ca.is_enabled("stats") and 'stats' in node.ca \
               and node.ca.is_enabled("stats"):
                if not ca.is_set('stats'):
                    # create empty stats container of matching type
                    ca.stats = node.ca['stats'].value.__class__()
                # harvest summary stats
                ca['stats'].value.__iadd__(node.ca['stats'].value)

        # charge conditional attribute
        self.ca.repetition_results = results

        # stack all results into a single Dataset
        if concat_as == 'samples':
            results = vstack(results, True)
        elif concat_as == 'features':
            results = hstack(results, True)
        else:
            raise ValueError("Unknown concatenation mode '%s'" % concat_as)
        # no need to store the raw results, since the Measure class will
        # automatically store them in a CA
        return results
Example #13
    def _call(self, ds):
        # local binding
        generator = self._generator
        node = self._node
        ca = self.ca
        space = self.get_space()
        concat_as = self._concat_as

        if self.ca.is_enabled("stats") and (not 'stats' in node.ca or
                                            not node.ca.is_enabled("stats")):
            warning("'stats' conditional attribute was enabled, but "
                    "the assigned node '%s' either doesn't support it, "
                    "or it is disabled" % node)
        # precharge conditional attributes
        ca.datasets = []

        # run the node on all generated datasets
        results = []
        for i, sds in enumerate(generator.generate(ds) if generator else [ds]):
            if __debug__:
                debug('REPM', "%d-th iteration of %s on %s", (i, self, sds))
            if ca.is_enabled("datasets"):
                # store dataset in ca
                ca.datasets.append(sds)
            # run the beast
            result = node(sds)
            # callback
            if self._callback is not None:
                self._callback(data=sds, node=node, result=result)
            # subclass postprocessing
            result = self._repetition_postcall(sds, node, result)
            if space:
                # XXX maybe try to get something more informative from the
                # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
                # to indicate what was trained and what was tested. Now it is
                # more tricky, because `node` could be anything
                result.set_attr(space, (i, ))
            # store
            results.append(result)

            if ca.is_enabled("stats") and 'stats' in node.ca \
               and node.ca.is_enabled("stats"):
                if not ca.is_set('stats'):
                    # create empty stats container of matching type
                    ca.stats = node.ca['stats'].value.__class__()
                # harvest summary stats
                ca['stats'].value.__iadd__(node.ca['stats'].value)

        # charge conditional attribute
        self.ca.repetition_results = results

        # stack all results into a single Dataset
        if concat_as == 'samples':
            results = vstack(results, True)
        elif concat_as == 'features':
            results = hstack(results, True)
        else:
            raise ValueError("Unknown concatenation mode '%s'" % concat_as)
        # no need to store the raw results, since the Measure class will
        # automatically store them in a CA
        return results
Example #14
def hstack(dsets, pad_to_feature_index=None, hstack_method='drop_nonunique',
                set_empty_value=0.):
    '''Stacks NIML datasets while considering node indices

    Parameters
    ----------
    dsets: list
        datasets to be stacked
    pad_to_feature_index: list or int or None
        If a list, it should have the same length as dsets and indicate
        to which node index each input should be padded. A single int means
        that the same value is used for every dataset in dsets. None means
        no padding, and is only allowed for non-sparse datasets.
    hstack_method: str
        How datasets are stacked; see dataset.hstack.
    set_empty_value: float
        Value to which empty (padded) dataset values are set.

    Returns
    -------
    dset: Dataset
        Data combined from all datasets in dsets.
    '''

    n = len(dsets)

    # make sure pad_to_feature_index has n values
    if pad_to_feature_index is None or type(pad_to_feature_index) is int:
        pad_to_feature_index = [pad_to_feature_index] * n
    elif len(pad_to_feature_index) != n:
        raise ValueError("illegal pad_to_feature_index: expected list or int")

    # labels that can contain node indices
    node_indices_labels = ('node_indices', 'center_ids', 'ids', 'roi_ids')
    node_indices = []

    # allocate space for output
    padded_dsets = []
    hstack_indices = []
    first_node_index = 0
    for i, (dset, pad_to) in enumerate(zip(dsets, pad_to_feature_index)):
        # get node indices in this dataset
        node_index = _find_node_indices(dset, node_indices_labels)
        if node_index is None:
            node_index = np.arange(dset.nfeatures)
        max_node_index = np.max(node_index)

        # make a stripped version - without node index labels
        stripped_dset = dset.copy()
        for label in node_indices_labels:
            if label in stripped_dset.fa:
                stripped_dset.fa.pop(label)

        # see if padding is needed
        if pad_to is None or pad_to == max_node_index + 1:
            if not np.array_equal(np.arange(max_node_index + 1), np.sort(node_index)):
                raise ValueError("Sparse input %d: need pad_to input" % (i + 1))
            padded_dset = stripped_dset
            other_index = np.arange(0)
        else:
            # have to use empty values
            nfeatures_empty = pad_to - dset.nfeatures
            if nfeatures_empty < 0:
                raise ValueError("Dataset has %d features, cannot pad "
                                    "to %d" % (dset.nfeatures, pad_to))

            # make empty array
            empty_arr = np.zeros((dset.nsamples, nfeatures_empty),
                                    dtype=dset.samples.dtype) + set_empty_value
            empty_dset = Dataset(empty_arr, sa=stripped_dset.sa.copy(deep=True))

            # combine current dset and empty array
            padded_dset = dataset.hstack((stripped_dset, empty_dset), hstack_method)

            # set the proper node indices
            other_index = np.setdiff1d(np.arange(pad_to), node_index)

        # sanity check to make sure that indices are ok
        # XXX could be more informative
        if len(np.setdiff1d(node_index, np.arange(pad_to or max_node_index + 1))):
            raise ValueError("Illegal indices")

        hstack_index = node_index + first_node_index
        hstack_other_index = other_index + first_node_index
        first_node_index += pad_to or (max_node_index + 1) # prepare for next iteration

        padded_dsets.append(padded_dset)
        hstack_indices.append(hstack_index)
        if len(other_index):
            hstack_indices.append(hstack_other_index)

    hstack_dset = dataset.hstack(padded_dsets, hstack_method)
    hstack_indices = np.hstack(hstack_indices)

    hstack_dset.fa[node_indices_labels[0]] = hstack_indices

    return hstack_dset
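A hedged usage sketch for the padding-aware hstack above: two sparse surface datasets are padded to a common node count (5 nodes each) before being stacked, and the resulting node_indices cover the combined feature space. It assumes the function above and its module helpers are in scope; the Dataset import path and the toy node indices are my assumptions.

import numpy as np
from mvpa2.datasets.base import Dataset      # import path assumed

# two surface datasets covering different, sparse subsets of 5 nodes
ds_a = Dataset(np.random.randn(4, 3))
ds_a.fa['node_indices'] = np.array([0, 1, 2])
ds_b = Dataset(np.random.randn(4, 2))
ds_b.fa['node_indices'] = np.array([1, 3])

# hstack here is the padding-aware function defined above
combined = hstack((ds_a, ds_b), pad_to_feature_index=5, set_empty_value=0.)
assert combined.shape == (4, 10)             # 2 inputs x 5 padded nodes
# the second input's node indices are shifted past the first input's block
assert sorted(combined.fa.node_indices) == list(range(10))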
Example #15
 def _get_connectomes(self, datasets):
     params = self.params
      # If precomputed connectomes are supplied, load them; otherwise compute them below.
     if params.connectomes is not None and os.path.exists(params.connectomes):
         _chpaldebug("Loading pre-computed connectomes from %s" % params.connectomes)
         connectomes = h5load(params.connectomes)
         return connectomes
     connectivity_mapper = FxyMapper(params.conn_metric)
     # Initializing datasets with original anatomically aligned datasets
     mfm = MeanFeatureMeasure()
     # TODO Handle seed_radius if seed queryengines are not provided
     seed_radius = params.seed_radius
     _chpaldebug("Performing surface connectivity hyperalignment with seeds")
     _chpaldebug("Computing connectomes.")
     ndatasets = len(datasets)
     if params.seed_queryengines is None:
         raise NotImplementedError("For now, we need seed queryengines.")
     qe_all = super(ConnectivityHyperalignment, self)._get_trained_queryengines(
         datasets, params.seed_queryengines, seed_radius, params.ref_ds)
     # If seed_indices are not supplied, use all as centers
     if not params.seed_indices:
         roi_ids = super(ConnectivityHyperalignment, self)._get_verified_ids(qe_all)
     else:
         roi_ids = params.seed_indices
     if len(qe_all) == 1:
         qe_all *= ndatasets
     # Computing Seed means to be used for aligning seed features
     seed_means = [self._get_seed_means(MeanFeatureMeasure(), qe, ds, params.seed_indices)
                   for qe, ds in zip(qe_all, datasets)]
     if params.npcs is None:
         conn_targets = []
         for seed_mean in seed_means:
             zscore(seed_mean, chunks_attr=None)
             conn_targets.append(seed_mean)
     else:
         # compute all PC-seed connectivity in each subject
         # 1. make common model SVs in each seed SL based on connectivity to seed_means
         # 2. Use these SVs for computing connectomes
         _chpaldebug("Aligning SVs in each searchlight across subjects")
         # Looping over all seeds in which SVD is done
         pc_data = [[] for isub in range(ndatasets)]
         sl_common_models = dict()
         if params.common_model is not None and os.path.exists(params.common_model):
             _chpaldebug("Loading common model from %s" % params.common_model)
             common_model = h5load(params.common_model)
             sl_common_models = common_model['local_models']
         for inode in roi_ids:
             # For each SL, computing connectivity of features to seed means
             # This line below doesn't need common model
             sl_connectomes = self._get_sl_connectomes(seed_means, qe_all, datasets,
                                                       inode, connectivity_mapper)
             # Hyperalign connectomes in SL
             # XXX TODO Common model input to below function should be updated.
             local_common_model = sl_common_models[inode][:, :params.npcs] \
                                     if params.common_model else None
             sl_hmappers, svm, sl_common_model = self._get_hypesvs(sl_connectomes,
                                             local_common_model=local_common_model)
             if sl_common_model is not None:
                 sl_common_models[inode] = sl_common_model
             # make common model SV timeseries data in each subject
             for sd, slhm, qe, pcd in zip(datasets, sl_hmappers, qe_all, pc_data):
                 sd_svs = slhm.forward(sd[:, qe[inode]])
                 zscore(sd_svs, chunks_attr=None)
                 if svm is not None:
                     sd_svs = svm.forward(sd_svs)
                     sd_svs = sd_svs[:, :params.npcs]
                     zscore(sd_svs, chunks_attr=None)
                 pcd.append(sd_svs)
         if params.save_model is not None:
             # TODO: should use debug
             print('Saving local models to %s' % params.save_model)
             h5save(params.save_model, sl_common_models)
         pc_data = [hstack(pcd) for pcd in pc_data]
         conn_targets = pc_data
         #print pc_data[-1]
     # compute connectomes using connectivity targets (PCs or seed means)
     connectomes = []
     if params.common_model is not None and os.path.exists(params.common_model):
         # TODO: should use debug
         print('Loading from saved common model: %s' % params.common_model)
         connectome_model = common_model['connectome_model']
         connectomes.append(connectome_model)
     for t_, ds in zip(conn_targets, datasets):
         connectivity_mapper.train(t_)
         connectome = connectivity_mapper.forward(ds)
         t_ = None
         connectome.fa = ds.fa
         if connectome.samples.dtype == 'float64':
             connectome.samples = connectome.samples.astype('float32')
         zscore(connectome, chunks_attr=None)
         connectomes.append(connectome)
     if params.connectomes is not None and not os.path.exists(params.connectomes):
         _chpaldebug("Saving connectomes to ", params.connectomes)
         h5save(params.connectomes, connectomes)
     return connectomes
Example #16
def hstack(dsets, pad_to_feature_index=None, hstack_method='drop_nonunique',
           set_empty_value=0.):
    '''Stacks NIML datasets while considering node indices

    Parameters
    ----------
    dsets: list
        datasets to be stacked
    pad_to_feature_index: list or int or None
        If a list, it should have the same length as dsets and indicate
        to which node index each input should be padded. A single int means
        that the same value is used for every dataset in dsets. None means
        no padding, and is only allowed for non-sparse datasets.
    hstack_method: str
        How datasets are stacked; see dataset.hstack.
    set_empty_value: float
        Value to which empty (padded) dataset values are set.

    Returns
    -------
    dset: Dataset
        Data combined from all datasets in dsets.
    '''

    n = len(dsets)

    # make sure pad_to_feature_index has n values
    if pad_to_feature_index is None or type(pad_to_feature_index) is int:
        pad_to_feature_index = [pad_to_feature_index] * n
    elif len(pad_to_feature_index) != n:
        raise ValueError("illegal pad_to_feature_index: expected list or int")

    # labels that can contain node indices
    node_indices_labels = ('node_indices', 'center_ids', 'ids', 'roi_ids')
    node_indices = []

    # allocate space for output
    padded_dsets = []
    hstack_indices = []
    first_node_index = 0
    for i, (dset, pad_to) in enumerate(zip(dsets, pad_to_feature_index)):
        # get node indices in this dataset
        node_index = _find_node_indices(dset, node_indices_labels)
        if node_index is None:
            node_index = np.arange(dset.nfeatures)
        max_node_index = np.max(node_index)

        # make a stripped version - without node index labels
        stripped_dset = dset.copy()
        for label in node_indices_labels:
            if label in stripped_dset.fa:
                stripped_dset.fa.pop(label)

        # see if padding is needed
        if pad_to is None or pad_to == max_node_index + 1:
            if not np.array_equal(np.arange(max_node_index + 1), np.sort(node_index)):
                raise ValueError("Sparse input %d: need pad_to input" % (i + 1))
            padded_dset = stripped_dset
            other_index = np.arange(0)
        else:
            # have to use empty values
            nfeatures_empty = pad_to - dset.nfeatures
            if nfeatures_empty < 0:
                raise ValueError("Dataset has %d features, cannot pad "
                                 "to %d" % (dset.nfeatures, pad_to))

            # make empty array
            empty_arr = np.zeros((dset.nsamples, nfeatures_empty),
                                 dtype=dset.samples.dtype) + set_empty_value
            empty_dset = Dataset(empty_arr, sa=stripped_dset.sa.copy(deep=True))

            # combine current dset and empty array
            padded_dset = dataset.hstack((stripped_dset, empty_dset), hstack_method)

            # set the proper node indices
            other_index = np.setdiff1d(np.arange(pad_to), node_index)

        # sanity check to make sure that indices are ok
        # XXX could be more informative
        if len(np.setdiff1d(node_index, np.arange(pad_to or max_node_index + 1))):
            raise ValueError("Illegal indices")

        hstack_index = node_index + first_node_index
        hstack_other_index = other_index + first_node_index
        first_node_index += pad_to or (max_node_index + 1)  # prepare for next iteration

        padded_dsets.append(padded_dset)
        hstack_indices.append(hstack_index)
        if len(other_index):
            hstack_indices.append(hstack_other_index)

    hstack_dset = dataset.hstack(padded_dsets, hstack_method)
    hstack_indices = np.hstack(hstack_indices)

    hstack_dset.fa[node_indices_labels[0]] = hstack_indices

    return hstack_dset
Example #17
 def _get_connectomes(self, datasets):
     params = self.params
      # If precomputed connectomes are supplied, load them; otherwise compute them below.
     if params.connectomes is not None and os.path.exists(
             params.connectomes):
         _chpaldebug("Loading pre-computed connectomes from ",
                     params.connectomes)
         connectomes = h5load(params.connectomes)
         return connectomes
     connectivity_mapper = FxyMapper(params.conn_metric)
     # Initializing datasets with original anatomically aligned datasets
     mfm = MeanFeatureMeasure()
     # TODO Handle seed_radius if seed queryengines are not provided
     seed_radius = params.seed_radius
     _chpaldebug(
         "Performing surface connectivity hyperalignment with seeds")
     _chpaldebug("Computing connectomes.")
     ndatasets = len(datasets)
     if params.seed_queryengines is None:
         raise NotImplementedError("For now, we need seed queryengines.")
     qe_all = super(ConnectivityHyperalignment,
                    self)._get_trained_queryengines(
                        datasets, params.seed_queryengines, seed_radius,
                        params.ref_ds)
     # If seed_indices are not supplied, use all as centers
     if not params.seed_indices:
         roi_ids = super(ConnectivityHyperalignment,
                         self)._get_verified_ids(qe_all)
     else:
         roi_ids = params.seed_indices
     if len(qe_all) == 1:
         qe_all *= ndatasets
     # Computing Seed means to be used for aligning seed features
     seed_means = [
         self._get_seed_means(MeanFeatureMeasure(), qe, ds,
                              params.seed_indices)
         for qe, ds in zip(qe_all, datasets)
     ]
     if params.npcs is None:
         conn_targets = []
         for seed_mean in seed_means:
             zscore(seed_mean, chunks_attr=None)
             conn_targets.append(seed_mean)
     else:
         # compute all PC-seed connectivity in each subject
         # 1. make common model SVs in each seed SL based on connectivity to seed_means
         # 2. Use these SVs for computing connectomes
         _chpaldebug("Aligning SVs in each searchlight across subjects")
         # Looping over all seeds in which SVD is done
         pc_data = [[] for isub in range(ndatasets)]
         sl_common_models = dict()
         if params.common_model is not None and os.path.exists(
                 params.common_model):
             _chpaldebug("Loading common model from %s" %
                         params.common_model)
             common_model = h5load(params.common_model)
             sl_common_models = common_model['local_models']
         for inode in roi_ids:
             # For each SL, computing connectivity of features to seed means
             # This line below doesn't need common model
             sl_connectomes = self._get_sl_connectomes(
                 seed_means, qe_all, datasets, inode, connectivity_mapper)
             # Hyperalign connectomes in SL
             # XXX TODO Common model input to below function should be updated.
             local_common_model = sl_common_models[inode][:, :params.npcs] \
                                     if params.common_model else None
             sl_hmappers, svm, sl_common_model = self._get_hypesvs(
                 sl_connectomes, local_common_model=local_common_model)
             if sl_common_model is not None:
                 sl_common_models[inode] = sl_common_model
             # make common model SV timeseries data in each subject
             for sd, slhm, qe, pcd in zip(datasets, sl_hmappers, qe_all,
                                          pc_data):
                 sd_svs = slhm.forward(sd[:, qe[inode]])
                 zscore(sd_svs, chunks_attr=None)
                 if svm is not None:
                     sd_svs = svm.forward(sd_svs)
                     sd_svs = sd_svs[:, :params.npcs]
                     zscore(sd_svs, chunks_attr=None)
                 pcd.append(sd_svs)
         if params.save_model is not None:
             # TODO: should use debug
             print('Saving local models to %s' % params.save_model)
             h5save(params.save_model, sl_common_models)
         pc_data = [hstack(pcd) for pcd in pc_data]
         conn_targets = pc_data
         #print pc_data[-1]
     # compute connectomes using connectivity targets (PCs or seed means)
     connectomes = []
     if params.common_model is not None and os.path.exists(
             params.common_model):
         # TODO: should use debug
         print('Loading from saved common model: %s' % params.common_model)
         connectome_model = common_model['connectome_model']
         connectomes.append(connectome_model)
     for t_, ds in zip(conn_targets, datasets):
         connectivity_mapper.train(t_)
         connectome = connectivity_mapper.forward(ds)
         t_ = None
         connectome.fa = ds.fa
         if connectome.samples.dtype == 'float64':
             connectome.samples = connectome.samples.astype('float32')
         zscore(connectome, chunks_attr=None)
         connectomes.append(connectome)
     if params.connectomes is not None and not os.path.exists(
             params.connectomes):
         _chpaldebug("Saving connectomes to ", params.connectomes)
         h5save(params.connectomes, connectomes)
     return connectomes