Example #1
def test_uniquemerge2literal():
    assert_equal(u2l(range(3)), ['0+1+2'])
    assert_equal(u2l(np.arange(6).reshape(2, 3)), ['[0 1 2]+[3 4 5]'])
    assert_array_equal(u2l([[2, 3, 4]]), [[2, 3, 4]])
    assert_array_equal(u2l([[2, 3, 4], [2, 3, 4]]), [[2, 3, 4]])
    assert_equal(u2l([2, 2, 2]), [2])
    assert_array_equal(u2l(['L1', 'L1']), ['L1'])
    # we should not lose our precious "tuples"
    assert_equal(u2l(asobjarray([('1', '0'), ('1', '0')])),
                 asobjarray([('1', '0')]))
Example #2
def test_uniquemerge2literal():
    assert_equal(u2l(range(3)), ['0+1+2'])
    assert_equal(u2l(
        np.arange(6).reshape(2, 3)), ['[0 1 2]+[3 4 5]'])
    assert_array_equal(u2l([[2, 3, 4]]), [[2, 3, 4]])
    assert_array_equal(u2l([[2, 3, 4], [2, 3, 4]]), [[2, 3, 4]])
    assert_equal(u2l([2, 2, 2]), [2])
    assert_array_equal(u2l(['L1', 'L1']), ['L1'])
    # we should not lose our precious "tuples"
    assert_equal(u2l(asobjarray([('1', '0'), ('1', '0')])), asobjarray([('1', '0')]))
Example #3
def _hdf_list_to_objarray(hdf, memo):
    if not ('shape' in hdf.attrs):
        if __debug__:
            debug('HDF5', "Enountered objarray stored without shape (due to a bug "
                "in post 2.1 release).  Some nested structures etc might not be "
                "loaded incorrectly")
        # yoh: we have possibly a problematic case due to my fix earlier
        # resolve to old logic:  nested referencing might not work :-/
        obj = _hdf_list_to_obj(hdf, memo)
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
    else:
        shape = tuple(hdf.attrs['shape'])
        # reserve space first
        if len(shape):
            obj = np.empty(np.prod(shape), dtype=object)
        else:
            # scalar
            obj = np.array(None, dtype=object)
        # now load the items from the list, noting existence of this
        # container
        obj_items = _hdf_list_to_obj(hdf, memo, target_container=obj)
        # assign to the object array
        for i, v in enumerate(obj_items):
            obj[i] = v
        if len(shape) and shape != obj.shape:
            obj = obj.reshape(shape)
    return obj
Example #4
 def test_asobjarray(self):
     for i in ([1, 2, 3], ['a', 2, '3'],
               ('asd')):
         i_con = asobjarray(i)
         self.assertTrue(i_con.dtype is np.dtype('object'))
         self.assertEqual(len(i), len(i_con))
         self.assertTrue(np.all(i == i_con))
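For orientation, here is a minimal sketch of what a helper like the one exercised by this test could look like. This is an illustrative assumption only, not PyMVPA's actual asobjarray implementation:

# Illustrative sketch only -- NOT PyMVPA's actual asobjarray.
# It mirrors the behaviour the test above relies on: every input element
# becomes one cell of a 1-D numpy array with dtype object.
import numpy as np

def asobjarray_sketch(seq):
    seq = list(seq)                         # also handles strings and generators
    out = np.empty(len(seq), dtype=object)  # reserve object-typed cells
    for i, v in enumerate(seq):             # assign element-wise so tuples stay tuples
        out[i] = v
    return out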
Example #5
File: plr.py  Project: Python3pkg/PyMVPA
    def _call(self, dataset=None):
        """Extract weights from PLR classifier.

        PLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        attrmap = clf._attrmap

        if attrmap:
            # labels (values of the corresponding space) which were used
            # for mapping. Here we rely on the fact that they are sorted
            # originally (just an arange())
            labels_num = list(attrmap.values())
            labels = attrmap.to_literal(asobjarray([tuple(sorted(labels_num))
                                                    ]),
                                        recurse=True)
        else:
            labels = [(0, 1)]  # we just had our good old numeric ones

        ds = Dataset(clf.w.T,
                     sa={
                         clf.get_space(): labels,
                         'biases': [clf.bias]
                     })
        return ds
Example #6
 def test_asobjarray(self):
     for i in ([1, 2, 3], ['a', 2, '3'],
               ('asd')):
         i_con = asobjarray(i)
         self.assertTrue(i_con.dtype is np.dtype('object'))
         self.assertEqual(len(i), len(i_con))
         self.assertTrue(np.all(i == i_con))
Example #7
    def _call(self, dataset):
        # for a binary decision between two labels, for all pairwise combinations of labels in
        # the dataset, compute weights per feature as the difference between means given label
        # divided by the variance.
        clf = self.clf
        # get means of all attributes given class label
        means = clf.means
        # number of features
        nfeat = clf.means.shape[1]

        # all pairwise combinations of labels
        pairs = list(itertools.combinations(range(len(clf.ulabels)), 2))

        weights = np.zeros([len(pairs), nfeat])
        # do not compute sensitivity for features with variance 0 as this would
        # entail a division by zero
        nonzero_vars = clf.variances != 0
        assert clf.params.common_variance
        nonzero_vars0 = nonzero_vars[0, :]
        for idx, pair in enumerate(pairs):
            # two-class sensitivity for (L0, L1) assumes that L1 is the
            # "positive one"
            weights[idx, nonzero_vars0] = (means[pair[1], nonzero_vars0] -
                                           means[pair[0], nonzero_vars0]) / \
                                          clf.variances[pair[0], nonzero_vars0]

        # put everything into a Dataset
        ds = Dataset(weights,
                     sa={
                         clf.get_space():
                         asobjarray([(clf.ulabels[p1], clf.ulabels[p2])
                                     for p1, p2 in pairs])
                     })
        return ds
Example #8
 def test_asobjarray(self):
     for i in ([1, 2, 3], ['a', 2, '3'],
               ('asd')):
         i_con = asobjarray(i)
         self.failUnless(i_con.dtype is np.dtype('object'))
         self.failUnlessEqual(len(i), len(i_con))
         self.failUnless(np.all(i == i_con))
Example #9
def _hdf_list_to_objarray(hdf, memo):
    if not ('shape' in hdf.attrs):
        if __debug__:
            debug(
                'HDF5',
                "Enountered objarray stored without shape (due to a bug "
                "in post 2.1 release).  Some nested structures etc might not be "
                "loaded incorrectly")
        # yoh: we have possibly a problematic case due to my fix earlier
        # resolve to old logic:  nested referencing might not work :-/
        obj = _hdf_list_to_obj(hdf, memo)
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
    else:
        shape = tuple(hdf.attrs['shape'])
        # reserve space first
        if len(shape):
            obj = np.empty(np.prod(shape), dtype=object)
        else:
            # scalar
            obj = np.array(None, dtype=object)
        # now load the items from the list, noting existence of this
        # container
        obj_items = _hdf_list_to_obj(hdf, memo, target_container=obj)
        # assign to the object array
        for i, v in enumerate(obj_items):
            obj[i] = v
        if len(shape) and shape != obj.shape:
            obj = obj.reshape(shape)
    return obj
Example #10
    def _call(self, dataset):
        # XXX Hm... it might make sense to unify access functions
        # naming across our swig libsvm wrapper and sg access
        # functions for svm
        clf = self.clf
        sgsvm = clf.svm
        sens_labels = None
        if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
            sens, biases = [], []
            nsvms = sgsvm.get_num_svms()
            clabels = sorted(clf._attrmap.values())
            nclabels = len(clabels)
            sens_labels = []
            isvm = 0  # index for svm among known

            for i in xrange(nclabels):
                for j in xrange(i + 1, nclabels):
                    sgsvmi = sgsvm.get_svm(isvm)
                    labels_tuple = (clabels[i], clabels[j])
                    # Since we gave the labels in incremental order,
                    # we always should be right - but it does not
                    # hurt to check if set of labels is the same
                    if __debug__ and _shogun_exposes_slavesvm_labels:
                        if not sgsvmi.get_labels():
                            # We need to call classify() so labels get assigned
                            # to the multiclass SVM
                            sgsvm.classify()
                        assert (set([
                            sgsvmi.get_label(int(x))
                            for x in sgsvmi.get_support_vectors()
                        ]) == set(labels_tuple))
                    sens1, bias = self.__sg_helper(sgsvmi)
                    sens.append(sens1)
                    biases.append(bias)
                    sens_labels += [labels_tuple[::-1]]  # ??? positive first
                    isvm += 1
            assert (len(sens) == nsvms)  # we should have  covered all
        else:
            sens1, bias = self.__sg_helper(sgsvm)
            biases = np.atleast_1d(bias)
            sens = np.atleast_2d(sens1)
            if not clf.__is_regression__:
                assert (set(clf._attrmap.values()) == set([-1.0, 1.0]))
                assert (sens.shape[0] == 1)
                sens_labels = [(-1.0, 1.0)]

        ds = Dataset(np.atleast_2d(sens))
        if sens_labels is not None:
            if isinstance(sens_labels[0], tuple):
                # Need to have them in array of dtype object
                sens_labels = asobjarray(sens_labels)

            if len(clf._attrmap):
                sens_labels = clf._attrmap.to_literal(sens_labels,
                                                      recurse=True)
            ds.sa[clf.get_space()] = sens_labels
        ds.sa['biases'] = biases

        return ds
Example #11
File: sens.py  Project: Anhmike/PyMVPA
    def _call(self, dataset):
        # XXX Hm... it might make sense to unify access functions
        # naming across our swig libsvm wrapper and sg access
        # functions for svm
        clf = self.clf
        sgsvm = clf.svm
        sens_labels = None
        if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
            sens, biases = [], []
            nsvms = sgsvm.get_num_svms()
            clabels = sorted(clf._attrmap.values())
            nclabels = len(clabels)
            sens_labels = []
            isvm = 0                    # index for svm among known

            for i in xrange(nclabels):
                for j in xrange(i+1, nclabels):
                    sgsvmi = sgsvm.get_svm(isvm)
                    labels_tuple = (clabels[i], clabels[j])
                    # Since we gave the labels in incremental order,
                    # we always should be right - but it does not
                    # hurt to check if set of labels is the same
                    if __debug__ and _shogun_exposes_slavesvm_labels:
                        if not sgsvmi.get_labels():
                            # We need to call classify() so labels get assigned
                            # to the multiclass SVM
                            sgsvm.classify()
                        assert(set([sgsvmi.get_label(int(x))
                                    for x in sgsvmi.get_support_vectors()])
                               == set(labels_tuple))
                    sens1, bias = self.__sg_helper(sgsvmi)
                    sens.append(sens1)
                    biases.append(bias)
                    sens_labels += [labels_tuple[::-1]] # ??? positive first
                    isvm += 1
            assert(len(sens) == nsvms)  # we should have  covered all
        else:
            sens1, bias = self.__sg_helper(sgsvm)
            biases = np.atleast_1d(bias)
            sens = np.atleast_2d(sens1)
            if not clf.__is_regression__:
                assert(set(clf._attrmap.values()) == set([-1.0, 1.0]))
                assert(sens.shape[0] == 1)
                sens_labels = [(-1.0, 1.0)]

        ds = Dataset(np.atleast_2d(sens))
        if sens_labels is not None:
            if isinstance(sens_labels[0], tuple):
                # Need to have them in array of dtype object
                sens_labels = asobjarray(sens_labels)

            if len(clf._attrmap):
                sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)
            ds.sa[clf.get_space()] = sens_labels
        ds.sa['biases'] = biases

        return ds
Example #12
    def test_asobjarray(self):
        for i in ([1, 2, 3], ['a', 2, '3'], ('asd')):
            i_con = asobjarray(i)
            self.assertTrue(i_con.dtype is np.dtype('object'))
            self.assertEqual(len(i), len(i_con))

            # Note: in Python3 the ['a', 2, '3'] list is converted to
            # an array with elements 'a', '2', and '3' (i.e. string representation
            # for the second element), and thus np.all(i==i_con) fails.
            # Instead here each element is tested for equality separately
            # XXX is this an issue?
            self.assertTrue(np.all((i[j] == i_con[j]) for j in xrange(len(i))))
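A quick illustration of the coercion the comment above refers to: without an explicit object dtype, numpy promotes the mixed list to a homogeneous string array, which is why each element is compared separately:

import numpy as np

np.array(['a', 2, '3'])                # -> array(['a', '2', '3'], dtype='<U1')
np.array(['a', 2, '3'], dtype=object)  # -> array(['a', 2, '3'], dtype=object)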
Example #13
File: base.py  Project: psederberg/PyMVPA
 def _call(self, dataset):
     sens = super(RegressionAsClassifierSensitivityAnalyzer, self)._call(dataset)
     # We can have only a single sensitivity out of regression
     assert sens.shape[0] == 1
     clf = self.clf
     targets_attr = clf.get_space()
     if targets_attr not in sens.sa:
         # We just assign a tuple of all labels sorted
         labels = tuple(sorted(clf._trained_attrmap.values()))
         if len(clf._trained_attrmap):
             labels = clf._trained_attrmap.to_literal(labels, recurse=True)
         sens.sa[targets_attr] = asobjarray([labels])
     return sens
Example #14
    def test_asobjarray(self):
        for i in ([1, 2, 3], ['a', 2, '3'],
                  ('asd')):
            i_con = asobjarray(i)
            self.assertTrue(i_con.dtype is np.dtype('object'))
            self.assertEqual(len(i), len(i_con))

            # Note: in Python3 the ['a', 2, '3'] list is converted to
            # an array with elements 'a', '2', and '3' (i.e. string representation
            # for the second element), and thus np.all(i==i_con) fails.
            # Instead here each element is tested for equality separately
            # XXX is this an issue?
            self.assertTrue(np.all((i[j] == i_con[j]) for j in xrange(len(i))))
Example #15
File: base.py  Project: Python3pkg/PyMVPA
 def _call(self, dataset):
     sens = super(RegressionAsClassifierSensitivityAnalyzer,
                  self)._call(dataset)
     # We can have only a single sensitivity out of regression
     assert (sens.shape[0] == 1)
     clf = self.clf
     targets_attr = clf.get_space()
     if targets_attr not in sens.sa:
         # We just assign a tuple of all labels sorted
         labels = tuple(sorted(clf._trained_attrmap.values()))
         if len(clf._trained_attrmap):
             labels = clf._trained_attrmap.to_literal(labels, recurse=True)
         sens.sa[targets_attr] = asobjarray([labels])
     return sens
Example #16
def _uniquemerge2literal(attrs):
    """Compress a sequence into its unique elements (with string merge).

    Whenever there is more than one unique element in `attrs`, these
    are converted to strings and joined with a '+' character in between.

    Parameters
    ----------
    attrs : sequence, arbitrary

    Returns
    -------
    Non-sequence arguments are passed as is; otherwise a sequence of the unique
    items is returned. None is returned in case of an empty sequence.
    """
    try:
        if isinstance(attrs[0], basestring):
            # do not try to disassemble sequences of strings
            raise TypeError
        uvalues = set(map(tuple, attrs))
        # if we were provided an array of object type, most likely because
        # we had tuples or other objects, we must also produce an object array
        if isinstance(attrs, np.ndarray) and attrs.dtype == 'O':
            unq = asobjarray(list(uvalues))
        else:
            unq = list(map(np.array, uvalues))
    except TypeError:
        # either no 2d-iterable...
        try:
            unq = np.unique(attrs)
        except TypeError:
            # or no iterable at all -- return the original
            return attrs

    lunq = len(unq)
    if lunq > 1:
        return ['+'.join([str(l) for l in unq])]
    elif lunq:
        return unq
    else:
        return None
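The behaviour described in the docstring is exactly what test_uniquemerge2literal in Examples #1 and #2 asserts (u2l there abbreviates _uniquemerge2literal):

_uniquemerge2literal(range(3))      # -> ['0+1+2']  several unique values merge into one string
_uniquemerge2literal([2, 2, 2])     # -> [2]        a single unique value survives as-is
_uniquemerge2literal(['L1', 'L1'])  # -> ['L1']     strings are never disassembled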
Example #17
File: plr.py  Project: schoeke/PyMVPA
    def _call(self, dataset=None):
        """Extract weights from PLR classifier.

        PLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        attrmap = clf._attrmap

        if attrmap:
            # labels (values of the corresponding space) which were used
            # for mapping. Here we rely on the fact that they are sorted
            # originally (just an arange())
            labels_num = attrmap.values()
            labels = attrmap.to_literal(asobjarray([tuple(sorted(labels_num))]),
                                        recurse=True)
        else:
            labels = [(0, 1)]           # we just had our good old numeric ones

        ds = Dataset(clf.w.T, sa={clf.get_space(): labels,
                                  'biases' : [clf.bias]})
        return ds
Example #18
File: gnb.py  Project: PyMVPA/PyMVPA
    def _call(self, dataset):
        # for a binary decision between two labels, for all pairwise combinations of labels in
        # the dataset, compute weights per feature as the difference between means given label
        # divided by the variance.
        clf = self.clf
        # get means of all attributes given class label
        means = clf.means
        # number of features
        nfeat = clf.means.shape[1]

        # all pairwise combinations of labels
        pairs = list(itertools.combinations(range(len(clf.ulabels)), 2))

        weights = np.zeros([len(pairs), nfeat])
        # do not compute sensitivity for features with variance 0 as this would
        # entail a division by zero
        nonzero_vars = clf.variances != 0
        assert clf.params.common_variance
        nonzero_vars0 = nonzero_vars[0, :]
        for idx, pair in enumerate(pairs):
            # two-class sensitivity for (L0, L1) assumes that L1 is the
            # "positive one"
            weights[idx, nonzero_vars0] = (means[pair[1], nonzero_vars0] -
                                           means[pair[0], nonzero_vars0]) / \
                                          clf.variances[pair[0], nonzero_vars0]

        # put everything into a Dataset
        ds = Dataset(
            weights,
            sa={
                clf.get_space(): asobjarray([
                    (clf.ulabels[p1], clf.ulabels[p2]) for p1, p2 in pairs]
                )
            }
        )
        return ds
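A toy illustration of the per-feature weight computed in the loop above for one label pair (L0, L1); the numbers are hypothetical, and common_variance makes both rows of variances identical:

import numpy as np

means = np.array([[1.0, 2.0],      # per-feature means for class L0
                  [3.0, 5.0]])     # per-feature means for class L1
variances = np.array([[2.0, 1.5],
                      [2.0, 1.5]])
weights_01 = (means[1] - means[0]) / variances[0]   # -> array([1., 2.])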
Example #19
 def test_asobjarray(self):
     for i in ([1, 2, 3], ["a", 2, "3"], ("asd")):
         i_con = asobjarray(i)
         self.assertTrue(i_con.dtype is np.dtype("object"))
         self.assertEqual(len(i), len(i_con))
         self.assertTrue(np.all(i == i_con))
Example #20
    def _call(self, dataset, callables=[]):
        # local bindings
        clf = self.clf
        model = clf.model

        # Labels for sensitivities to be returned
        sens_labels = None

        if clf.__is_regression__:
            nr_class = None
            svm_labels = None  # shouldn't bother to provide "targets" for regressions
        else:
            nr_class = model.nr_class
            svm_labels = model.labels

        # No need to warn since by default we do not do
        # anything evil and provide labels -- so it is up to the user
        # to decide whether to do something silly
        #if nr_class != 2:
        #    warning("You are estimating sensitivity for SVM %s trained on %d" %
        #            (str(clf), nr_class) +
        #            " classes. Make sure that it is what you intended to do" )

        svcoef = np.matrix(model.get_sv_coef())
        svs = np.matrix(model.get_sv())
        rhos = np.asarray(model.get_rho())

        if self.params.split_weights:
            if nr_class != 2:
                raise NotImplementedError, \
                      "Cannot compute per-class weights for" \
                      " non-binary classification task"
            # libsvm might have different idea on the ordering
            # of labels, so we would need to map them back explicitly
            ds_labels = list(
                dataset.sa[clf.get_space()].unique)  # labels in the dataset
            senses = [None for i in ds_labels]
            # first label is given positive value
            for i, (c, l) in enumerate([(svcoef > 0, lambda x: x),
                                        (svcoef < 0, lambda x: x * -1)]):
                # convert to array, and just take the meaningful dimension
                c_ = c.A[0]
                # NOTE svm_labels are numerical; ds_labels are literal
                senses[ds_labels.index(
                            clf._attrmap.to_literal(svm_labels[i]))] = \
                                (l(svcoef[:, c_] * svs[c_, :])).A[0]
            weights = np.array(senses)
            sens_labels = svm_labels
        else:
            # XXX yoh: .mean() is effectively
            # averages across "sensitivities" of all paired classifiers (I
            # think). See more info on this topic in svm.py on how sv_coefs
            # are stored
            #
            # First multiply SV coefficients with the actual SVs to get
            # weighted impact of SVs on decision, then for each feature
            # take mean across SVs to get a single weight value
            # per feature
            if nr_class is None or nr_class <= 2:
                # as simple as this
                weights = (svcoef * svs).A
                # and only in case of classification
                if nr_class:
                    # ??? First label seems to correspond to positive
                    sens_labels = [tuple(svm_labels[::-1])]
            else:
                # we need to compose correctly per each pair of classifiers.
                # See docstring for get_sv_coef for more details on internal
                # structure of bloody storage

                # total # of pairs
                npairs = nr_class * (nr_class - 1) / 2
                # # of SVs in each class
                NSVs_perclass = model.get_n_sv()
                # indices where each class starts in each row of SVs
                # name is after similar variable in libsvm internals
                nz_start = np.cumsum([0] + NSVs_perclass[:-1])
                nz_end = nz_start + NSVs_perclass
                # reserve storage
                weights = np.zeros((npairs, svs.shape[1]))
                ipair = 0  # index of the pair
                """
                // classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]
                """
                sens_labels = []
                for i in xrange(nr_class):
                    for j in xrange(i + 1, nr_class):
                        weights[ipair, :] = np.asarray(
                            svcoef[j - 1, nz_start[i]:nz_end[i]] *
                            svs[nz_start[i]:nz_end[i]] +
                            svcoef[i, nz_start[j]:nz_end[j]] *
                            svs[nz_start[j]:nz_end[j]])
                        # ??? First label corresponds to positive
                        # that is why [j], [i]
                        sens_labels += [(svm_labels[j], svm_labels[i])]
                        ipair += 1  # go to the next pair
                assert (ipair == npairs)

        if __debug__ and 'SVM' in debug.active:
            if nr_class:
                nsvs = model.get_n_sv()
            else:
                nsvs = model.get_total_n_sv()
            if clf.__is_regression__:
                svm_type = clf._svm_impl  # type of regression
            else:
                svm_type = '%d-class SVM(%s)' % (nr_class, clf._svm_impl)
            debug('SVM',
                  "Extracting weights for %s: #SVs=%s, " % \
                  (svm_type, nsvs) + \
                  " SVcoefshape=%s SVs.shape=%s Rhos=%s." % \
                  (svcoef.shape, svs.shape, rhos) + \
                  " Result: min=%f max=%f" % (np.min(weights), np.max(weights)))

        ds_kwargs = {}
        if nr_class:  # for classification only
            # and we should have prepared the labels
            assert (sens_labels is not None)

            if len(clf._attrmap):
                if isinstance(sens_labels[0], tuple):
                    sens_labels = asobjarray(sens_labels)
                sens_labels = clf._attrmap.to_literal(sens_labels,
                                                      recurse=True)

            # NOTE: `weights` is already and always 2D
            ds_kwargs = dict(sa={clf.get_space(): sens_labels})

        weights_ds = Dataset(weights, **ds_kwargs)
        weights_ds.sa['biases'] = rhos
        return weights_ds
Example #21
File: sens.py  Project: Arthurkorn/PyMVPA
    def _call(self, dataset, callables=[]):
        # local bindings
        clf = self.clf
        model = clf.model

        # Labels for sensitivities to be returned
        sens_labels = None

        if clf.__is_regression__:
            nr_class = None
            svm_labels = None           # shouldn't bother to provide "targets" for regressions
        else:
            nr_class = model.nr_class
            svm_labels = model.labels

        # No need to warn since by default we do not do
        # anything evil and provide labels -- so it is up to the user
        # to decide whether to do something silly
        #if nr_class != 2:
        #    warning("You are estimating sensitivity for SVM %s trained on %d" %
        #            (str(clf), nr_class) +
        #            " classes. Make sure that it is what you intended to do" )

        svcoef = np.matrix(model.get_sv_coef())
        svs = np.matrix(model.get_sv())
        rhos = np.asarray(model.get_rho())

        if self.params.split_weights:
            if nr_class != 2:
                raise NotImplementedError, \
                      "Cannot compute per-class weights for" \
                      " non-binary classification task"
            # libsvm might have different idea on the ordering
            # of labels, so we would need to map them back explicitly
            ds_labels = list(dataset.sa[clf.get_space()].unique) # labels in the dataset
            senses = [None for i in ds_labels]
            # first label is given positive value
            for i, (c, l) in enumerate( [(svcoef > 0, lambda x: x),
                                         (svcoef < 0, lambda x: x*-1)] ):
                # convert to array, and just take the meaningful dimension
                c_ = c.A[0]
                # NOTE svm_labels are numerical; ds_labels are literal
                senses[ds_labels.index(
                            clf._attrmap.to_literal(svm_labels[i]))] = \
                                (l(svcoef[:, c_] * svs[c_, :])).A[0]
            weights = np.array(senses)
            sens_labels = svm_labels
        else:
            # XXX yoh: .mean() is effectively
            # averages across "sensitivities" of all paired classifiers (I
            # think). See more info on this topic in svm.py on how sv_coefs
            # are stored
            #
            # First multiply SV coefficients with the actual SVs to get
            # weighted impact of SVs on decision, then for each feature
            # take mean across SVs to get a single weight value
            # per feature
            if nr_class is None or nr_class <= 2:
                # as simple as this
                weights = (svcoef * svs).A
                # and only in case of classification
                if nr_class:
                    # ??? First label seems to correspond to positive
                    sens_labels = [tuple(svm_labels[::-1])]
            else:
                # we need to compose correctly per each pair of classifiers.
                # See docstring for get_sv_coef for more details on internal
                # structure of bloody storage

                # total # of pairs
                npairs = nr_class * (nr_class-1)/2
                # # of SVs in each class
                NSVs_perclass = model.get_n_sv()
                # indices where each class starts in each row of SVs
                # name is after similar variable in libsvm internals
                nz_start = np.cumsum([0] + NSVs_perclass[:-1])
                nz_end = nz_start + NSVs_perclass
                # reserve storage
                weights = np.zeros((npairs, svs.shape[1]))
                ipair = 0               # index of the pair
                """
                // classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]
                """
                sens_labels = []
                for i in xrange(nr_class):
                    for j in xrange(i+1, nr_class):
                        weights[ipair, :] = np.asarray(
                            svcoef[j-1, nz_start[i]:nz_end[i]]
                            * svs[nz_start[i]:nz_end[i]]
                            +
                            svcoef[i, nz_start[j]:nz_end[j]]
                            * svs[nz_start[j]:nz_end[j]]
                            )
                        # ??? First label corresponds to positive
                        # that is why [j], [i]
                        sens_labels += [(svm_labels[j], svm_labels[i])]
                        ipair += 1      # go to the next pair
                assert(ipair == npairs)

        if __debug__ and 'SVM' in debug.active:
            if nr_class:
                nsvs = model.get_n_sv()
            else:
                nsvs = model.get_total_n_sv()
            if clf.__is_regression__:
                svm_type = clf._svm_impl # type of regression
            else:
                svm_type = '%d-class SVM(%s)' % (nr_class, clf._svm_impl)
            debug('SVM',
                  "Extracting weights for %s: #SVs=%s, " % \
                  (svm_type, nsvs) + \
                  " SVcoefshape=%s SVs.shape=%s Rhos=%s." % \
                  (svcoef.shape, svs.shape, rhos) + \
                  " Result: min=%f max=%f" % (np.min(weights), np.max(weights)))

        ds_kwargs = {}
        if nr_class:          # for classification only
            # and we should have prepared the labels
            assert(sens_labels is not None)

            if len(clf._attrmap):
                if isinstance(sens_labels[0], tuple):
                    sens_labels = asobjarray(sens_labels)
                sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)

            # NOTE: `weights` is already and always 2D
            ds_kwargs = dict(sa={clf.get_space(): sens_labels})

        weights_ds = Dataset(weights, **ds_kwargs)
        weights_ds.sa['biases'] = rhos
        return weights_ds
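A small sanity check of the pairwise bookkeeping used above: for a hypothetical three-class SVM the loop visits npairs slave classifiers in the same order in which ipair is advanced:

nr_class = 3                                     # hypothetical number of classes
npairs = nr_class * (nr_class - 1) // 2          # -> 3
pairs = [(i, j) for i in range(nr_class) for j in range(i + 1, nr_class)]
# -> [(0, 1), (0, 2), (1, 2)]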
Example #22
File: hdf5.py  Project: andreirusu/PyMVPA
def hdf2obj(hdf, memo=None):
    """Convert an HDF5 group definition into an object instance.

    Obviously, this function assumes the conventions implemented in the
    `obj2hdf()` function. Those conventions will eventually be documented in
    the module docstring, whenever they are sufficiently stable.

    Parameters
    ----------
    hdf : HDF5 group instance
      HDF5 group instance. This could also be an HDF5 file instance.
    memo : dict
      Dictionary tracking reconstructed objects to prevent recursions (analogous
      to deepcopy).

    Notes
    -----
    Although this function reconstructs object instances in a way that is
    similar to unpickling, it should be *relatively* safe to open HDF files
    from untrusted sources. Only basic datatypes are stored in HDF files, and
    no foreign code is executed during reconstruction. For that
    reason, any type that shall be reconstructed needs to be importable
    (importing is done by fully-qualified module names).

    Returns
    -------
    object instance
    """
    if memo is None:
        # init object tracker
        memo = {}
    # note, older file formats did not store objrefs
    if 'objref' in hdf.attrs:
        objref = hdf.attrs['objref']
    else:
        objref = None

    # if this HDF group has an objref that points to an already reconstructed
    # object, simply return this object again
    if not objref is None and objref in memo:
        obj = memo[objref]
        if __debug__:
            debug('HDF5', "Use tracked object %s (%i)" % (type(obj), objref))
        return obj

    #
    # Actual data
    #
    if isinstance(hdf, h5py.Dataset):
        if __debug__:
            debug('HDF5', "Load from HDF5 dataset [%s]" % hdf.name)
        if 'is_scalar' in hdf.attrs:
            # extract the scalar from the 0D array
            obj = hdf[()]
            # and coerce it back into the native Python type if necessary
            if issubclass(type(obj), np.generic):
                obj = np.asscalar(obj)
        elif 'is_numpy_scalar' in hdf.attrs:
            # extract the scalar from the 0D array as is
            obj = hdf[()]
        else:
            # read array-dataset into an array
            obj = np.empty(hdf.shape, hdf.dtype)
            if obj.size:
                hdf.read_direct(obj)
    else:
        # check if we have a class instance definition here
        if not ('class' in hdf.attrs or 'recon' in hdf.attrs):
            raise LookupError(
                "Found hdf group without class instance "
                "information (group: %s). Cannot convert it into an "
                "object (content: '%s', attributes: '%s')." %
                (hdf.name, hdf.keys(), hdf.attrs.keys()))

        mod_name = hdf.attrs['module']

        if 'recon' in hdf.attrs:
            # Custom objects custom reconstructor
            obj = _recon_customobj_customrecon(hdf, memo)
        elif mod_name != '__builtin__':
            # Custom objects default reconstructor
            cls_name = hdf.attrs['class']
            if cls_name in ('function', 'type', 'builtin_function_or_method'):
                # Functions and types
                obj = _recon_functype(hdf)
            else:
                # Other custom objects
                obj = _recon_customobj_defaultrecon(hdf, memo)
        else:
            # Built-in objects
            cls_name = hdf.attrs['class']
            if __debug__:
                debug('HDF5',
                      "Reconstructing built-in object '%s'." % cls_name)
            # built-in type (there should be only 'list', 'dict' and 'None'
            # that would not be in a Dataset)
            if cls_name == 'NoneType':
                obj = None
            elif cls_name == 'tuple':
                obj = _hdf_tupleitems_to_obj(hdf, memo)
            elif cls_name == 'list':
                obj = _hdf_list_to_obj(hdf, memo)
            elif cls_name == 'dict':
                obj = _hdf_dict_to_obj(hdf, memo)
            elif cls_name == 'type':
                obj = eval(hdf.attrs['name'])
            elif cls_name == 'function':
                raise RuntimeError("Unhandled reconstruction of built-in "
                                   "function (at '%s')." % hdf.name)
            else:
                raise RuntimeError(
                    "Found hdf group with a builtin type "
                    "that is not handled by the parser (group: %s). This "
                    "is a conceptual bug in the parser. Please report." %
                    hdf.name)
    #
    # Final post-processing
    #
    if 'is_objarray' in hdf.attrs:
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
        if 'shape' in hdf.attrs:
            shape = tuple(hdf.attrs['shape'])
            if shape != obj.shape:
                obj = obj.reshape(shape)

    # track if desired
    if objref:
        memo[objref] = obj
    if __debug__:
        debug('HDF5', "Done loading %s [%s]" % (type(obj), hdf.name))
    return obj
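A minimal usage sketch following the docstring; the file name is hypothetical and the import path is an assumption (hdf2obj is defined in the hdf5.py module shown above):

import h5py
from mvpa2.base.hdf5 import hdf2obj        # assumed import location

with h5py.File('mydata.hdf5', 'r') as f:   # hypothetical file written by obj2hdf()
    obj = hdf2obj(f)                       # per the docstring, a file instance is accepted too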
Example #23
def hdf2obj(hdf, memo=None):
    """Convert an HDF5 group definition into an object instance.

    Obviously, this function assumes the conventions implemented in the
    `obj2hdf()` function. Those conventions will eventually be documented in
    the module docstring, whenever they are sufficiently stable.

    Parameters
    ----------
    hdf : HDF5 group instance
      HDF5 group instance. This could also be an HDF5 file instance.
    memo : dict
      Dictionary tracking reconstructed objects to prevent recursions (analogous
      to deepcopy).

    Notes
    -----
    Although this function reconstructs object instances in a way that is
    similar to unpickling, it should be *relatively* safe to open HDF files
    from untrusted sources. Only basic datatypes are stored in HDF files, and
    no foreign code is executed during reconstruction. For that
    reason, any type that shall be reconstructed needs to be importable
    (importing is done by fully-qualified module names).

    Returns
    -------
    object instance
    """
    if memo is None:
        # init object tracker
        memo = {}
    # note, older file formats did not store objrefs
    if 'objref' in hdf.attrs:
        objref = hdf.attrs['objref']
    else:
        objref = None

    # if this HDF group has an objref that points to an already reconstructed
    # object, simply return this object again
    if not objref is None and objref in memo:
        obj = memo[objref]
        if __debug__:
            debug('HDF5', "Use tracked object %s (%i)" % (type(obj), objref))
        return obj

    #
    # Actual data
    #
    if isinstance(hdf, h5py.Dataset):
        if __debug__:
            debug('HDF5', "Load from HDF5 dataset [%s]" % hdf.name)
        if 'is_scalar' in hdf.attrs:
            # extract the scalar from the 0D array
            obj = hdf[()]
            # and coerce it back into the native Python type if necessary
            if issubclass(type(obj), np.generic):
                obj = np.asscalar(obj)
        elif 'is_numpy_scalar' in hdf.attrs:
            # extract the scalar from the 0D array as is
            obj = hdf[()]
        else:
            # read array-dataset into an array
            obj = np.empty(hdf.shape, hdf.dtype)
            hdf.read_direct(obj)
    else:
        # check if we have a class instance definition here
        if not ('class' in hdf.attrs or 'recon' in hdf.attrs):
            raise LookupError("Found hdf group without class instance "
                    "information (group: %s). Cannot convert it into an "
                    "object (content: '%s', attributes: '%s')."
                    % (hdf.name, hdf.keys(), hdf.attrs.keys()))

        mod_name = hdf.attrs['module']

        if 'recon' in hdf.attrs:
            # Custom objects custom reconstructor
            obj = _recon_customobj_customrecon(hdf, memo)
        elif mod_name != '__builtin__':
            # Custom objects default reconstructor
            cls_name = hdf.attrs['class']
            if cls_name in ('function', 'type', 'builtin_function_or_method'):
                # Functions and types
                obj = _recon_functype(hdf)
            else:
                # Other custom objects
                obj = _recon_customobj_defaultrecon(hdf, memo)
        else:
            # Built-in objects
            cls_name = hdf.attrs['class']
            if __debug__:
                debug('HDF5', "Reconstructing built-in object '%s'." % cls_name)
            # built-in type (there should be only 'list', 'dict' and 'None'
            # that would not be in a Dataset)
            if cls_name == 'NoneType':
                obj = None
            elif cls_name == 'tuple':
                obj = _hdf_tupleitems_to_obj(hdf, memo)
            elif cls_name == 'list':
                obj = _hdf_list_to_obj(hdf, memo)
            elif cls_name == 'dict':
                obj = _hdf_dict_to_obj(hdf, memo)
            elif cls_name == 'type':
                obj = eval(hdf.attrs['name'])
            elif cls_name == 'function':
                raise RuntimeError("Unhandled reconstruction of built-in "
                        "function (at '%s')." % hdf.name)
            else:
                raise RuntimeError("Found hdf group with a builtin type "
                        "that is not handled by the parser (group: %s). This "
                        "is a conceptual bug in the parser. Please report."
                        % hdf.name)
    #
    # Final post-processing
    #
    if 'is_objarray' in hdf.attrs:
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
        if 'shape' in hdf.attrs:
            shape = tuple(hdf.attrs['shape'])
            if shape != obj.shape:
                obj = obj.reshape(shape)

    # track if desired
    if objref:
        memo[objref] = obj
    if __debug__:
        debug('HDF5', "Done loading %s [%s]"
                      % (type(obj), hdf.name))
    return obj