Example #1
    def __reverse_single_level(self, wp):

        # local bindings
        level_paths = self.__level_paths

        # define wavelet packet to use
        WP = pywt.WaveletPacket(
            data=None, wavelet=self._wavelet,
            mode=self._mode, maxlevel=self.__level)

        # prepare storage
        signal_shape = wp.shape[:1] + self._inshape[1:]
        signal = np.zeros(signal_shape)
        Ntime_points = self._intimepoints
        for indexes in _get_indexes(signal_shape,
                                   self._dim):
            if __debug__:
                debug('MAP_', " %s" % (indexes,), lf=False, cr=True)

            for path, level_data in zip(level_paths, wp[indexes]):
                WP[path] = level_data

            signal[indexes] = WP.reconstruct(True)[:Ntime_points]

        return signal
Example #2
def _hdf_list_to_objarray(hdf, memo):
    if not ('shape' in hdf.attrs):
        if __debug__:
            debug('HDF5', "Encountered objarray stored without shape (due to a bug "
                "in post 2.1 release).  Some nested structures etc might not be "
                "loaded correctly")
        # yoh: we have possibly a problematic case due to my fix earlier
        # resolve to old logic:  nested referencing might not work :-/
        obj = _hdf_list_to_obj(hdf, memo)
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
    else:
        shape = tuple(hdf.attrs['shape'])
        # reserve space first
        if len(shape):
            obj = np.empty(np.prod(shape), dtype=object)
        else:
            # scalar
            obj = np.array(None, dtype=object)
        # now load the items from the list, noting existence of this
        # container
        obj_items = _hdf_list_to_obj(hdf, memo, target_container=obj)
        # assign to the object array
        for i, v in enumerate(obj_items):
            obj[i] = v
        if len(shape) and shape != obj.shape:
            obj = obj.reshape(shape)
    return obj
Example #3
    def _call(self, dataset):
        sensitivities = []
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity for SA#%d:%s" %
                      (ind, analyzer))
            sensitivity = analyzer(dataset)
            sensitivities.append(sensitivity)

        if __debug__:
            debug("SA",
                  "Returning %d sensitivities from %s" %
                  (len(sensitivities), self.__class__.__name__))

        sa_attr = self._sa_attr
        if isinstance(sensitivities[0], AttrDataset):
            smerged = None
            for i, s in enumerate(sensitivities):
                s.sa[sa_attr] = np.repeat(i, len(s))
                if smerged is None:
                    smerged = s
                else:
                    smerged.append(s)
            sensitivities = smerged
        else:
            sensitivities = \
                Dataset(sensitivities,
                        sa={sa_attr: np.arange(len(sensitivities))})

        self.ca.sensitivities = sensitivities

        return sensitivities
Example #4
    def _get_selected_ids(self, dataset):
        """Given a dataset actually select the features

        Returns
        -------
        indexes of the selected features
        """
        # optionally train the analyzer first
        if self.__train_analyzer:
            self.__sensitivity_analyzer.train(dataset)

        sensitivity = self.__sensitivity_analyzer(dataset)
        """Compute the sensitivity map."""
        self.ca.sensitivity = sensitivity

        # Select features to preserve
        selected_ids = self.__feature_selector(sensitivity)

        if __debug__:
            debug("FS_", "Sensitivity: %s Selected ids: %s" %
                  (sensitivity, selected_ids))

        # XXX not sure if it really has to be sorted
        selected_ids.sort()
        return selected_ids
Example #5
def _suppress_scipy_warnings():
    # Infiltrate warnings if necessary
    numpy_ver = versions['numpy']
    scipy_ver = versions['scipy']
    # There are way too many deprecation warnings spat out at the
    # user. Let's assume that they should be fixed by the time scipy 0.7.0 is out
    if not __debug__ or (__debug__ and 'PY' not in debug.active):
        filter_lines = []
        if "0.6.0" <= scipy_ver and scipy_ver < "0.7.0" \
            and numpy_ver > "1.1.0":
            if __debug__:
                debug('EXT', "Setting up filters for numpy DeprecationWarnings "
                      "regarding scipy < 0.7.0")
            filter_lines += [
                ('NumpyTest will be removed in the next release.*',
                 DeprecationWarning),
                ('PyArray_FromDims: use PyArray_SimpleNew.',
                 DeprecationWarning),
                ('PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.',
                 DeprecationWarning),
                # Trick re.match, since in warnings absent re.DOTALL in re.compile
                ('[\na-z \t0-9]*The original semantics of histogram is scheduled to be.*'
                 '[\na-z \t0-9]*', Warning) ]
        if scipy_ver >= "0.15":
            filter_lines += [("`scipy.weave` is deprecated, use `weave` instead!",
                              DeprecationWarning)]
        if scipy_ver >= "0.16":
            # scipy deprecated it but statsmodels still import it for now
            filter_lines += [("`scipy.linalg.calc_lwork` is deprecated!",
                              DeprecationWarning)]
        for f, w in filter_lines:
            warnings.filterwarnings('ignore', f, w)
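The function above only collects (message-regex, category) pairs; the silencing itself is done by `warnings.filterwarnings`. A minimal, self-contained sketch of that mechanism (plain standard library, not PyMVPA code):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')   # show everything by default
    # install the same kind of ignore-filter the code above registers
    warnings.filterwarnings('ignore',
                            'NumpyTest will be removed in the next release.*',
                            DeprecationWarning)
    warnings.warn('NumpyTest will be removed in the next release. Use nose.',
                  DeprecationWarning)
    warnings.warn('some other deprecation', DeprecationWarning)

# only the non-matching warning got through
assert len(caught) == 1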
Example #6
        def newfunc(*arg, **kwargs):
            nfailed, i = 0, 0  # define i just in case
            for i in xrange(niter):
                try:
                    ret = func(*arg, **kwargs)
                    if i + 1 - nfailed >= niter - nfailures:
                        # so we know already that we wouldn't go over
                        # nfailures
                        break
                except AssertionError, e:
                    nfailed += 1
                    if __debug__:
                        debug("TEST", "Upon %i-th run, test %s failed with %s", (i, func.__name__, e))

                    if nfailed > nfailures:
                        if __debug__:
                            debug(
                                "TEST",
                                "Ran %s %i times. Got %d failures, "
                                "while was allowed %d "
                                "-- re-throwing the last failure %s",
                                (func.__name__, i + 1, nfailed, nfailures, e),
                            )
                        exc_info = sys.exc_info()
                        raise exc_info[1], None, exc_info[2]
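This wrapper (shown here in its Python 2 form; Example #45 is the Python 3 variant) is the inner function of a decorator that reruns flaky tests. A minimal, self-contained sketch of the overall pattern, using a hypothetical name `retry_failures` (the real PyMVPA decorator differs, e.g. in its debug logging):

import functools

def retry_failures(niter=3, nfailures=1):
    """Hypothetical decorator factory: rerun a test, tolerating a few failures."""
    def decorate(func):
        @functools.wraps(func)
        def newfunc(*args, **kwargs):
            nfailed = 0
            for i in range(niter):
                try:
                    return func(*args, **kwargs)
                except AssertionError:
                    nfailed += 1
                    if nfailed > nfailures:
                        raise  # too many failures -- re-raise the last one
        return newfunc
    return decorate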
Example #7
    def _train(self, samples):
        """Perform network training.

        Parameters
        ----------
        samples : array-like
            Used for unsupervised training of the SOM.
          
        Notes
        -----
        It is assumed that prior to calling this method the _pretrain method 
        was called with the same argument.  
        """

        # ensure that dqd was set properly
        dqd = self._dqd
        if dqd is None:
            raise ValueError("This should not happen - was _pretrain called?")

        # units weight vector deltas for batch training
        # (height x width x #features)
        unit_deltas = np.zeros(self._K.shape, dtype='float')

        # for all iterations
        for it in xrange(1, self.niter + 1):
            # compute the neighborhood impact kernel for this iteration
            # has to be recomputed since kernel shrinks over time
            k = self._compute_influence_kernel(it, dqd)

            # for all training vectors
            for s in samples:
                # determine closest unit (as element coordinate)
                b = self._get_bmu(s)
                # train all units at once by unfolding the kernel (from the
                # single quadrant that is precomputed), cutting it to the
                # right shape and simply multiply it to the difference of target
                # and all unit weights....
                infl = np.vstack((
                        np.hstack((
                            # upper left
                            k[b[0]:0:-1, b[1]:0:-1],
                            # upper right
                            k[b[0]:0:-1, :self.kshape[1] - b[1]])),
                        np.hstack((
                            # lower left
                            k[:self.kshape[0] - b[0], b[1]:0:-1],
                            # lower right
                            k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]]))
                               ))
                unit_deltas += infl[:, :, np.newaxis] * (s - self._K)

            # apply cumulative unit deltas
            self._K += unit_deltas

            if __debug__:
                debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g" %
                      (it, self.niter, np.sqrt(np.sum(unit_deltas ** 2))))

            # reset unit deltas
            unit_deltas.fill(0.)
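A small, self-contained NumPy check (not part of the class) of the quadrant-unfolding trick used above, which also appears in Example #17: stitching four views of the precomputed distance quadrant together around the best-matching unit reproduces every unit's distance to that unit.

import numpy as np

kshape = (4, 5)                                   # Kohonen layer: height x width
# one quadrant of distances from the origin, as precomputed before training
dqd = np.fromfunction(lambda x, y: np.sqrt(x ** 2 + y ** 2), kshape)
b = (2, 3)                                        # assumed best-matching unit
infl = np.vstack((
    np.hstack((dqd[b[0]:0:-1, b[1]:0:-1],                     # upper left
               dqd[b[0]:0:-1, :kshape[1] - b[1]])),           # upper right
    np.hstack((dqd[:kshape[0] - b[0], b[1]:0:-1],             # lower left
               dqd[:kshape[0] - b[0], :kshape[1] - b[1]]))))  # lower right
expected = np.fromfunction(
    lambda x, y: np.sqrt((x - b[0]) ** 2 + (y - b[1]) ** 2), kshape)
assert np.allclose(infl, expected)                # distances to the BMU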
Example #8
    def _forward_dataset(self, dataset):
        # invoke the superclass' _forward_dataset, which in turn calls
        # _forward_data as overridden in this class
        mds = super(FlattenMapper, self)._forward_dataset(dataset)
        # attribute collection needs to have a new length check
        mds.fa.set_length_check(mds.nfeatures)
        # we need to duplicate all existing feature attributes, as each original
        # feature is now spread across the new feature axis
        # take all "additional" axes after the actual feature axis and count
        # elements per sample -- if no such axis exists this will be 1
        for k in dataset.fa:
            if __debug__:
                debug("MAP_", "Forward-mapping fa '%s'." % k)
            attr = dataset.fa[k].value
            # the maximum number of axes to flatten in the attr
            if not self.__maxdims is None:
                maxdim = min(len(self.__origshape), self.__maxdims)
            else:
                maxdim = len(self.__origshape)
            multiplier = mds.nfeatures / np.prod(attr.shape[:maxdim])
            if __debug__:
                debug("MAP_", "Broadcasting fa '%s' %s %d times" % (k, attr.shape, multiplier))
            # broadcast as many times as necessary to get 'matching dimensions'
            bced = np.repeat(attr, multiplier, axis=0)
            # now reshape as many dimensions as the mapper knows about
            mds.fa[k] = bced.reshape((-1,) + bced.shape[maxdim:])

        # if there is no inspace return immediately
        if self.get_space() is None:
            return mds
        # otherwise create the coordinates as feature attributes
        else:
            mds.fa[self.get_space()] = list(np.ndindex(dataset.samples[0].shape))
            return mds
Example #9
    def _postcall(self, ds, result):
        """Postprocessing of results.

        By default, does nothing.

        Parameters
        ----------
        ds: Dataset
          Original input dataset.
        result: Dataset
          Preliminary result dataset (as produced by ``_call()``).

        Returns
        -------
        Dataset
        """
        if not self.__postproc is None:
            if __debug__:
                debug("NO",
                      "Applying post-processing node %s", (self.__postproc,))
            self.ca.raw_results = result

            result = self.__postproc(result)

        return result
Example #10
    def _train(self, samples):
        """Determine the projection matrix onto the SVD components from
        a 2D samples x feature data matrix.
        """
        X = np.asmatrix(samples)
        X = self._demean_data(X)

        # singular value decomposition
        U, SV, Vh = np.linalg.svd(X, full_matrices=0)

        # store the final matrix with the new basis vectors to project the
        # features onto the SVD components. And store its .H right away to
        # avoid computing it in forward()
        self._proj = Vh.H

        # also store singular values of all components
        self._sv = SV

        if __debug__:
            debug("MAP", "SVD was done on %s and obtained %d SVs " %
                  (samples, len(SV)) + " (%d non-0, max=%f)" %
                  (len(SV.nonzero()[0]), SV[0]))
            # .norm might be somewhat expensive to compute
            if "MAP_" in debug.active:
                debug("MAP_", "Mixing matrix has %s shape and norm=%f" %
                      (self._proj.shape, np.linalg.norm(self._proj)))
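Equivalent computation with plain ndarrays instead of `np.matrix` (a standalone sketch, not the class method): `Vh.conj().T` plays the role of `Vh.H` above.

import numpy as np

X = np.random.randn(20, 5)
X = X - X.mean(axis=0)                    # demean the samples
U, SV, Vh = np.linalg.svd(X, full_matrices=False)
proj = Vh.conj().T                        # basis to project onto the SVD components
projected = X.dot(proj)                   # samples expressed in the SVD basis
assert projected.shape == (20, 5)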
Example #11
 def __init__(self, space=None, pass_attr=None, postproc=None, **kwargs):
     """
     Parameters
     ----------
     space : str, optional
       Name of the 'processing space'. The actual meaning of this argument
       heavily depends on the sub-class implementation. In general, this is
       a trigger that tells the node to compute and store information about
       the input data that is "interesting" in the context of the
       corresponding processing in the output dataset.
     pass_attr : str, list of str, optional
       What attribute(s) (from sa, fa, a collections, see
       :meth:`Dataset.get_attr`) to pass from original dataset
       provided to __call__ (before applying postproc), or from
       'ca' collection of this instance (use 'ca.' prefix)
       into the resultant dataset.
     postproc : Node instance, optional
       Node to perform post-processing of results. This node is applied
       in `__call__()` to perform a final processing step on the to be
       result dataset. If None, nothing is done.
     """
     ClassWithCollections.__init__(self, **kwargs)
     if __debug__:
         debug("NO",
               "Init node '%s' (space: '%s', postproc: '%s')",
               (self.__class__.__name__, space, str(postproc)))
     self.set_space(space)
     self.set_postproc(postproc)
     if isinstance(pass_attr, basestring):
         pass_attr = (pass_attr,)
     self.__pass_attr = pass_attr
Example #12
 def _untrain(self):
     if __debug__:
         debug("FS_", "Untraining Iterative FS: %s" % self)
     self._fmeasure.untrain()
     self._pmeasure.untrain()
     # ask base class to do its untrain
     super(IterativeFeatureSelection, self)._untrain()
Example #13
    def _prepredict(self, dataset):
        """Functionality prior to prediction
        """
        if not ('notrain2predict' in self.__tags__):
            # check if classifier was trained if that is needed
            if not self.trained:
                raise FailedToPredictError(
                      "Classifier %s wasn't yet trained, therefore can't "
                      "predict" % self)
            nfeatures = dataset.nfeatures #data.shape[1]
            # check if number of features is the same as in the data
            # it was trained on
            if nfeatures != self.__trainednfeatures:
                raise ValueError, \
                      "Classifier %s was trained on data with %d features, " % \
                      (self, self.__trainednfeatures) + \
                      "thus can't predict for %d features" % nfeatures


        if self.params.retrainable:
            if not self.__changedData_isset:
                self.__reset_changed_data()
                _changedData = self._changedData
                data = np.asanyarray(dataset.samples)
                _changedData['testdata'] = \
                                        self.__was_data_changed('testdata', data)
                if __debug__:
                    debug('CLF_', "prepredict: Obtained _changedData is %s",
                          (_changedData,))
Example #14
 def _set(self, val):
     if __debug__ and __mvpadebug__:
         # Since this is called quite often, don't convert
         # values to strings here; rely on passing them
         # within msgargs
         debug("COL", "Setting %s to %s ", (self, val))
     self._value = val
Example #15
    def __was_data_changed(self, key, entry, update=True):
        """Check if given entry was changed from what known prior.

        If so -- store only the ones needed for retrainable beastie
        """
        idhash_ = idhash(entry)
        __idhashes = self.__idhashes

        changed = __idhashes[key] != idhash_
        if __debug__ and 'CHECK_RETRAIN' in debug.active:
            __trained = self.__trained
            changed2 = entry != __trained[key]
            if isinstance(changed2, np.ndarray):
                changed2 = changed2.any()
            if changed != changed2 and not changed:
                raise RuntimeError, \
                  'idhash found to be weak for %s. Though hashid %s!=%s %s, '\
                  'estimates %s!=%s %s' % \
                  (key, idhash_, __idhashes[key], changed,
                   entry, __trained[key], changed2)
            if update:
                __trained[key] = entry

        if __debug__ and changed:
            debug('CLF_', "Changed %s from %s to %s.%s",
                  (key, __idhashes[key], idhash_,
                   ('','updated')[int(update)]))
        if update:
            __idhashes[key] = idhash_

        return changed
Example #16
    def label_voxel(self, c, levels = None):

        if self.__referenceLevel is None:
            warning("You did not provide what level to use "
                    "for reference. Assigning 0th level -- '%s'"
                    % (self._levels[0],))
            self.set_reference_level(0)
            # return self.__referenceAtlas.label_voxel(c, levels)

        c = self._check_range(c)

        # obtain coordinates of the closest voxel
        cref = self._data[ self.__referenceLevel.indexes, c[0], c[1], c[2] ]
        dist = norm( (cref - c) * self.voxdim )
        if __debug__:
            debug('ATL__', "Closest referenced point for %r is "
                  "%r at distance %3.2f" % (c, cref, dist))
        if (self.distance - dist) >= 1e-3: # neglect everything smaller
            result = self.__referenceAtlas.label_voxel(cref, levels)
            result['voxel_referenced'] = c
            result['distance'] = dist
        else:
            result = self.__referenceAtlas.label_voxel(c, levels)
            if __debug__:
                debug('ATL__', "Closest referenced point is "
                      "further than desired distance %.2f" % self.distance)
            result['voxel_referenced'] = None
            result['distance'] = 0
        return result
Example #17
    def _train(self, samples):
        """Perform network training.

        Parameters
        ----------
        samples : array-like
          Used for unsupervised training of the SOM.
        """
        # XXX initialize with clever default, e.g. plane of first two PCA
        # components
        self._K = np.random.standard_normal(tuple(self.kshape) + (samples.shape[1],))

        # units weight vector deltas for batch training
        # (height x width x #features)
        unit_deltas = np.zeros(self._K.shape, dtype='float')

        # precompute distance kernel between elements in the Kohonen layer
        # that will remain constant throughout the training
        # (just compute one quadrant, as the distances are symmetric)
        # XXX maybe do other than squared Euclidean?
        dqd = np.fromfunction(lambda x, y: (x**2 + y**2)**0.5,
                             self.kshape, dtype='float')

        # for all iterations
        for it in xrange(1, self.niter + 1):
            # compute the neighborhood impact kernel for this iteration
            # has to be recomputed since kernel shrinks over time
            k = self._compute_influence_kernel(it, dqd)

            # for all training vectors
            for s in samples:
                # determine closest unit (as element coordinate)
                b = self._get_bmu(s)
                # train all units at once by unfolding the kernel (from the
                # single quadrant that is precomputed), cutting it to the
                # right shape and simply multiply it to the difference of target
                # and all unit weights....
                infl = np.vstack((
                        np.hstack((
                            # upper left
                            k[b[0]:0:-1, b[1]:0:-1],
                            # upper right
                            k[b[0]:0:-1, :self.kshape[1] - b[1]])),
                        np.hstack((
                            # lower left
                            k[:self.kshape[0] - b[0], b[1]:0:-1],
                            # lower right
                            k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]]))
                               ))
                unit_deltas += infl[:,:,np.newaxis] * (s - self._K)

            # apply cumulative unit deltas
            self._K += unit_deltas

            if __debug__:
                debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g" %
                      (it, self.niter, np.sqrt(np.sum(unit_deltas **2))))

            # reset unit deltas
            unit_deltas.fill(0.)
Example #18
def _binary_data_bytecount(niml):
    '''helper function that returns how many bytes a NIML binary data
    element should have'''
    niform = niml['ni_form']
    if not 'binary' in niform:
        raise ValueError('Illegal niform %s' % niform)

    tps = niml['vec_typ']
    onetype = types.findonetype(tps)

    if onetype is None:
        debug('NIML', 'Not unique type: %r', tps)
        return None

    # numeric, either int or float
    ncols = niml['vec_num']
    nrows = niml['vec_len']
    tp = types.code2numpy_type(onetype)
    bytes_per_elem = types.numpy_type2bytecount(tp)

    if bytes_per_elem is None:
        raise ValueError("Type not supported: %r" % onetype)

    nb = ncols * nrows * bytes_per_elem

    debug('NIML', 'Number of bytes for %s: %d x %d with %d bytes / element',
                                    (niform, ncols, nrows, bytes_per_elem))

    return nb
Example #19
    def _level3(self, datasets):
        params = self.params            # for quicker access ;)
        # create a mapper per dataset
        mappers = [deepcopy(params.alignment) for ds in datasets]

        # key different from level-2; the common space is uniform
        #temp_commonspace = commonspace

        residuals = None
        if self.ca['residual_errors'].enabled:
            residuals = np.zeros((1, len(datasets)))
            self.ca.residual_errors = Dataset(samples=residuals)

        # start from original input datasets again
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 3: ds #%i" % i)

            # retrain mapper on final common space
            ds_new.sa[m.get_space()] = self.commonspace
            m.train(ds_new)
            # remove common space attribute again to save on memory
            del ds_new.sa[m.get_space()]

            if residuals is not None:
                # obtain final projection
                data_mapped = m.forward(ds_new.samples)
                residuals[0, i] = np.linalg.norm(data_mapped - self.commonspace)

        return mappers
Example #20
 def _untrain(self):
     if __debug__:
         debug("FS_", "Untraining combined FS: %s" % self)
     for fs in self.__selectors:
         fs.untrain()
     # ask base class to do its untrain
     super(CombinedFeatureSelection, self)._untrain()
Example #21
    def __init__(self, index=None, *args, **kwargs):
        """
        Parameters
        ----------
        value : arbitrary (see derived implementations)
          The actual value of this attribute.
        **kwargs
          Passed to `Collectable`
        """
        if index is None:
            IndexedCollectable._instance_index += 1
            index = IndexedCollectable._instance_index
        else:
            # TODO: there can be collision between custom provided indexes
            #       and the ones automagically assigned.
            #       Check might be due
            pass
        self._instance_index = index

        self._isset = False
        self.reset()

        Collectable.__init__(self, *args, **kwargs)

        if __debug__ and 'COL' in debug.active:
            debug("COL", "Initialized new IndexedCollectable #%d:%s %r",
                  (index, self.name, self))
Example #22
    def _train(self, dataset):
        """Select the most important features

        Parameters
        ----------
        dataset : Dataset
          used to compute sensitivity maps
        """
        # optionally train the analyzer first
        if self.__train_analyzer:
            self.__sensitivity_analyzer.train(dataset)

        sensitivity = self.__sensitivity_analyzer(dataset)
        """Compute the sensitivity map."""

        self.ca.sensitivity = sensitivity

        # Select features to preserve
        selected_ids = self.__feature_selector(sensitivity)

        if __debug__:
            debug("FS_", "Sensitivity: %s Selected ids: %s" %
                  (sensitivity, selected_ids))

        # XXX not sure if it really has to be sorted
        selected_ids.sort()
        # announce desired features to the underlying slice mapper
        self._safe_assign_slicearg(selected_ids)
        # and perform its own training
        super(SensitivityBasedFeatureSelection, self)._train(dataset)
Example #23
    def _train(self, samples):
        """Train PrototypeMapper
        """

        self._proj = np.hstack([similarity.computed(samples, self.prototypes) for similarity in self.similarities])
        if __debug__:
            debug("MAP", "projected data of shape %s: %s " % (self._proj.shape, self._proj))
Example #24
    def get_selected_indexes(self, n_cfgs):
        """A naive selection of indexes according to strategy and count

        Parameters
        ----------
        n_cfgs: int
          Total number of configurations to select from
        """
        strategy = self.selection_strategy
        count = self.count
        if strategy == 'first':
            indexes = slice(0, count)
        elif strategy in ['equidistant', 'random']:
            if strategy == 'equidistant':
                # figure out what step is needed to
                # accommodate the `count` number
                step = float(n_cfgs) / count
                assert (step >= 1.0)
                indexes = [int(round(step * i)) for i in xrange(count)]
            elif strategy == 'random':
                indexes = np.random.permutation(range(n_cfgs))[:count]
                # doesn't matter much but let's keep them in the original
                # order at least
                indexes.sort()
            else:
                # who said that I am paranoid?
                raise RuntimeError("Really should not happen")
        if __debug__:
            debug("SPL", "For %s selection strategy selected %s "
                         "partition specs from %d total",
                  (strategy, indexes, n_cfgs))
        return indexes
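A quick standalone illustration (not part of the class) of what the two deterministic strategies produce for n_cfgs=10 and count=3:

n_cfgs, count = 10, 3

# 'first': simply the leading `count` configurations
first = list(range(n_cfgs))[slice(0, count)]                  # [0, 1, 2]

# 'equidistant': spread `count` picks evenly over all configurations
step = float(n_cfgs) / count
equidistant = [int(round(step * i)) for i in range(count)]    # [0, 3, 7]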
Example #25
    def _call(self, dataset):
        # OPT: local bindings
        clfclf = self.clf.clf
        analyzer = self.__analyzer

        if analyzer is None:
            analyzer = clfclf.get_sensitivity_analyzer(
                **(self._slave_kwargs))
            if analyzer is None:
                raise ValueError, \
                      "Wasn't able to figure basic analyzer for clf %s" % \
                      `clfclf`
            if __debug__:
                debug("SA", "Selected analyzer %s for clf %s" % \
                      (analyzer, clfclf))
            # bind to the instance finally
            self.__analyzer = analyzer

        # TODO "remove" unnecessary things below on each call...
        # assign corresponding classifier
        analyzer.clf = clfclf

        # if clf was trained already - don't train again
        if clfclf.trained:
            analyzer._force_train = False

        result = analyzer._call(dataset)
        self.ca.clf_sensitivities = result

        return result
Example #26
    def __init__(self, value=None, name=None, doc=None):
        """
        Parameters
        ----------
        value : arbitrary (see derived implementations)
          The actual value of this attribute.
        name : str
          Name of the collectable under which it should be available in its
          respective collection.
        doc : str
          Documentation about the purpose of this collectable.
        """
        if doc is not None:
            # to prevent newlines in the docstring
            try:
                doc = re.sub('[\n ]+', ' ', doc)
            except TypeError:
                # catch some old datasets stored in HDF5
                doc = re.sub('[\n ]+', ' ', np.asscalar(doc))

        self.__doc__ = doc
        self.__name = name
        self._value = None
        if value is not None:
            self._set(value)
        if __debug__ and __mvpadebug__:
            debug("COL", "Initialized %r", (self,))
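What the docstring clean-up above amounts to, in isolation: collapse newlines and runs of spaces into single spaces so the documentation becomes one line.

import re

doc = "Documentation about\n    the purpose of this collectable."
assert re.sub('[\n ]+', ' ', doc) == \
    "Documentation about the purpose of this collectable."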
Example #27
    def _forward_dataset(self, dataset):
        """Forward-map a dataset.

        This is a private method that can be reimplemented in derived
        classes. The default implementation forward-maps the dataset samples
        and returns a new dataset that is a shallow copy of the input with
        the mapped samples.

        Parameters
        ----------
        dataset : Dataset-like
        """
        if __debug__:
            debug('MAP_', "Forward-map %s-shaped samples in dataset with '%s'."
                        % (dataset.samples.shape, self))
        msamples = self._forward_data(dataset.samples)
        if __debug__:
            debug('MAP_', "Make shallow copy of to-be-forward-mapped dataset "
                    "and assigned forward-mapped samples ({sf}a_filters: "
                    "%s, %s, %s)." % (self._sa_filter, self._fa_filter,
                                      self._a_filter))
        mds = dataset.copy(deep=False,
                           sa=self._sa_filter,
                           fa=self._fa_filter,
                           a=self._a_filter)
        mds.samples = msamples
        return mds
Example #28
    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        # transpose to have the number of features on the second axis
        # (as usual)
        weights = clf.weights.T

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (len(weights) + 1) +
                  "Result: min=%f max=%f" %\
                  (np.min(weights), np.max(weights)))

        # limit the labels to the number of sensitivity sets, to deal
        # with the case of `fit_all_weights=False`
        ds = Dataset(weights,
                     sa={clf.get_space(): clf._ulabels[:len(weights)]})

        if clf.params.has_bias:
            ds.sa['biases'] = clf.biases
        return ds
Example #29
    def _call(self, dataset):
        analyzers = []
        # create analyzers
        for clf in self.clf.clfs:
            if self.__analyzer is None:
                analyzer = clf.get_sensitivity_analyzer(**(self._slave_kwargs))
                if analyzer is None:
                    raise ValueError, \
                          "Wasn't able to figure basic analyzer for clf %r" % \
                          (clf,)
                if __debug__:
                    debug("SA", "Selected analyzer %r for clf %r" % \
                          (analyzer, clf))
            else:
                # XXX shallow copy should be enough...
                analyzer = copy.copy(self.__analyzer)

            # assign corresponding classifier
            analyzer.clf = clf
            # if clf was trained already - don't train again
            if clf.trained:
                analyzer._force_train = False
            analyzers.append(analyzer)

        self.__combined_analyzer.analyzers = analyzers

        # XXX not sure if we don't want to call directly ._call(dataset) to avoid
        # double application of transformers/combiners, after all we are just
        # 'proxying' here to combined_analyzer...
        # YOH: decided -- lets call ._call
        return self.__combined_analyzer._call(dataset)
Example #30
    def forward(self, data):
        """Map data from input to output space.

        Parameters
        ----------
        data : Dataset-like, (at least 2D)-array-like
          Typically this is a `Dataset`, but it might also be a plain data
          array, or even something completely different(TM) that is supported
          by a subclass' implementation. If such an object is Dataset-like it
          is handled by a dedicated method that also transforms dataset
          attributes if necessary. If an array-like is passed, it has to be
          at least two-dimensional, with the first axis separating samples
          or observations. For single samples `forward1()` might be more
          appropriate.
        """
        if is_datasetlike(data):
            if __debug__:
                debug('MAP', "Forward-map %s-shaped dataset through '%s'."
                        % (data.shape, self))
            return self._forward_dataset(data)
        else:
            if hasattr(data, 'ndim') and data.ndim < 2:
                raise ValueError(
                    'Mapper.forward() only support mapping of data with '
                    'at least two dimensions, where the first axis '
                    'separates samples/observations. Consider using '
                    'Mapper.forward1() instead.')
            if __debug__:
                debug('MAP', "Forward-map data through '%s'." % (self))
            return self._forward_data(data)
Example #31
    def _wm_reverse(self, data):
        if __debug__:
            debug('MAP', "Converting signal back using DWP")

        if self.__level is None:
            raise NotImplementedError
        else:
            if not externals.exists('pywt wp reconstruct'):
                raise NotImplementedError, \
                      "Reconstruction for a single level for versions of " \
                      "pywt < 0.1.7 (revision 103) is not supported"
            if not externals.exists('pywt wp reconstruct fixed'):
                warning(
                    "%s: Reverse mapping with this version of 'pywt' might "
                    "result in incorrect data in the tails of the signal. "
                    "Please check for an update of 'pywt', or be careful "
                    "when interpreting the edges of the reverse mapped "
                    "data." % self.__class__.__name__)
            return self.__reverse_single_level(data)
Example #32
    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.

        Parameters
        ----------
        kernel : Kernel
          a kernel object defining the covariance between instances.
          (Defaults to SquaredExponentialKernel if None in arguments)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # It does not make sense to calculate a confusion matrix for a GPR
        # XXX it does ;) it will be a RegressionStatistics actually ;-)
        # So if someone desires -- let him have it
        # self.ca.enable('training_stats', False)

        # set kernel:
        if kernel is None:
            kernel = SquaredExponentialKernel()
            debug("GPR",
                  "No kernel was provided, falling back to default: %s"
                  % kernel)
        self.__kernel = kernel

        # append proper clf_internal depending on the kernel
        # TODO: add "__tags__" to kernels since the check
        #       below does not scale
        if isinstance(kernel, GeneralizedLinearKernel) or \
           isinstance(kernel, LinearKernel):
            self.__tags__ += ['linear']
        else:
            self.__tags__ += ['non-linear']

        if externals.exists('openopt') \
               and not 'has_sensitivity' in self.__tags__:
            self.__tags__ += ['has_sensitivity']

        # No need to initialize conditional attributes. Unless they got set
        # they would raise an exception
        # self.predicted_variances = None
        # self.log_marginal_likelihood = None
        self._init_internals()
        pass
Example #33
def __check(name, a='__version__'):
    exec "import %s" % name
    # it might be lxml.etree, so take only first module
    topmodname = name.split('.')[0]
    try:
        v = getattr(sys.modules[name], '__version__')
    except Exception as e:
        # we can't assign version but it is there
        if __debug__:
            debug('EXT',
                  'Failed to acquire a version of %(name)s: %(e)s' % locals())
        # if module is present but does not bear __version__
        try:
            import pkg_resources
            v = pkg_resources.get_distribution(topmodname).version
        except Exception as e:
            # and if all that failed -- just assign '0'
            v = '0'
    versions[topmodname] = SmartVersion(v)
    return True  # we did manage to import it -- so it is there
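The fallback used above, in isolation: if an imported module does not expose `__version__`, ask `pkg_resources` for the installed distribution's version (a standalone sketch; 'numpy' is just a placeholder distribution name).

import pkg_resources

try:
    v = pkg_resources.get_distribution('numpy').version
except Exception:
    v = '0'    # last resort, as in __check above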
Example #34
    def _get_cvec(self, data):
        """Estimate the default C and return the user's negative C values scaled by it
        """
        if not 'C' in self.params:#svm_type in [_svm.svmc.C_SVC]:
            raise RuntimeError("Requested estimation of default C whenever C was not set")

        C = self.params.C
        if not is_sequence_type(C):
            # we were not given a tuple for balancing between classes
            C = [C]

        Cs = list(C[:])               # copy
        for i in range(len(Cs)):
            if Cs[i] < 0:
                Cs[i] = self._get_default_c(data.samples)*abs(Cs[i])
                if __debug__:
                    debug("SVM", "Default C for %s was computed to be %s" %
                          (C[i], Cs[i]))

        return Cs
Example #35
 def test_debug(self):
     verbose.handlers = []           # so debug doesn't spoil it
     debug.active = ['1', '2', 'SLC']
     debug.metrics = debug._known_metrics.keys()
     # do not offset for this test
     debug('SLC', self.msg, lf=False)
     self.assertRaises(ValueError, debug, 3, 'bugga')
     #Should complain about unknown debug id
     svalue = self.sout.getvalue()
     regexp = "\[SLC\] DBG(?:{.*})?: %s" % self.msg
     rematch = re.match(regexp, svalue)
     self.assertTrue(rematch, msg="Cannot match %s with regexp %s" %
                     (svalue, regexp))
     # find metrics
     self.assertTrue('RSS/VMS:' in svalue,
                     msg="Cannot find vmem metric in " + svalue)
     self.assertTrue('>test_verbosity:' in svalue,
                     msg="Cannot find tbc metric in " + svalue)
     self.assertTrue(' sec' in svalue,
                     msg="Cannot find tbc metric in " + svalue)
Example #36
def _recon_customobj_customrecon(hdf, memo):
    """Reconstruct a custom object from HDF using a custom reconstructor"""
    # we found something that has some special idea about how it wants
    # to be reconstructed
    mod_name = hdf.attrs['module']
    recon_name = hdf.attrs['recon']
    if __debug__:
        debug(
            'HDF5', "Load from custom reconstructor '%s.%s' [%s]" %
            (mod_name, recon_name, hdf.name))
    # turn names into definitions
    try:
        mod = __import__(mod_name, fromlist=[recon_name])
    except ImportError, e:
        if mod_name.startswith('mvpa') and not mod_name.startswith('mvpa2'):
            # try to be gentle on data that got stored with PyMVPA 0.5 or 0.6
            mod_name = mod_name.replace('mvpa', 'mvpa2', 1)
            mod = __import__(mod_name, fromlist=[recon_name])
        else:
            raise e
Example #37
    def __call__(self, ds):
        """
        .. note::
           Will raise KeyError if lookup for sample_ids fails, or ds has not
           been mapped at all
        """
        if (not 'magic_id' in ds.a) or ds.a.magic_id != self._orig_ds_id:
            raise KeyError, \
                  'Dataset %s is not indexed by %s' % (ds, self)

        _map = self._map
        _origids = ds.sa.origids

        res = np.array([_map[i] for i in _origids])
        if __debug__:
            debug('SAL',
                  "Successful lookup: %(inst)s on %(ds)s having "
                  "origids=%(origids)s resulted in %(res)s",
                  msgargs=dict(inst=self, ds=ds, origids=_origids, res=res))
        return res
Example #38
def wr1996(size=200):
    """Generate '6d robot arm' dataset (Williams and Rasmussen 1996)

    Was originally created in order to test the correctness of the
    implementation of kernel ARD.  For full details see:
    http://www.gaussianprocess.org/gpml/code/matlab/doc/regression.html#ard

    x_1 picked randomly in [-1.932, -0.453]
    x_2 picked randomly in [0.534, 3.142]
    r_1 = 2.0
    r_2 = 1.3
    f(x_1,x_2) = r_1 cos (x_1) + r_2 cos(x_1 + x_2) + N(0,0.0025)
    etc.

    Expected relevances:
    ell_1      1.804377
    ell_2      1.963956
    ell_3      8.884361
    ell_4     34.417657
    ell_5   1081.610451
    ell_6    375.445823
    sigma_f    2.379139
    sigma_n    0.050835
    """
    intervals = np.array([[-1.932, -0.453], [0.534, 3.142]])
    r = np.array([2.0, 1.3])
    x = np.random.rand(size, 2)
    x *= np.array(intervals[:, 1] - intervals[:, 0])
    x += np.array(intervals[:, 0])
    if __debug__:
        for i in xrange(2):
            debug(
                'DG', 'column %d Min: %g Max: %g' %
                (i, x[:, i].min(), x[:, i].max()))
    y = r[0] * np.cos(x[:, 0] + r[1] * np.cos(x.sum(1))) + \
        np.random.randn(size) * np.sqrt(0.0025)
    y -= y.mean()
    x34 = x + np.random.randn(size, 2) * 0.02
    x56 = np.random.randn(size, 2)
    x = np.hstack([x, x34, x56])
    return dataset_wizard(samples=x, targets=y)
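A minimal usage sketch (it assumes the PyMVPA environment above, which provides `dataset_wizard` and `debug`):

ds = wr1996(size=200)
# ds.samples.shape == (200, 6): x_1, x_2 plus noisy copies x_3, x_4 and
# pure-noise x_5, x_6; ds.targets holds the demeaned noisy responses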
Example #39
    def reverse(self, data):
        """Reverse-map data from output back into input space.

        Parameters
        ----------
        data : Dataset-like, anything
          Typically this is a `Dataset`, but it might also be a plain data
          array, or even something completely different(TM) that is supported
          by a subclass' implementation. If such an object is Dataset-like it
          is handled by a dedicated method that also transforms dataset
          attributes if necessary.
        """
        if is_datasetlike(data):
            if __debug__:
                debug('MAP', "Reverse-map %s-shaped dataset through '%s'."
                        % (data.shape, self))
            return self._reverse_dataset(data)
        else:
            if __debug__:
                debug('MAP', "Reverse-map data through '%s'." % (self))
            return self._reverse_data(data)
Example #40
def _set_matplotlib_backend():
    """Check whether a custom matplotlib backend is configured and differs
    from the current one
    """
    backend = cfg.get('matplotlib', 'backend')
    if backend:
        import matplotlib as mpl
        mpl_backend = mpl.get_backend().lower()
        if mpl_backend != backend.lower():
            if __debug__:
                debug('EXT_', "Trying to set matplotlib backend to %s" % backend)
            mpl.use(backend)
            import warnings
            # And disable useless warning from matplotlib in the future
            warnings.filterwarnings(
                'ignore', 'This call to matplotlib.use() has no effect.*',
                UserWarning)
        elif __debug__:
            debug('EXT_',
                  "Not trying to set matplotlib backend to %s since it was "
                  "already set" % backend)
Example #41
    def reverse1(self, data):
        """Wrapper method to map single samples.

        It is basically identical to `reverse()`, but accepts one-dimensional
        arguments. It cannot be used to map a whole dataset; `reverse()`
        handles that.
        """
        if isinstance(data, np.ndarray):
            data = data[np.newaxis]
        else:
            data = np.array([data])
        if __debug__:
            debug(
                'MAP', "Reverse-map single %s-shaped sample through '%s'." %
                (data.shape[1:], self))
        mapped = self.reverse(data)[0]
        if __debug__:
            debug(
                'MAP', "Mapped single %s-shaped sample to %s." %
                (data.shape[1:], mapped.shape))
        return mapped
Example #42
 def __init__(self, space=None, postproc=None, **kwargs):
     """
     Parameters
     ----------
     space: str, optional
       Name of the 'processing space'. The actual meaning of this argument
       heavily depends on the sub-class implementation. In general, this is
       a trigger that tells the node to compute and store information about
       the input data that is "interesting" in the context of the
       corresponding processing in the output dataset.
     postproc : Node instance, optional
       Node to perform post-processing of results. This node is applied
       in `__call__()` to perform a final processing step on the to be
       result dataset. If None, nothing is done.
     """
     ClassWithCollections.__init__(self, **kwargs)
     if __debug__:
         debug("NO", "Init node '%s' (space: '%s', postproc: '%s')",
               (self.__class__.__name__, space, str(postproc)))
     self.set_space(space)
     self.set_postproc(postproc)
Example #43
    def _cache(self, ds1, ds2=None):
        """Initializes internal lookups + _kfull via caching the kernel matrix
        """
        if __debug__ and 'KRN' in debug.active:
            debug('KRN', "Caching %(inst)s for ds1=%(ds1)s, ds2=%(ds2)s"
                  % dict(inst=self, ds1=ds1, ds2=ds2))

        self._lhsids = SamplesLookup(ds1)
        if (ds2 is None) or (ds2 is ds1):
            self._rhsids = self._lhsids
        else:
            self._rhsids = SamplesLookup(ds2)

        ckernel = self._kernel
        ckernel.compute(ds1, ds2)
        self._kfull = ckernel.as_raw_np()
        ckernel.cleanup()
        self._k = self._kfull

        self._recomputed = True
        self.params.reset()
Example #44
    def _concat_results(sl=None, dataset=None, roi_ids=None, results=None):
        """The simplest implementation for collecting the results --
        just put them into a list

        This implementation simply collects them into a list and uses
        only `sl` for assigning conditional attributes.  A custom
        implementation might make use of more or fewer of them.
        Implemented as @staticmethod just to emphasize that in
        principle it is independent of the actual searchlight instance
        """
        # collect results
        results = sum(results, [])

        if __debug__ and 'SLC' in debug.active:
            debug('SLC', '')            # just newline
            resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
            debug('SLC', ' hstacking %d results of shape %s'
                  % (len(results), resshape))

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        result_ds = hstack(results)

        if __debug__:
            debug('SLC', " hstacked shape %s" % (result_ds.shape,))

        if sl.ca.is_enabled('roi_feature_ids'):
            sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]
        if sl.ca.is_enabled('roi_sizes'):
            sl.ca.roi_sizes = [r.a.roi_sizes for r in results]
        if sl.ca.is_enabled('roi_center_ids'):
            sl.ca.roi_center_ids = [r.a.roi_center_ids for r in results]

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if roi_ids is None:
                result_ds.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)

                # NNO if the original mapper has no append (because it's not a
                # chainmapper, for example), we make our own chainmapper.
                feat_sel_mapper = StaticFeatureSelection(
                                    roi_ids, dshape=dataset.shape[1:])
                if hasattr(mapper, 'append'):
                    mapper.append(feat_sel_mapper)
                else:
                    mapper = ChainMapper([dataset.a.mapper,
                                          feat_sel_mapper])

                result_ds.a['mapper'] = mapper

        # store the center ids as a feature attribute
        result_ds.fa['center_ids'] = roi_ids

        return result_ds
Example #45
        def newfunc(*arg, **kwargs):
            nfailed, i = 0, 0  # define i just in case
            for i in range(niter):
                try:
                    ret = func(*arg, **kwargs)
                    if i + 1 - nfailed >= niter - nfailures:
                        # so we know already that we wouldn't go over
                        # nfailures
                        break
                except AssertionError as e:
                    nfailed += 1
                    if __debug__:
                        debug('TEST', "Upon %i-th run, test %s failed with %s",
                              (i, func.__name__, e))

                    if nfailed > nfailures:
                        if __debug__:
                            debug(
                                'TEST', "Ran %s %i times. Got %d failures, "
                                "while was allowed %d "
                                "-- re-throwing the last failure %s",
                                (func.__name__, i + 1, nfailed, nfailures, e))
                        exc_info = sys.exc_info()
                        raise exc_info[1].with_traceback(exc_info[2])
            if __debug__:
                debug('TEST', "Ran %s %i times. Got %d failures.",
                      (func.__name__, i + 1, nfailed))
            return ret
Example #46
    def __smart_apply_along_axis(self, data):
        # because apply_along_axis can be much slower than a direct
        # invocation of native functions capable of operating along a
        # specific axis, let's make it smarter for those we know can
        # do that.
        fx = None
        naxis = {'samples': 0, 'features': 1}[self.__axis]
        try:
            # if first argument is 'axis' -- just proceed with a native call
            if inspect.getargs(self.__fx.__code__).args[1] == 'axis':
                fx = self.__fx
            elif __debug__:
                debug('FX', "Will apply %s via apply_along_axis",
                          (self.__fx))
        except Exception as e:
            if __debug__:
                debug('FX',
                      "Failed to deduce whether %s has an 'axis' argument: %s",
                      (self.__fx, repr(e)))
            pass

        if fx is not None:
            if __debug__:
                debug('FX', "Applying %s directly to data giving axis=%d",
                      (self.__fx, naxis))
            mdata = fx(data, naxis, *self.__fxargs)
        else:
            # either failed to deduce signature or just didn't
            # have 'axis' second
            # apply fx along naxis for each sample/feature
            mdata = np.apply_along_axis(self.__fx, naxis, data, *self.__fxargs)
        assert(mdata.ndim in (data.ndim, data.ndim-1))
        return mdata
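The signature check used above, pulled out into a standalone sketch (`takes_axis_second` is a hypothetical helper name): it asks whether a callable's second positional argument is named 'axis'.

import inspect
import numpy as np

def takes_axis_second(fx):
    try:
        return inspect.getargs(fx.__code__).args[1] == 'axis'
    except Exception:
        return False

def my_median(a, axis=None):
    return np.median(a, axis=axis)

assert takes_axis_second(my_median)       # 'axis' is the second argument
assert not takes_axis_second(len)         # builtins have no __code__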
Example #47
    def reverse1(self, data):
        """Reverse-maps data or datasets through the chain (backwards).

        See `Mapper` for more information.
        """
        mp = data
        for i, m in enumerate(reversed(self)):
            # we ignore mappers that do not have reverse mapping implemented
            # (e.g. detrending). That might cause problems if ignoring a
            # mapper makes the data an incompatible input for the next mapper
            # in the chain. If that pops up, we have to think about a proper
            # solution.
            try:
                if __debug__:
                    debug('MAP',
                          "Reversing single %s-shaped input through chain node '%s'."
                           % (mp.shape, str(m)))
                mp = m.reverse1(mp)
            except NotImplementedError:
                if __debug__:
                    debug('MAP', "Ignoring %s on reverse mapping." % m)
            except ValueError:
                if __debug__:
                    debug('MAP',
                          "Failed to reverse-map through chain at '%s'. Maybe "
                          "the previous mapper returned multiple samples. Trying to "
                          "switch to reverse() for the remainder of the chain."
                          % str(m))
                mp = self[:-1 * i].reverse(mp)
                return mp
        return mp
Example #48
    def _forward_dataset(self, dataset):
        """Forward-map a dataset.

        This is a private method that can be reimplemented in derived
        classes. The default implementation forward-maps the dataset samples
        and returns a new dataset that is a shallow copy of the input with
        the mapped samples.

        Parameters
        ----------
        dataset : Dataset-like
        """
        if __debug__:
            debug('MAP_', "Forward-map %s-shaped samples in dataset with '%s'."
                        % (dataset.samples.shape, self))
        msamples = self._forward_data(dataset.samples)
        if __debug__:
            debug('MAP_', "Make shallow copy of to-be-forward-mapped dataset "
                    "and assigned forward-mapped samples ({sf}a_filters: "
                    "%s, %s, %s)." % (self._sa_filter, self._fa_filter,
                                      self._a_filter))
        mds = dataset.copy(deep=False,
                           sa=self._sa_filter,
                           fa=self._fa_filter,
                           a=self._a_filter)
        mds.samples = msamples
        _assure_consistent_a(mds, dataset.shape)

        if __debug__:
            debug('MAP_', "Return forward-mapped dataset.")
        return mds
Example #49
    def _concat_results(sl=None, dataset=None, roi_ids=None, results=None):
        """The simplest implementation for collecting the results --
        just put them into a list

        This implementation simply collects them into a list and uses
        only `sl` for assigning conditional attributes.  A custom
        implementation might make use of more or fewer of them.
        Implemented as @staticmethod just to emphasize that in
        principle it is independent of the actual searchlight instance
        """
        # collect results
        results = sum(results, [])

        if __debug__ and 'SLC' in debug.active:
            debug('SLC', '')  # just newline
            resshape = len(results) and np.asanyarray(
                results[0]).shape or 'N/A'
            debug(
                'SLC',
                ' hstacking %d results of shape %s' % (len(results), resshape))

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        result_ds = hstack(results)

        if __debug__:
            debug('SLC', " hstacked shape %s" % (result_ds.shape, ))

        if sl.ca.is_enabled('roi_feature_ids'):
            sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]
        if sl.ca.is_enabled('roi_sizes'):
            sl.ca.roi_sizes = [r.a.roi_sizes for r in results]

        return result_ds
Example #50
 def __call__(self, ds):
     # overwrite __call__ to perform a rigorous check whether the learner was
     # trained before use and auto-train
     if self.is_trained:
         # already trained
         if self.force_train:
             if __debug__:
                 debug('LRN', "Forcing training of %s on %s",
                       (self, ds))
             # but retraining is enforced
             self.train(ds)
         elif __debug__:
             debug('LRN', "Skipping training of already trained %s on %s",
                   (self, ds))
     else:
         # not trained
         if self.auto_train:
             # auto training requested
             if __debug__:
                 debug('LRN', "Auto-training %s on %s",
                       (self, ds))
             self.train(ds)
         else:
             # we always have to have trained before using a learner
             raise RuntimeError("%s needs to be trained before it can be "
                                "used and auto training is disabled."
                                % str(self))
     return super(Learner, self).__call__(ds)
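
A hedged sketch of the train/auto-train decision above, distilled into a standalone helper (hypothetical names, not the PyMVPA Learner API itself):

def should_train(is_trained, auto_train, force_train):
    # Mirrors the branching above: retrain only when forced, train on
    # first use only when auto-training is enabled, otherwise refuse.
    if is_trained:
        return force_train
    if auto_train:
        return True
    raise RuntimeError("learner must be trained before it can be used")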
Example #51
0
    def __init__(self, startpoints, boxlength, offset=0, **kwargs):
        """
        Parameters
        ----------
        startpoints : sequence
          Index values along the first axis of 'data'.
        boxlength : int
          The number of elements after 'startpoint' along the first axis of
          'data' to be considered for the boxcar.
        offset : int
          The offset between the provided starting point and the actual start
          of the boxcar.
        """
        Mapper.__init__(self, **kwargs)
        self._outshape = None

        startpoints = np.asanyarray(startpoints)
        if np.issubdtype(startpoints.dtype, 'i'):
            self.startpoints = startpoints
        else:
            if __debug__:
                debug(
                    'MAP', "Boxcar: obtained startpoints are not of int type."
                    " Rounding and changing dtype")
            self.startpoints = np.asanyarray(np.round(startpoints), dtype='i')

        # Sanity checks
        if boxlength < 1:
            raise ValueError("Boxlength lower than 1 makes no sense.")
        if boxlength - int(boxlength) != 0:
            raise ValueError("boxlength must be an integer value.")

        self.boxlength = int(boxlength)
        self.offset = offset
        self.__selectors = None

        # build a list of list where each sublist contains the indexes of to be
        # averaged data elements
        self.__selectors = [ slice(i + offset, i + offset + boxlength) \
                             for i in startpoints ]
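
An illustrative sketch of what the selectors built above extract, assuming samples lie along the first axis (stand-in data, not the actual Boxcar mapper):

import numpy as np

data = np.arange(20).reshape(10, 2)          # 10 timepoints, 2 features
startpoints, boxlength, offset = [0, 4], 3, 1
selectors = [slice(i + offset, i + offset + boxlength) for i in startpoints]
boxcars = np.array([data[sel] for sel in selectors])
print(boxcars.shape)                         # (2, 3, 2): one boxcar per startpoint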
Example #52
0
def mask2slice(mask):
    """Convert a boolean mask vector into an equivalent slice (if possible).

    Parameters
    ----------
    mask: boolean array
      The mask.

    Returns
    -------
    slice or boolean array
      If possible the boolean mask is converted into a `slice`. If this is not
      possible the unmodified boolean mask is returned.
    """
    # the filter should be a boolean array
    # TODO Could be easily extended to also accept index arrays
    if not len(mask):
        raise ValueError("Got an empty mask.")
    # get indices of non-zero filter elements
    idx = mask.nonzero()[0]
    if not len(idx):
        return slice(0)
    idx_start = idx[0]
    idx_end = idx[-1] + 1
    idx_step = None
    if len(idx) > 1:
        # we need to figure out if there is a regular step-size
        # between elements
        stepsizes = np.unique(idx[1:] - idx[:-1])
        if len(stepsizes) > 1:
            # multiple step-sizes -> slicing is not possible -> return
            # original filter
            return mask
        else:
            idx_step = stepsizes[0]

    sl = slice(idx_start, idx_end, idx_step)
    if __debug__:
        debug("SPL", "Boolean mask conversion to slice is possible (%s)." % sl)
    return sl
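
A minimal usage sketch of the conversion idea, rewritten standalone (without the debug machinery) so it runs on its own; it checks that slicing and boolean indexing select the same elements:

import numpy as np

def mask_to_slice_sketch(mask):
    # hypothetical standalone rewrite of the logic above
    idx = np.asarray(mask).nonzero()[0]
    if not len(idx):
        return slice(0)
    steps = np.unique(np.diff(idx))
    if len(steps) > 1:
        return mask                          # irregular spacing: keep the mask
    step = steps[0] if len(steps) else None
    return slice(idx[0], idx[-1] + 1, step)

data = np.arange(10) * 10
mask = np.zeros(10, dtype=bool)
mask[2:8:2] = True                           # True at indices 2, 4, 6
sl = mask_to_slice_sketch(mask)              # -> slice(2, 7, 2)
assert np.array_equal(data[sl], data[mask])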
Example #53
0
 def df(x):
     """
     Proxy to the log_marginal_likelihood first
     derivative. Necessary for OpenOpt when using derivatives.
     """
     self.hyp_running_guess[self.freeHypers] = x
     # REMOVE print "df guess:",self.hyp_running_guess,x
     # XXX EO: Most of the following lines can be skipped if
     # df() is computed just after f() with the same
     # hyperparameters. The partial results obtained during f()
     # are what is needed for df(). For now, in order to avoid
      # bugs that are difficult to trace, we keep this redundancy. A
      # deeper check of how OpenOpt works, or memoization, should
      # solve this issue.
     try:
         if self.logscale:
             self.parametric_model.set_hyperparameters(
                 np.exp(self.hyp_running_guess))
         else:
             self.parametric_model.set_hyperparameters(
                 self.hyp_running_guess)
             pass
     except InvalidHyperparameterError:
         if __debug__:
             debug("MOD_SEL", "WARNING: invalid hyperparameters!")
         return -np.inf
     # Check if it is possible to avoid useless computations
     # already done in f(). According to tests and information
      # collected from the OpenOpt developers, it is rather unexpected
      # for the following test to succeed:
     if np.any(x != self.f_last_x):
         if __debug__:
             debug(
                 "MOD_SEL",
                 "UNEXPECTED: recomputing train+log_marginal_likelihood."
             )
         try:
             self.parametric_model.train(self.dataset)
         except (np.linalg.linalg.LinAlgError, SL.basic.LinAlgError,
                 ValueError):
             if __debug__:
                 debug(
                     "MOD_SEL",
                     "WARNING: Cholesky failed! Invalid hyperparameters!"
                 )
             # XXX EO: which value for the gradient to return to
             # OpenOpt when hyperparameters are wrong?
             return np.zeros(x.size)
         log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood(
         )  # recompute what's needed (to be safe) REMOVE IN FUTURE!
         pass
     if self.logscale:
         gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood_logscale(
         )
     else:
         gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood(
         )
         pass
     # REMOVE print "grad:",gradient_log_marginal_likelihood
     return gradient_log_marginal_likelihood[self.freeHypers]
Example #54
0
def _recon_customobj_defaultrecon(hdf, memo):
    """Reconstruct a custom object from HDF using the default recontructor"""
    cls_name = hdf.attrs['class']
    mod_name = hdf.attrs['module']
    if __debug__:
        debug('HDF5', "Load class instance '%s.%s' instance [%s]"
                      % (mod_name, cls_name, hdf.name))
    mod, cls = _import_from_thin_air(mod_name, cls_name)

    # create the object
    # use specialized __new__ if necessary or beneficial
    pcls, = _get_subclass_entry(cls, ((dict,), (list,), (object,)),
                                "Do not know how to create instance of %(cls)s")
    obj = pcls.__new__(cls)
    # insert any stored object state
    _update_obj_state_from_hdf(obj, hdf, memo)

    # do we process a container?
    if 'items' in hdf:
        # charge the items -- handling depends on the parent class
        pcls, umeth, cfunc = _get_subclass_entry(
            cls,
            ((dict, 'update', _hdf_dict_to_obj),
             (list, 'extend', _hdf_list_to_obj)),
            "Unhandled container type (got: '%(cls)s').")
        if __debug__:
            debug('HDF5', "Populating %s object." % pcls)
        getattr(obj, umeth)(cfunc(hdf, memo))
        if __debug__:
            debug('HDF5', "Loaded %i items." % len(obj))

    return obj
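
A hedged sketch of the container-dispatch idea above (a hypothetical helper, not the HDF5 reconstruction code itself): the update method is chosen from the base class of the reconstructed object.

def populate_container(obj, items):
    # dict-like containers are updated, list-like containers are extended
    if isinstance(obj, dict):
        obj.update(items)
    elif isinstance(obj, list):
        obj.extend(items)
    return obj

print(populate_container([], [(1, 'a')]))    # [(1, 'a')]
print(populate_container({}, [(1, 'a')]))    # {1: 'a'}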
Example #55
0
 def _set(self, val, init=False):
     if self.constraints is not None:
         #            for c in self.constraints:
         #                val = c(val)
         #                #val = c.validate(val)
         val = self.constraints(val)
     different_value = self._value != val
     isarray = isinstance(different_value, np.ndarray)
     if self._ro and not init:
         raise RuntimeError("Attempt to set read-only parameter %s to %s" \
               % (self.name, val))
     if (isarray and np.any(different_value)) or \
        ((not isarray) and different_value):
         if __debug__:
             debug("COL", "Parameter: setting %s to %s " % (str(self), val))
         self._value = val
         # Set 'isset' only if not called from initialization routine
         self._isset = not init  #True
     elif __debug__:
         debug("COL",
               "Parameter: not setting %s since value is the same" \
               % (str(self)))
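
Why the array-awareness above matters, shown in isolation: comparing arrays yields an elementwise result whose truth value is ambiguous, so `np.any()` is used to collapse it to a single boolean.

import numpy as np

old, new = np.array([1, 2, 3]), np.array([1, 2, 4])
different_value = old != new                 # elementwise -> array([False, False, True])
print(isinstance(different_value, np.ndarray), np.any(different_value))   # True True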
Example #56
0
def check_all_dependencies(force=False, verbosity=1):
    """
    Test for all known dependencies.

    Parameters
    ----------
    force : boolean
      Whether to force the test even if it has already been
      performed.
    verbosity : int
      If non-zero, a warning is issued for each dependency that is
      not available.

    """
    # loop over all known dependencies
    for dep in _KNOWN:
        if not exists(dep, force):
            if verbosity:
                warning("%s is not available." % dep)

    if __debug__:
        debug('EXT', 'The following optional externals are present: %s'
                     % [k[5:] for k in cfg.options('externals')
                        if k.startswith('have')
                        and cfg.getboolean('externals', k)])
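
A standalone illustration of the option filtering above, using hypothetical config values rather than PyMVPA's actual cfg object: options named 'have <ext>' are kept when true, and the 'have ' prefix (5 characters) is stripped off.

options = {'have numpy': True, 'have shogun': False, 'path to data': '/tmp'}
present = [k[5:] for k in options if k.startswith('have') and options[k]]
print(present)   # ['numpy']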
Example #57
0
    def reverse(self, data):
        """Reverse-maps data or datasets through the chain (backwards).

        See `Mapper` for more information.
        """
        mp = data
        for m in reversed(self):
            # we ignore mappers that do not have a reverse mapping implemented
            # (e.g. detrending). That might cause problems if ignoring a
            # mapper makes the data an incompatible input for the next mapper
            # in the chain. If that pops up, we will have to think about a
            # proper solution.
            try:
                if __debug__:
                    debug('MAP',
                          "Reversing %s-shaped input though '%s'."
                           % (mp.shape, str(m)))
                mp = m.reverse(mp)
            except NotImplementedError:
                if __debug__:
                    debug('MAP', "Ignoring %s on reverse mapping." % m)
        return mp
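
A hedged sketch of the skip-on-NotImplementedError pattern above, using hypothetical stand-in mappers rather than real PyMVPA classes:

class Doubler(object):
    def reverse(self, x):
        return [v / 2.0 for v in x]

class Detrender(object):
    def reverse(self, x):
        raise NotImplementedError            # no reverse mapping available

chain = [Doubler(), Detrender()]
mp = [2, 4, 6]
for m in reversed(chain):
    try:
        mp = m.reverse(mp)
    except NotImplementedError:
        pass                                 # ignore mappers without reverse()
print(mp)                                    # [1.0, 2.0, 3.0]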
Example #58
0
 def __init__(self, value=None, name=None, doc=None):
     """
     Parameters
     ----------
     value : arbitrary (see derived implementations)
       The actual value of this attribute.
     name : str
       Name of the collectable under which it should be available in its
       respective collection.
     doc : str
       Documentation about the purpose of this collectable.
     """
     if doc is not None:
         # to prevent newlines in the docstring
         doc = re.sub('[\n ]+', ' ', doc)
     self.__doc__ = doc
     self.__name = name
     self._value = None
      if value is not None:
         self._set(value)
     if __debug__ and __mvpadebug__:
         debug("COL", "Initialized %r", (self, ))
Example #59
0
    def solve(self, problem=None):
        """Solve the maximization problem, check outcome and collect results.
        """
        # XXX: this method could be made more abstract in the future, in
        # the sense that it could work not only for
        # log_marginal_likelihood but also for other measures
        # (e.g. cross-validated error).

        if np.all(self.freeHypers==False): # no optimization needed
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            try:
                self.parametric_model.set_hyperparameters(self.hyperparameters_best)
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                self.log_marginal_likelihood_best = -np.inf
                return self.log_marginal_likelihood_best
            self.parametric_model.train(self.dataset)
            self.log_marginal_likelihood_best = self.parametric_model.compute_log_marginal_likelihood()
            return self.log_marginal_likelihood_best

        result = self.problem.solve(self.optimization_algorithm) # perform optimization!
        if result.stopcase == -1:
            # XXX: should we use debug() for the following messages?
            # If so, how can we track the missing convergence to a
            # solution?
            print "Unable to find a maximum to log_marginal_likelihood"
        elif result.stopcase == 0:
            print "Limits exceeded"
        elif result.stopcase == 1:
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            if self.logscale:
                # best hyperparameters found
                # NOTE: is it better to return a copy?
                self.hyperparameters_best[self.freeHypers] = np.exp(result.xf)
            else:
                self.hyperparameters_best[self.freeHypers] = result.xf
                pass
            # actual best value of log_marginal_likelihood
            self.log_marginal_likelihood_best = result.ff
            pass
        self.stopcase = result.stopcase
        return self.log_marginal_likelihood_best
Example #60
0
    def _get_default_c(self, data):
        """Compute default C

        TODO: for non-linear SVMs
        """

        if self.params.kernel.__kernel_name__ == 'linear':
            # TODO: move into a function wrapper for
            #       np.linalg.norm
            if np.issubdtype(data.dtype, np.integer):
                # we are dealing with integers and overflows are
                # possible, so assure working with floats
                def sq_func(x):
                    y = x.astype(float) # copy as float
                    y *= y              # in-place square
                    return y
            else:
                sq_func = np.square
            # perform it per each sample so we do not double memory
            # with calling sq_func on full data
            # Having a list of norms here automagically resolves the issue
            # with memmapped operations, which would otherwise return
            # another memmap in turn
            datasetnorm = np.mean([np.sqrt(np.sum(sq_func(s)))
                                   for s in data])
            if datasetnorm == 0:
                warning("Obtained degenerate data with zero norm for training "
                        "of %s.  Scaling of C cannot be done." % self)
                return 1.0
            value = 1.0/(datasetnorm**2)
            if __debug__:
                debug("SVM", "Default C computed to be %f" % value)
        else:
            warning("TODO: Computation of default C is not yet implemented" +
                    " for non-linear SVMs. Assigning 1.0")
            value = 1.0

        return value
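
A minimal numeric sketch of the default-C heuristic above (C = 1 / mean(||sample||)**2), computed sample by sample as in the code:

import numpy as np

data = np.array([[3.0, 4.0], [6.0, 8.0]])
norms = [np.sqrt(np.sum(np.square(s))) for s in data]   # [5.0, 10.0]
C = 1.0 / np.mean(norms) ** 2                           # 1 / 7.5**2 ≈ 0.0178
print(C)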