def __reverse_single_level(self, wp): # local bindings level_paths = self.__level_paths # define wavelet packet to use WP = pywt.WaveletPacket( data=None, wavelet=self._wavelet, mode=self._mode, maxlevel=self.__level) # prepare storage signal_shape = wp.shape[:1] + self._inshape[1:] signal = np.zeros(signal_shape) Ntime_points = self._intimepoints for indexes in _get_indexes(signal_shape, self._dim): if __debug__: debug('MAP_', " %s" % (indexes,), lf=False, cr=True) for path, level_data in zip(level_paths, wp[indexes]): WP[path] = level_data signal[indexes] = WP.reconstruct(True)[:Ntime_points] return signal
def _hdf_list_to_objarray(hdf, memo):
    if not ('shape' in hdf.attrs):
        if __debug__:
            debug('HDF5',
                  "Encountered objarray stored without shape (due to a bug "
                  "in post 2.1 release). Some nested structures etc might not "
                  "be loaded correctly")
        # yoh: we have possibly a problematic case due to my fix earlier
        # resolve to old logic: nested referencing might not work :-/
        obj = _hdf_list_to_obj(hdf, memo)
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)
    else:
        shape = tuple(hdf.attrs['shape'])
        # reserve space first
        if len(shape):
            obj = np.empty(np.prod(shape), dtype=object)
        else:
            # scalar
            obj = np.array(None, dtype=object)
        # now load the items from the list, noting existence of this
        # container
        obj_items = _hdf_list_to_obj(hdf, memo, target_container=obj)
        # assign to the object array
        for i, v in enumerate(obj_items):
            obj[i] = v
        if len(shape) and shape != obj.shape:
            obj = obj.reshape(shape)
    return obj
def _call(self, dataset): sensitivities = [] for ind, analyzer in enumerate(self.__analyzers): if __debug__: debug("SA", "Computing sensitivity for SA#%d:%s" % (ind, analyzer)) sensitivity = analyzer(dataset) sensitivities.append(sensitivity) if __debug__: debug("SA", "Returning %d sensitivities from %s" % (len(sensitivities), self.__class__.__name__)) sa_attr = self._sa_attr if isinstance(sensitivities[0], AttrDataset): smerged = None for i, s in enumerate(sensitivities): s.sa[sa_attr] = np.repeat(i, len(s)) if smerged is None: smerged = s else: smerged.append(s) sensitivities = smerged else: sensitivities = \ Dataset(sensitivities, sa={sa_attr: np.arange(len(sensitivities))}) self.ca.sensitivities = sensitivities return sensitivities
def _get_selected_ids(self, dataset): """Given a dataset actually select the features Returns ------- indexes of the selected features """ # optionally train the analyzer first if self.__train_analyzer: self.__sensitivity_analyzer.train(dataset) sensitivity = self.__sensitivity_analyzer(dataset) """Compute the sensitivity map.""" self.ca.sensitivity = sensitivity # Select features to preserve selected_ids = self.__feature_selector(sensitivity) if __debug__: debug("FS_", "Sensitivity: %s Selected ids: %s" % (sensitivity, selected_ids)) # XXX not sure if it really has to be sorted selected_ids.sort() return selected_ids
def _suppress_scipy_warnings():
    # Infiltrate warnings if necessary
    numpy_ver = versions['numpy']
    scipy_ver = versions['scipy']
    # There are way too many deprecation warnings spit out at the
    # user.  Let's assume that they should be fixed by scipy 0.7.0 time
    if not __debug__ or (__debug__ and 'PY' not in debug.active):
        filter_lines = []
        if "0.6.0" <= scipy_ver and scipy_ver < "0.7.0" \
                and numpy_ver > "1.1.0":
            if __debug__:
                debug('EXT', "Setting up filters for numpy DeprecationWarnings "
                             "regarding scipy < 0.7.0")
            filter_lines += [
                ('NumpyTest will be removed in the next release.*',
                 DeprecationWarning),
                ('PyArray_FromDims: use PyArray_SimpleNew.',
                 DeprecationWarning),
                ('PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.',
                 DeprecationWarning),
                # Trick re.match, since warnings compiles the pattern without re.DOTALL
                ('[\na-z \t0-9]*The original semantics of histogram is scheduled to be.*'
                 '[\na-z \t0-9]*', Warning)
            ]
        if scipy_ver >= "0.15":
            filter_lines += [("`scipy.weave` is deprecated, use `weave` instead!",
                              DeprecationWarning)]
        if scipy_ver >= "0.16":
            # scipy deprecated it but statsmodels still imports it for now
            filter_lines += [("`scipy.linalg.calc_lwork` is deprecated!",
                              DeprecationWarning)]
        for f, w in filter_lines:
            warnings.filterwarnings('ignore', f, w)
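# Illustrative standalone sketch (not library code) of the mechanism used by
# _suppress_scipy_warnings() above: warnings.filterwarnings() registers a
# message pattern so matching DeprecationWarnings are silently dropped instead
# of being shown to the user.
import warnings

# ignore one specific deprecation message, matched as a regular expression
warnings.filterwarnings('ignore', 'NumpyTest will be removed.*',
                        DeprecationWarning)
# this warning is now filtered out rather than printed
warnings.warn('NumpyTest will be removed in the next release',
              DeprecationWarning)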
def newfunc(*arg, **kwargs):
    nfailed, i = 0, 0  # define i just in case
    for i in xrange(niter):
        try:
            ret = func(*arg, **kwargs)
            if i + 1 - nfailed >= niter - nfailures:
                # so we know already that we wouldn't go over
                # nfailures
                break
        except AssertionError, e:
            nfailed += 1
            if __debug__:
                debug("TEST", "Upon %i-th run, test %s failed with %s",
                      (i, func.__name__, e))
            if nfailed > nfailures:
                if __debug__:
                    debug(
                        "TEST",
                        "Ran %s %i times. Got %d failures, "
                        "while was allowed %d "
                        "-- re-throwing the last failure %s",
                        (func.__name__, i + 1, nfailed, nfailures, e),
                    )
                exc_info = sys.exc_info()
                raise exc_info[1], None, exc_info[2]
    if __debug__:
        debug("TEST", "Ran %s %i times. Got %d failures.",
              (func.__name__, i + 1, nfailed))
    return ret
def _train(self, samples): """Perform network training. Parameters ---------- samples : array-like Used for unsupervised training of the SOM. Notes ----- It is assumed that prior to calling this method the _pretrain method was called with the same argument. """ # ensure that dqd was set properly dqd = self._dqd if dqd is None: raise ValueError("This should not happen - was _pretrain called?") # units weight vector deltas for batch training # (height x width x #features) unit_deltas = np.zeros(self._K.shape, dtype='float') # for all iterations for it in xrange(1, self.niter + 1): # compute the neighborhood impact kernel for this iteration # has to be recomputed since kernel shrinks over time k = self._compute_influence_kernel(it, dqd) # for all training vectors for s in samples: # determine closest unit (as element coordinate) b = self._get_bmu(s) # train all units at once by unfolding the kernel (from the # single quadrant that is precomputed), cutting it to the # right shape and simply multiply it to the difference of target # and all unit weights.... infl = np.vstack(( np.hstack(( # upper left k[b[0]:0:-1, b[1]:0:-1], # upper right k[b[0]:0:-1, :self.kshape[1] - b[1]])), np.hstack(( # lower left k[:self.kshape[0] - b[0], b[1]:0:-1], # lower right k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]])) )) unit_deltas += infl[:, :, np.newaxis] * (s - self._K) # apply cumulative unit deltas self._K += unit_deltas if __debug__: debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g" % (it, self.niter, np.sqrt(np.sum(unit_deltas ** 2)))) # reset unit deltas unit_deltas.fill(0.)
def _forward_dataset(self, dataset):
    # invoke the super class' _forward_dataset, which in turn calls
    # _forward_data of this class
    mds = super(FlattenMapper, self)._forward_dataset(dataset)
    # attribute collection needs to have a new length check
    mds.fa.set_length_check(mds.nfeatures)
    # we need to duplicate all existing feature attributes, as each original
    # feature is now spread across the new feature axis
    # take all "additional" axes after the actual feature axis and count
    # elements per sample -- if no such axis exists this will be 1
    for k in dataset.fa:
        if __debug__:
            debug("MAP_", "Forward-mapping fa '%s'." % k)
        attr = dataset.fa[k].value
        # the maximum number of axes to flatten in the attr
        if not self.__maxdims is None:
            maxdim = min(len(self.__origshape), self.__maxdims)
        else:
            maxdim = len(self.__origshape)
        multiplier = mds.nfeatures / np.prod(attr.shape[:maxdim])
        if __debug__:
            debug("MAP_", "Broadcasting fa '%s' %s %d times"
                  % (k, attr.shape, multiplier))
        # broadcast as many times as necessary to get 'matching dimensions'
        bced = np.repeat(attr, multiplier, axis=0)
        # now reshape as many dimensions as the mapper knows about
        mds.fa[k] = bced.reshape((-1,) + bced.shape[maxdim:])
    # if there is no inspace return immediately
    if self.get_space() is None:
        return mds
    # otherwise create the coordinates as feature attributes
    else:
        mds.fa[self.get_space()] = list(np.ndindex(dataset.samples[0].shape))
        return mds
def _postcall(self, ds, result): """Postprocessing of results. By default, does nothing. Parameters ---------- ds: Dataset Original input dataset. result: Dataset Preliminary result dataset (as produced by ``_call()``). Returns ------- Dataset """ if not self.__postproc is None: if __debug__: debug("NO", "Applying post-processing node %s", (self.__postproc,)) self.ca.raw_results = result result = self.__postproc(result) return result
def _train(self, samples):
    """Determine the projection matrix onto the SVD components from
    a 2D samples x feature data matrix.
    """
    X = np.asmatrix(samples)
    X = self._demean_data(X)

    # singular value decomposition
    U, SV, Vh = np.linalg.svd(X, full_matrices=0)

    # store the final matrix with the new basis vectors to project the
    # features onto the SVD components. And store its .H right away to
    # avoid computing it in forward()
    self._proj = Vh.H

    # also store singular values of all components
    self._sv = SV

    if __debug__:
        debug("MAP", "SVD was done on %s and obtained %d SVs "
              % (samples, len(SV)) +
              " (%d non-0, max=%f)" % (len(SV.nonzero()[0]), SV[0]))
        # .norm might be somewhat expensive to compute
        if "MAP_" in debug.active:
            debug("MAP_", "Mixing matrix has %s shape and norm=%f"
                  % (self._proj.shape, np.linalg.norm(self._proj)))
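# Hedged numpy-only sketch of the projection idea in the SVD mapper above
# (variable names here are illustrative, not the mapper's API): the rows of Vh
# from the SVD of the demeaned data form the new basis, and forward-mapping is
# a matrix product with its (conjugate) transpose -- the `.H` stored above.
import numpy as np

X = np.random.randn(20, 5)
Xd = X - X.mean(axis=0)                      # demean the features
U, SV, Vh = np.linalg.svd(Xd, full_matrices=False)
proj = Vh.conj().T                           # features x components projection matrix
mapped = Xd.dot(proj)                        # samples expressed in SVD components
assert mapped.shape == (20, 5)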
def __init__(self, space=None, pass_attr=None, postproc=None, **kwargs): """ Parameters ---------- space : str, optional Name of the 'processing space'. The actual meaning of this argument heavily depends on the sub-class implementation. In general, this is a trigger that tells the node to compute and store information about the input data that is "interesting" in the context of the corresponding processing in the output dataset. pass_attr : str, list of str, optional What attribute(s) (from sa, fa, a collections, see :meth:`Dataset.get_attr`) to pass from original dataset provided to __call__ (before applying postproc), or from 'ca' collection of this instance (use 'ca.' prefix) into the resultant dataset. postproc : Node instance, optional Node to perform post-processing of results. This node is applied in `__call__()` to perform a final processing step on the to be result dataset. If None, nothing is done. """ ClassWithCollections.__init__(self, **kwargs) if __debug__: debug("NO", "Init node '%s' (space: '%s', postproc: '%s')", (self.__class__.__name__, space, str(postproc))) self.set_space(space) self.set_postproc(postproc) if isinstance(pass_attr, basestring): pass_attr = (pass_attr,) self.__pass_attr = pass_attr
def _untrain(self): if __debug__: debug("FS_", "Untraining Iterative FS: %s" % self) self._fmeasure.untrain() self._pmeasure.untrain() # ask base class to do its untrain super(IterativeFeatureSelection, self)._untrain()
def _prepredict(self, dataset): """Functionality prior prediction """ if not ('notrain2predict' in self.__tags__): # check if classifier was trained if that is needed if not self.trained: raise FailedToPredictError( "Classifier %s wasn't yet trained, therefore can't " "predict" % self) nfeatures = dataset.nfeatures #data.shape[1] # check if number of features is the same as in the data # it was trained on if nfeatures != self.__trainednfeatures: raise ValueError, \ "Classifier %s was trained on data with %d features, " % \ (self, self.__trainednfeatures) + \ "thus can't predict for %d features" % nfeatures if self.params.retrainable: if not self.__changedData_isset: self.__reset_changed_data() _changedData = self._changedData data = np.asanyarray(dataset.samples) _changedData['testdata'] = \ self.__was_data_changed('testdata', data) if __debug__: debug('CLF_', "prepredict: Obtained _changedData is %s", (_changedData,))
def _set(self, val):
    if __debug__ and __mvpadebug__:
        # Since this call happens quite often, don't convert
        # values to strings here -- rely on passing them
        # within msgargs
        debug("COL", "Setting %s to %s ", (self, val))
    self._value = val
def __was_data_changed(self, key, entry, update=True): """Check if given entry was changed from what known prior. If so -- store only the ones needed for retrainable beastie """ idhash_ = idhash(entry) __idhashes = self.__idhashes changed = __idhashes[key] != idhash_ if __debug__ and 'CHECK_RETRAIN' in debug.active: __trained = self.__trained changed2 = entry != __trained[key] if isinstance(changed2, np.ndarray): changed2 = changed2.any() if changed != changed2 and not changed: raise RuntimeError, \ 'idhash found to be weak for %s. Though hashid %s!=%s %s, '\ 'estimates %s!=%s %s' % \ (key, idhash_, __idhashes[key], changed, entry, __trained[key], changed2) if update: __trained[key] = entry if __debug__ and changed: debug('CLF_', "Changed %s from %s to %s.%s", (key, __idhashes[key], idhash_, ('','updated')[int(update)])) if update: __idhashes[key] = idhash_ return changed
def label_voxel(self, c, levels = None): if self.__referenceLevel is None: warning("You did not provide what level to use " "for reference. Assigning 0th level -- '%s'" % (self._levels[0],)) self.set_reference_level(0) # return self.__referenceAtlas.label_voxel(c, levels) c = self._check_range(c) # obtain coordinates of the closest voxel cref = self._data[ self.__referenceLevel.indexes, c[0], c[1], c[2] ] dist = norm( (cref - c) * self.voxdim ) if __debug__: debug('ATL__', "Closest referenced point for %r is " "%r at distance %3.2f" % (c, cref, dist)) if (self.distance - dist) >= 1e-3: # neglect everything smaller result = self.__referenceAtlas.label_voxel(cref, levels) result['voxel_referenced'] = c result['distance'] = dist else: result = self.__referenceAtlas.label_voxel(c, levels) if __debug__: debug('ATL__', "Closest referenced point is " "further than desired distance %.2f" % self.distance) result['voxel_referenced'] = None result['distance'] = 0 return result
def _train(self, samples):
    """Perform network training.

    Parameters
    ----------
    samples : array-like
        Used for unsupervised training of the SOM.
    """
    # XXX initialize with clever default, e.g. plane of first two PCA
    # components
    self._K = np.random.standard_normal(tuple(self.kshape) + (samples.shape[1],))

    # units weight vector deltas for batch training
    # (height x width x #features)
    unit_deltas = np.zeros(self._K.shape, dtype='float')

    # precompute distance kernel between elements in the Kohonen layer
    # that will remain constant throughout the training
    # (just compute one quadrant, as the distances are symmetric)
    # XXX maybe do other than squared Euclidean?
    dqd = np.fromfunction(lambda x, y: (x**2 + y**2)**0.5,
                          self.kshape, dtype='float')

    # for all iterations
    for it in xrange(1, self.niter + 1):
        # compute the neighborhood impact kernel for this iteration
        # has to be recomputed since kernel shrinks over time
        k = self._compute_influence_kernel(it, dqd)

        # for all training vectors
        for s in samples:
            # determine closest unit (as element coordinate)
            b = self._get_bmu(s)
            # train all units at once by unfolding the kernel (from the
            # single quadrant that is precomputed), cutting it to the
            # right shape and simply multiplying it with the difference
            # between the target and all unit weights....
            infl = np.vstack((
                    np.hstack((
                        # upper left
                        k[b[0]:0:-1, b[1]:0:-1],
                        # upper right
                        k[b[0]:0:-1, :self.kshape[1] - b[1]])),
                    np.hstack((
                        # lower left
                        k[:self.kshape[0] - b[0], b[1]:0:-1],
                        # lower right
                        k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]]))
                    ))
            unit_deltas += infl[:, :, np.newaxis] * (s - self._K)

        # apply cumulative unit deltas
        self._K += unit_deltas

        if __debug__:
            debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g"
                  % (it, self.niter, np.sqrt(np.sum(unit_deltas ** 2))))

        # reset unit deltas
        unit_deltas.fill(0.)
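# Hedged numpy sketch of the quadrant-unfolding trick used in _train() above:
# only one quadrant of the symmetric influence kernel is precomputed, and for a
# best-matching unit at coordinate `b` the four quadrants are re-assembled via
# reversed/plain slicing so that the kernel ends up centered on `b`.  Shapes and
# the kernel function are arbitrary here.
import numpy as np

kshape = (4, 5)
# one quadrant of a distance-based kernel (same role as `dqd`/`k` above)
k = np.fromfunction(lambda x, y: np.exp(-(x ** 2 + y ** 2)), kshape, dtype='float')
b = (1, 3)  # coordinate of the best-matching unit

infl = np.vstack((
    np.hstack((k[b[0]:0:-1, b[1]:0:-1],              # upper left
               k[b[0]:0:-1, :kshape[1] - b[1]])),    # upper right
    np.hstack((k[:kshape[0] - b[0], b[1]:0:-1],      # lower left
               k[:kshape[0] - b[0], :kshape[1] - b[1]]))))  # lower right

assert infl.shape == kshape
assert infl[b] == k[0, 0]  # the kernel maximum sits on the best-matching unit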
def _binary_data_bytecount(niml): '''helper function that returns how many bytes a NIML binary data element should have''' niform = niml['ni_form'] if not 'binary' in niform: raise ValueError('Illegal niform %s' % niform) tps = niml['vec_typ'] onetype = types.findonetype(tps) if onetype is None: debug('NIML', 'Not unique type: %r', tps) return None # numeric, either int or float ncols = niml['vec_num'] nrows = niml['vec_len'] tp = types.code2numpy_type(onetype) bytes_per_elem = types.numpy_type2bytecount(tp) if bytes_per_elem is None: raise ValueError("Type not supported: %r" % onetype) nb = ncols * nrows * bytes_per_elem debug('NIML', 'Number of bytes for %s: %d x %d with %d bytes / element', (niform, ncols, nrows, bytes_per_elem)) return nb
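# Minimal numpy illustration (not the NIML reader API) of the byte-count logic
# above: a dense binary block occupies rows x columns x bytes-per-element,
# where the per-element size can be read off the numpy dtype.
import numpy as np

ncols, nrows = 3, 100
dtype = np.dtype('float32')
nbytes = ncols * nrows * dtype.itemsize
assert nbytes == np.zeros((nrows, ncols), dtype=dtype).nbytes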
def _level3(self, datasets): params = self.params # for quicker access ;) # create a mapper per dataset mappers = [deepcopy(params.alignment) for ds in datasets] # key different from level-2; the common space is uniform #temp_commonspace = commonspace residuals = None if self.ca['residual_errors'].enabled: residuals = np.zeros((1, len(datasets))) self.ca.residual_errors = Dataset(samples=residuals) # start from original input datasets again for i, (m, ds_new) in enumerate(zip(mappers, datasets)): if __debug__: debug('HPAL_', "Level 3: ds #%i" % i) # retrain mapper on final common space ds_new.sa[m.get_space()] = self.commonspace m.train(ds_new) # remove common space attribute again to save on memory del ds_new.sa[m.get_space()] if residuals is not None: # obtain final projection data_mapped = m.forward(ds_new.samples) residuals[0, i] = np.linalg.norm(data_mapped - self.commonspace) return mappers
def _untrain(self): if __debug__: debug("FS_", "Untraining combined FS: %s" % self) for fs in self.__selectors: fs.untrain() # ask base class to do its untrain super(CombinedFeatureSelection, self)._untrain()
def __init__(self, index=None, *args, **kwargs): """ Parameters ---------- value : arbitrary (see derived implementations) The actual value of this attribute. **kwargs Passed to `Collectable` """ if index is None: IndexedCollectable._instance_index += 1 index = IndexedCollectable._instance_index else: # TODO: there can be collision between custom provided indexes # and the ones automagically assigned. # Check might be due pass self._instance_index = index self._isset = False self.reset() Collectable.__init__(self, *args, **kwargs) if __debug__ and 'COL' in debug.active: debug("COL", "Initialized new IndexedCollectable #%d:%s %r", (index, self.name, self))
def _train(self, dataset): """Select the most important features Parameters ---------- dataset : Dataset used to compute sensitivity maps """ # optionally train the analyzer first if self.__train_analyzer: self.__sensitivity_analyzer.train(dataset) sensitivity = self.__sensitivity_analyzer(dataset) """Compute the sensitivity map.""" self.ca.sensitivity = sensitivity # Select features to preserve selected_ids = self.__feature_selector(sensitivity) if __debug__: debug("FS_", "Sensitivity: %s Selected ids: %s" % (sensitivity, selected_ids)) # XXX not sure if it really has to be sorted selected_ids.sort() # announce desired features to the underlying slice mapper self._safe_assign_slicearg(selected_ids) # and perform its own training super(SensitivityBasedFeatureSelection, self)._train(dataset)
def _train(self, samples): """Train PrototypeMapper """ self._proj = np.hstack([similarity.computed(samples, self.prototypes) for similarity in self.similarities]) if __debug__: debug("MAP", "projected data of shape %s: %s " % (self._proj.shape, self._proj))
def get_selected_indexes(self, n_cfgs): """A naive selection of indexes according to strategy and count Parameters ---------- n_cfgs: int Total number of configurations to select from """ strategy = self.selection_strategy count = self.count if strategy == 'first': indexes = slice(0, count) elif strategy in ['equidistant', 'random']: if strategy == 'equidistant': # figure out what step is needed to # accommodate the `count` number step = float(n_cfgs) / count assert (step >= 1.0) indexes = [int(round(step * i)) for i in xrange(count)] elif strategy == 'random': indexes = np.random.permutation(range(n_cfgs))[:count] # doesn't matter much but lets keep them in the original # order at least indexes.sort() else: # who said that I am paranoid? raise RuntimeError("Really should not happen") if __debug__: debug("SPL", "For %s selection strategy selected %s " "partition specs from %d total", (strategy, indexes, n_cfgs)) return indexes
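# Standalone sketch of the three selection strategies handled above ('first',
# 'equidistant', 'random'); the helper name and its plain-list return value are
# illustrative only.
import numpy as np

def select_indexes(n_cfgs, count, strategy='equidistant'):
    if strategy == 'first':
        return list(range(count))
    elif strategy == 'equidistant':
        step = float(n_cfgs) / count
        return [int(round(step * i)) for i in range(count)]
    elif strategy == 'random':
        return sorted(np.random.permutation(n_cfgs)[:count].tolist())
    raise ValueError("Unknown strategy %r" % strategy)

print(select_indexes(10, 5, 'equidistant'))   # -> [0, 2, 4, 6, 8]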
def _call(self, dataset): # OPT: local bindings clfclf = self.clf.clf analyzer = self.__analyzer if analyzer is None: analyzer = clfclf.get_sensitivity_analyzer( **(self._slave_kwargs)) if analyzer is None: raise ValueError, \ "Wasn't able to figure basic analyzer for clf %s" % \ `clfclf` if __debug__: debug("SA", "Selected analyzer %s for clf %s" % \ (analyzer, clfclf)) # bind to the instance finally self.__analyzer = analyzer # TODO "remove" unnecessary things below on each call... # assign corresponding classifier analyzer.clf = clfclf # if clf was trained already - don't train again if clfclf.trained: analyzer._force_train = False result = analyzer._call(dataset) self.ca.clf_sensitivities = result return result
def __init__(self, value=None, name=None, doc=None): """ Parameters ---------- value : arbitrary (see derived implementations) The actual value of this attribute. name : str Name of the collectable under which it should be available in its respective collection. doc : str Documentation about the purpose of this collectable. """ if doc is not None: # to prevent newlines in the docstring try: doc = re.sub('[\n ]+', ' ', doc) except TypeError: # catch some old datasets stored in HDF5 doc = re.sub('[\n ]+', ' ', np.asscalar(doc)) self.__doc__ = doc self.__name = name self._value = None if value is not None: self._set(value) if __debug__ and __mvpadebug__: debug("COL", "Initialized %r", (self,))
def _forward_dataset(self, dataset): """Forward-map a dataset. This is a private method that can be reimplemented in derived classes. The default implementation forward-maps the dataset samples and returns a new dataset that is a shallow copy of the input with the mapped samples. Parameters ---------- dataset : Dataset-like """ if __debug__: debug('MAP_', "Forward-map %s-shaped samples in dataset with '%s'." % (dataset.samples.shape, self)) msamples = self._forward_data(dataset.samples) if __debug__: debug('MAP_', "Make shallow copy of to-be-forward-mapped dataset " "and assigned forward-mapped samples ({sf}a_filters: " "%s, %s, %s)." % (self._sa_filter, self._fa_filter, self._a_filter)) mds = dataset.copy(deep=False, sa=self._sa_filter, fa=self._fa_filter, a=self._a_filter) mds.samples = msamples return mds
def _call(self, dataset=None): """Extract weights from SMLR classifier. SMLR always has weights available, so nothing has to be computed here. """ clf = self.clf # transpose to have the number of features on the second axis # (as usual) weights = clf.weights.T if __debug__: debug('SMLR', "Extracting weights for %d-class SMLR" % (len(weights) + 1) + "Result: min=%f max=%f" %\ (np.min(weights), np.max(weights))) # limit the labels to the number of sensitivity sets, to deal # with the case of `fit_all_weights=False` ds = Dataset(weights, sa={clf.get_space(): clf._ulabels[:len(weights)]}) if clf.params.has_bias: ds.sa['biases'] = clf.biases return ds
def _call(self, dataset): analyzers = [] # create analyzers for clf in self.clf.clfs: if self.__analyzer is None: analyzer = clf.get_sensitivity_analyzer(**(self._slave_kwargs)) if analyzer is None: raise ValueError, \ "Wasn't able to figure basic analyzer for clf %r" % \ (clf,) if __debug__: debug("SA", "Selected analyzer %r for clf %r" % \ (analyzer, clf)) else: # XXX shallow copy should be enough... analyzer = copy.copy(self.__analyzer) # assign corresponding classifier analyzer.clf = clf # if clf was trained already - don't train again if clf.trained: analyzer._force_train = False analyzers.append(analyzer) self.__combined_analyzer.analyzers = analyzers # XXX not sure if we don't want to call directly ._call(dataset) to avoid # double application of transformers/combiners, after all we are just # 'proxying' here to combined_analyzer... # YOH: decided -- lets call ._call return self.__combined_analyzer._call(dataset)
def forward(self, data): """Map data from input to output space. Parameters ---------- data : Dataset-like, (at least 2D)-array-like Typically this is a `Dataset`, but it might also be a plain data array, or even something completely different(TM) that is supported by a subclass' implementation. If such an object is Dataset-like it is handled by a dedicated method that also transforms dataset attributes if necessary. If an array-like is passed, it has to be at least two-dimensional, with the first axis separating samples or observations. For single samples `forward1()` might be more appropriate. """ if is_datasetlike(data): if __debug__: debug('MAP', "Forward-map %s-shaped dataset through '%s'." % (data.shape, self)) return self._forward_dataset(data) else: if hasattr(data, 'ndim') and data.ndim < 2: raise ValueError( 'Mapper.forward() only support mapping of data with ' 'at least two dimensions, where the first axis ' 'separates samples/observations. Consider using ' 'Mapper.forward1() instead.') if __debug__: debug('MAP', "Forward-map data through '%s'." % (self)) return self._forward_data(data)
def _wm_reverse(self, data): if __debug__: debug('MAP', "Converting signal back using DWP") if self.__level is None: raise NotImplementedError else: if not externals.exists('pywt wp reconstruct'): raise NotImplementedError, \ "Reconstruction for a single level for versions of " \ "pywt < 0.1.7 (revision 103) is not supported" if not externals.exists('pywt wp reconstruct fixed'): warning( "%s: Reverse mapping with this version of 'pywt' might " "result in incorrect data in the tails of the signal. " "Please check for an update of 'pywt', or be careful " "when interpreting the edges of the reverse mapped " "data." % self.__class__.__name__) return self.__reverse_single_level(data)
def __init__(self, kernel=None, **kwargs):
    """Initialize a GPR regression analysis.

    Parameters
    ----------
    kernel : Kernel
        a kernel object defining the covariance between instances.
        (Defaults to SquaredExponentialKernel if None in arguments)
    """
    # init base class first
    Classifier.__init__(self, **kwargs)

    # It does not make sense to calculate a confusion matrix for a GPR
    # XXX it does ;) it will be a RegressionStatistics actually ;-)
    # So if someone desires -- let him have it
    # self.ca.enable('training_stats', False)

    # set kernel:
    if kernel is None:
        kernel = SquaredExponentialKernel()
        if __debug__:
            debug("GPR",
                  "No kernel was provided, falling back to default: %s"
                  % kernel)
    self.__kernel = kernel

    # append proper clf_internal depending on the kernel
    # TODO: add "__tags__" to kernels since the check
    #       below does not scale
    if isinstance(kernel, GeneralizedLinearKernel) or \
       isinstance(kernel, LinearKernel):
        self.__tags__ += ['linear']
    else:
        self.__tags__ += ['non-linear']
        if externals.exists('openopt') \
           and not 'has_sensitivity' in self.__tags__:
            self.__tags__ += ['has_sensitivity']

    # No need to initialize conditional attributes. Unless they got set
    # they would raise an exception
    # self.predicted_variances = None
    # self.log_marginal_likelihood = None
    self._init_internals()
    pass
def __check(name, a='__version__'):
    exec "import %s" % name
    # it might be lxml.etree, so take only first module
    topmodname = name.split('.')[0]
    try:
        v = getattr(sys.modules[name], a)
    except Exception as e:
        # module is there, but we could not figure out its version directly
        if __debug__:
            debug('EXT',
                  'Failed to acquire a version of %(name)s: %(e)s' % locals())
        # if module is present but does not bear __version__
        try:
            import pkg_resources
            v = pkg_resources.get_distribution(topmodname).version
        except Exception as e:
            # and if all that failed -- just assign '0'
            v = '0'
    versions[topmodname] = SmartVersion(v)
    return True  # we did manage to import it -- so it is there
def _get_cvec(self, data):
    """Estimate the default C and scale negative user-provided C values by it
    """
    if not 'C' in self.params:  # svm_type in [_svm.svmc.C_SVC]:
        raise RuntimeError(
            "Requested estimation of default C although C was not set")

    C = self.params.C
    if not is_sequence_type(C):
        # we were not given a tuple for balancing between classes
        C = [C]

    Cs = list(C[:])  # copy
    for i in range(len(Cs)):
        if Cs[i] < 0:
            Cs[i] = self._get_default_c(data.samples) * abs(Cs[i])
            if __debug__:
                debug("SVM", "Default C for %s was computed to be %s"
                      % (C[i], Cs[i]))

    return Cs
def test_debug(self): verbose.handlers = [] # so debug doesn't spoil it debug.active = ['1', '2', 'SLC'] debug.metrics = debug._known_metrics.keys() # do not offset for this test debug('SLC', self.msg, lf=False) self.assertRaises(ValueError, debug, 3, 'bugga') #Should complain about unknown debug id svalue = self.sout.getvalue() regexp = "\[SLC\] DBG(?:{.*})?: %s" % self.msg rematch = re.match(regexp, svalue) self.assertTrue(rematch, msg="Cannot match %s with regexp %s" % (svalue, regexp)) # find metrics self.assertTrue('RSS/VMS:' in svalue, msg="Cannot find vmem metric in " + svalue) self.assertTrue('>test_verbosity:' in svalue, msg="Cannot find tbc metric in " + svalue) self.assertTrue(' sec' in svalue, msg="Cannot find tbc metric in " + svalue)
def _recon_customobj_customrecon(hdf, memo):
    """Reconstruct a custom object from HDF using a custom reconstructor"""
    # we found something that has some special idea about how it wants
    # to be reconstructed
    mod_name = hdf.attrs['module']
    recon_name = hdf.attrs['recon']
    if __debug__:
        debug(
            'HDF5',
            "Load from custom reconstructor '%s.%s' [%s]"
            % (mod_name, recon_name, hdf.name))
    # turn names into definitions
    try:
        mod = __import__(mod_name, fromlist=[recon_name])
    except ImportError, e:
        if mod_name.startswith('mvpa') and not mod_name.startswith('mvpa2'):
            # try to be gentle on data that got stored with PyMVPA 0.5 or 0.6
            mod_name = mod_name.replace('mvpa', 'mvpa2', 1)
            mod = __import__(mod_name, fromlist=[recon_name])
        else:
            raise e
def __call__(self, ds): """ .. note: Will raise KeyError if lookup for sample_ids fails, or ds has not been mapped at all """ if (not 'magic_id' in ds.a) or ds.a.magic_id != self._orig_ds_id: raise KeyError, \ 'Dataset %s is not indexed by %s' % (ds, self) _map = self._map _origids = ds.sa.origids res = np.array([_map[i] for i in _origids]) if __debug__: debug('SAL', "Successful lookup: %(inst)s on %(ds)s having " "origids=%(origids)s resulted in %(res)s", msgargs=dict(inst=self, ds=ds, origids=_origids, res=res)) return res
def wr1996(size=200):
    """Generate '6d robot arm' dataset (Williams and Rasmussen 1996)

    Was originally created in order to test the correctness of the
    implementation of kernel ARD.  For full details see:
    http://www.gaussianprocess.org/gpml/code/matlab/doc/regression.html#ard

    x_1 picked randomly in [-1.932, -0.453]
    x_2 picked randomly in [0.534, 3.142]
    r_1 = 2.0
    r_2 = 1.3
    f(x_1,x_2) = r_1 cos (x_1) + r_2 cos(x_1 + x_2) + N(0,0.0025)
    etc.

    Expected relevances:
    ell_1      1.804377
    ell_2      1.963956
    ell_3      8.884361
    ell_4     34.417657
    ell_5   1081.610451
    ell_6    375.445823
    sigma_f    2.379139
    sigma_n    0.050835
    """
    intervals = np.array([[-1.932, -0.453], [0.534, 3.142]])
    r = np.array([2.0, 1.3])
    x = np.random.rand(size, 2)
    x *= np.array(intervals[:, 1] - intervals[:, 0])
    x += np.array(intervals[:, 0])
    if __debug__:
        for i in xrange(2):
            debug(
                'DG',
                'column %d: min=%g max=%g'
                % (i, x[:, i].min(), x[:, i].max()))
    # f(x_1, x_2) = r_1 cos(x_1) + r_2 cos(x_1 + x_2) + noise (see docstring)
    y = r[0] * np.cos(x[:, 0]) + r[1] * np.cos(x.sum(1)) + \
        np.random.randn(size) * np.sqrt(0.0025)
    y -= y.mean()
    x34 = x + np.random.randn(size, 2) * 0.02
    x56 = np.random.randn(size, 2)
    x = np.hstack([x, x34, x56])
    return dataset_wizard(samples=x, targets=y)
def reverse(self, data): """Reverse-map data from output back into input space. Parameters ---------- data : Dataset-like, anything Typically this is a `Dataset`, but it might also be a plain data array, or even something completely different(TM) that is supported by a subclass' implementation. If such an object is Dataset-like it is handled by a dedicated method that also transforms dataset attributes if necessary. """ if is_datasetlike(data): if __debug__: debug('MAP', "Reverse-map %s-shaped dataset through '%s'." % (data.shape, self)) return self._reverse_dataset(data) else: if __debug__: debug('MAP', "Reverse-map data through '%s'." % (self)) return self._reverse_data(data)
def _set_matplotlib_backend(): """Check if we have custom backend to set and it is different from current one """ backend = cfg.get('matplotlib', 'backend') if backend: import matplotlib as mpl mpl_backend = mpl.get_backend().lower() if mpl_backend != backend.lower(): if __debug__: debug('EXT_', "Trying to set matplotlib backend to %s" % backend) mpl.use(backend) import warnings # And disable useless warning from matplotlib in the future warnings.filterwarnings( 'ignore', 'This call to matplotlib.use() has no effect.*', UserWarning) elif __debug__: debug('EXT_', "Not trying to set matplotlib backend to %s since it was " "already set" % backend)
def reverse1(self, data):
    """Wrapper method to map single samples.

    It is basically identical to `reverse()`, but accepts one-dimensional
    arguments.  This method cannot be used to map whole datasets --
    `reverse()` handles those.
    """
    if isinstance(data, np.ndarray):
        data = data[np.newaxis]
    else:
        data = np.array([data])
    if __debug__:
        debug(
            'MAP', "Reverse-map single %s-shaped sample through '%s'."
            % (data.shape[1:], self))
    mapped = self.reverse(data)[0]
    if __debug__:
        debug(
            'MAP', "Mapped single %s-shaped sample to %s."
            % (data.shape[1:], mapped.shape))
    return mapped
def __init__(self, space=None, postproc=None, **kwargs): """ Parameters ---------- space: str, optional Name of the 'processing space'. The actual meaning of this argument heavily depends on the sub-class implementation. In general, this is a trigger that tells the node to compute and store information about the input data that is "interesting" in the context of the corresponding processing in the output dataset. postproc : Node instance, optional Node to perform post-processing of results. This node is applied in `__call__()` to perform a final processing step on the to be result dataset. If None, nothing is done. """ ClassWithCollections.__init__(self, **kwargs) if __debug__: debug("NO", "Init node '%s' (space: '%s', postproc: '%s')", (self.__class__.__name__, space, str(postproc))) self.set_space(space) self.set_postproc(postproc)
def _cache(self, ds1, ds2=None):
    """Initializes internal lookups + _kfull via caching the kernel matrix
    """
    if __debug__ and 'KRN' in debug.active:
        debug('KRN', "Caching %(inst)s for ds1=%(ds1)s, ds2=%(ds2)s"
              % dict(inst=self, ds1=ds1, ds2=ds2))
    self._lhsids = SamplesLookup(ds1)
    if (ds2 is None) or (ds2 is ds1):
        self._rhsids = self._lhsids
    else:
        self._rhsids = SamplesLookup(ds2)

    ckernel = self._kernel
    ckernel.compute(ds1, ds2)
    self._kfull = ckernel.as_raw_np()
    ckernel.cleanup()
    self._k = self._kfull

    self._recomputed = True
    self.params.reset()
def _concat_results(sl=None, dataset=None, roi_ids=None, results=None):
    """The simplest implementation for collecting the results --
    just put them into a list

    This implementation simply collects them into a list and uses
    only `sl` for assigning conditional attributes.  But custom
    implementation might make use of more/less of them.
    Implemented as @staticmethod just to emphasize that in principle
    it is independent of the actual searchlight instance
    """
    # collect results
    results = sum(results, [])

    if __debug__ and 'SLC' in debug.active:
        debug('SLC', '')            # just newline
        resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
        debug('SLC', ' hstacking %d results of shape %s'
              % (len(results), resshape))

    # but be careful: this call also serves as conversion from parallel maps
    # to regular lists!
    # this uses the Dataset-hstack
    result_ds = hstack(results)

    if __debug__:
        debug('SLC', " hstacked shape %s" % (result_ds.shape,))

    if sl.ca.is_enabled('roi_feature_ids'):
        sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]
    if sl.ca.is_enabled('roi_sizes'):
        sl.ca.roi_sizes = [r.a.roi_sizes for r in results]
    if sl.ca.is_enabled('roi_center_ids'):
        sl.ca.roi_center_ids = [r.a.roi_center_ids for r in results]

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if roi_ids is None:
            result_ds.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO if the original mapper has no append (because it's not a
            # chainmapper, for example), we make our own chainmapper.
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            if hasattr(mapper, 'append'):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper])

            result_ds.a['mapper'] = mapper

    # store the center ids as a feature attribute
    result_ds.fa['center_ids'] = roi_ids

    return result_ds
def newfunc(*arg, **kwargs): nfailed, i = 0, 0 # define i just in case for i in range(niter): try: ret = func(*arg, **kwargs) if i + 1 - nfailed >= niter - nfailures: # so we know already that we wouldn't go over # nfailures break except AssertionError as e: nfailed += 1 if __debug__: debug('TEST', "Upon %i-th run, test %s failed with %s", (i, func.__name__, e)) if nfailed > nfailures: if __debug__: debug( 'TEST', "Ran %s %i times. Got %d failures, " "while was allowed %d " "-- re-throwing the last failure %s", (func.__name__, i + 1, nfailed, nfailures, e)) exc_info = sys.exc_info() raise exc_info[1].with_traceback(exc_info[2]) if __debug__: debug('TEST', "Ran %s %i times. Got %d failures.", (func.__name__, i + 1, nfailed)) return ret
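# Hedged, self-contained sketch of the retry pattern implemented by `newfunc`
# above: rerun a flaky test up to `niter` times and only re-raise once more
# than `nfailures` runs have failed.  The decorator name and defaults are made
# up for this example.
import functools

def retry_on_assertion(niter=3, nfailures=1):
    def decorate(func):
        @functools.wraps(func)
        def newfunc(*args, **kwargs):
            nfailed = 0
            last_exc = None
            for i in range(niter):
                try:
                    return func(*args, **kwargs)
                except AssertionError as e:
                    nfailed += 1
                    last_exc = e
                    if nfailed > nfailures:
                        raise
            # reachable only if every run failed but nfailures >= niter
            raise last_exc
        return newfunc
    return decorate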
def __smart_apply_along_axis(self, data): # because apply_along_axis could be very much slower than a # direct invocation of native functions capable of operating # along specific axis, let's make it smarter for those we know # could do that. fx = None naxis = {'samples': 0, 'features': 1}[self.__axis] try: # if first argument is 'axis' -- just proceed with a native call if inspect.getargs(self.__fx.__code__).args[1] == 'axis': fx = self.__fx elif __debug__: debug('FX', "Will apply %s via apply_along_axis", (self.__fx)) except Exception as e: if __debug__: debug('FX', "Failed to deduce either %s has 'axis' argument: %s", (self.__fx, repr(e))) pass if fx is not None: if __debug__: debug('FX', "Applying %s directly to data giving axis=%d", (self.__fx, naxis)) mdata = fx(data, naxis, *self.__fxargs) else: # either failed to deduce signature or just didn't # have 'axis' second # apply fx along naxis for each sample/feature mdata = np.apply_along_axis(self.__fx, naxis, data, *self.__fxargs) assert(mdata.ndim in (data.ndim, data.ndim-1)) return mdata
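# Small numpy demonstration of why the "smart" dispatch above is worthwhile:
# functions that accept an `axis` argument (np.mean here) can be called
# directly, which gives the same result as -- and is typically much faster
# than -- np.apply_along_axis looping over 1-d slices.
import numpy as np

data = np.arange(12.0).reshape(3, 4)
direct = np.mean(data, 1)                       # native call along the features axis
looped = np.apply_along_axis(np.mean, 1, data)  # generic, slower fallback
assert np.allclose(direct, looped)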
def reverse1(self, data):
    """Reverse-maps data or datasets through the chain (backwards).

    See `Mapper` for more information.
    """
    mp = data
    for i, m in enumerate(reversed(self)):
        # we ignore mappers that do not have reverse mapping implemented
        # (e.g. detrending). That might cause problems if ignoring the
        # mapper makes the data an incompatible input for the next mapper in
        # the chain. If that pops up, we have to think about a proper
        # solution.
        try:
            if __debug__:
                debug('MAP',
                      "Reversing single %s-shaped input through chain node '%s'."
                      % (mp.shape, str(m)))
            mp = m.reverse1(mp)
        except NotImplementedError:
            if __debug__:
                debug('MAP', "Ignoring %s on reverse mapping." % m)
        except ValueError:
            if __debug__:
                debug('MAP',
                      "Failed to reverse-map through chain at '%s'. Maybe "
                      "previous mapper returned multiple samples. Trying to "
                      "switch to reverse() for the remainder of the chain."
                      % str(m))
            mp = self[:-1 * i].reverse(mp)
            return mp
    return mp
def _forward_dataset(self, dataset): """Forward-map a dataset. This is a private method that can be reimplemented in derived classes. The default implementation forward-maps the dataset samples and returns a new dataset that is a shallow copy of the input with the mapped samples. Parameters ---------- dataset : Dataset-like """ if __debug__: debug('MAP_', "Forward-map %s-shaped samples in dataset with '%s'." % (dataset.samples.shape, self)) msamples = self._forward_data(dataset.samples) if __debug__: debug('MAP_', "Make shallow copy of to-be-forward-mapped dataset " "and assigned forward-mapped samples ({sf}a_filters: " "%s, %s, %s)." % (self._sa_filter, self._fa_filter, self._a_filter)) mds = dataset.copy(deep=False, sa=self._sa_filter, fa=self._fa_filter, a=self._a_filter) mds.samples = msamples _assure_consistent_a(mds, dataset.shape) if __debug__: debug('MAP_', "Return forward-mapped dataset.") return mds
def _concat_results(sl=None, dataset=None, roi_ids=None, results=None):
    """The simplest implementation for collecting the results --
    just put them into a list

    This implementation simply collects them into a list and uses
    only `sl` for assigning conditional attributes.  But custom
    implementation might make use of more/less of them.
    Implemented as @staticmethod just to emphasize that in principle
    it is independent of the actual searchlight instance
    """
    # collect results
    results = sum(results, [])

    if __debug__ and 'SLC' in debug.active:
        debug('SLC', '')            # just newline
        resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
        debug('SLC', ' hstacking %d results of shape %s'
              % (len(results), resshape))

    # but be careful: this call also serves as conversion from parallel maps
    # to regular lists!
    # this uses the Dataset-hstack
    result_ds = hstack(results)
    if __debug__:
        debug('SLC', " hstacked shape %s" % (result_ds.shape,))

    if sl.ca.is_enabled('roi_feature_ids'):
        sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]
    if sl.ca.is_enabled('roi_sizes'):
        sl.ca.roi_sizes = [r.a.roi_sizes for r in results]

    return result_ds
def __call__(self, ds): # overwrite __call__ to perform a rigorous check whether the learner was # trained before use and auto-train if self.is_trained: # already trained if self.force_train: if __debug__: debug('LRN', "Forcing training of %s on %s", (self, ds)) # but retraining is enforced self.train(ds) elif __debug__: debug('LRN', "Skipping training of already trained %s on %s", (self, ds)) else: # not trained if self.auto_train: # auto training requested if __debug__: debug('LRN', "Auto-training %s on %s", (self, ds)) self.train(ds) else: # we always have to have trained before using a learner raise RuntimeError("%s needs to be trained before it can be " "used and auto training is disabled." % str(self)) return super(Learner, self).__call__(ds)
def __init__(self, startpoints, boxlength, offset=0, **kwargs): """ Parameters ---------- startpoints : sequence Index values along the first axis of 'data'. boxlength : int The number of elements after 'startpoint' along the first axis of 'data' to be considered for the boxcar. offset : int The offset between the provided starting point and the actual start of the boxcar. """ Mapper.__init__(self, **kwargs) self._outshape = None startpoints = np.asanyarray(startpoints) if np.issubdtype(startpoints.dtype, 'i'): self.startpoints = startpoints else: if __debug__: debug( 'MAP', "Boxcar: obtained startpoints are not of int type." " Rounding and changing dtype") self.startpoints = np.asanyarray(np.round(startpoints), dtype='i') # Sanity checks if boxlength < 1: raise ValueError, "Boxlength lower than 1 makes no sense." if boxlength - int(boxlength) != 0: raise ValueError, "boxlength must be an integer value." self.boxlength = int(boxlength) self.offset = offset self.__selectors = None # build a list of list where each sublist contains the indexes of to be # averaged data elements self.__selectors = [ slice(i + offset, i + offset + boxlength) \ for i in startpoints ]
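# Illustrative sketch of the selector construction above: every startpoint maps
# to a slice of `boxlength` consecutive samples (shifted by `offset`), which can
# then be used to cut event-related chunks out of a time series.  Values are
# arbitrary.
import numpy as np

startpoints, boxlength, offset = [0, 4, 8], 3, 1
selectors = [slice(i + offset, i + offset + boxlength) for i in startpoints]
data = np.arange(12)
boxes = np.array([data[sel] for sel in selectors])
print(boxes)   # rows: [1 2 3], [5 6 7], [9 10 11]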
def mask2slice(mask):
    """Convert a boolean mask vector into an equivalent slice (if possible).

    Parameters
    ----------
    mask : boolean array
      The mask.

    Returns
    -------
    slice or boolean array
      If possible the boolean mask is converted into a `slice`.  If this is
      not possible the unmodified boolean mask is returned.
    """
    # the filter should be a boolean array
    # TODO Could be easily extended to also accept index arrays
    if not len(mask):
        raise ValueError("Got an empty mask.")
    # get indices of non-zero filter elements
    idx = mask.nonzero()[0]
    if not len(idx):
        return slice(0)
    idx_start = idx[0]
    idx_end = idx[-1] + 1
    idx_step = None
    if len(idx) > 1:
        # we need to figure out if there is a regular step-size
        # between elements
        stepsizes = np.unique(idx[1:] - idx[:-1])
        if len(stepsizes) > 1:
            # multiple step-sizes -> slicing is not possible -> return
            # original filter
            return mask
        else:
            idx_step = stepsizes[0]

    sl = slice(idx_start, idx_end, idx_step)
    if __debug__:
        debug("SPL", "Boolean mask conversion to slice is possible (%s)." % sl)
    return sl
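# Minimal demonstration of the mask-to-slice conversion above: a boolean mask
# whose True entries are regularly spaced selects exactly the same elements as
# a slice built from the first index, last index + 1, and the common step.
import numpy as np

mask = np.array([False, True, False, True, False, True, False])
idx = mask.nonzero()[0]                # -> [1, 3, 5]
step = np.unique(np.diff(idx))         # -> [2], a single step size
sl = slice(idx[0], idx[-1] + 1, int(step[0]))
data = np.arange(7) * 10
assert np.array_equal(data[mask], data[sl])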
def df(x): """ Proxy to the log_marginal_likelihood first derivative. Necessary for OpenOpt when using derivatives. """ self.hyp_running_guess[self.freeHypers] = x # REMOVE print "df guess:",self.hyp_running_guess,x # XXX EO: Most of the following lines can be skipped if # df() is computed just after f() with the same # hyperparameters. The partial results obtained during f() # are what is needed for df(). For now, in order to avoid # bugs difficult to trace, we keep this redunundancy. A # deep check with how OpenOpt works or using memoization # should solve this issue. try: if self.logscale: self.parametric_model.set_hyperparameters( np.exp(self.hyp_running_guess)) else: self.parametric_model.set_hyperparameters( self.hyp_running_guess) pass except InvalidHyperparameterError: if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!") return -np.inf # Check if it is possible to avoid useless computations # already done in f(). According to tests and information # collected from OpenOpt people, it is sufficiently # unexpected that the following test succeed: if np.any(x != self.f_last_x): if __debug__: debug( "MOD_SEL", "UNEXPECTED: recomputing train+log_marginal_likelihood." ) try: self.parametric_model.train(self.dataset) except (np.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError): if __debug__: debug( "MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!" ) # XXX EO: which value for the gradient to return to # OpenOpt when hyperparameters are wrong? return np.zeros(x.size) log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood( ) # recompute what's needed (to be safe) REMOVE IN FUTURE! pass if self.logscale: gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood_logscale( ) else: gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood( ) pass # REMOVE print "grad:",gradient_log_marginal_likelihood return gradient_log_marginal_likelihood[self.freeHypers]
def _recon_customobj_defaultrecon(hdf, memo):
    """Reconstruct a custom object from HDF using the default reconstructor"""
    cls_name = hdf.attrs['class']
    mod_name = hdf.attrs['module']
    if __debug__:
        debug('HDF5', "Load class instance '%s.%s' [%s]"
                      % (mod_name, cls_name, hdf.name))

    mod, cls = _import_from_thin_air(mod_name, cls_name)

    # create the object
    # use specialized __new__ if necessary or beneficial
    pcls, = _get_subclass_entry(cls, ((dict,), (list,), (object,)),
                                "Do not know how to create instance of %(cls)s")
    obj = pcls.__new__(cls)
    # insert any stored object state
    _update_obj_state_from_hdf(obj, hdf, memo)

    # do we process a container?
    if 'items' in hdf:
        # charge the items -- handling depends on the parent class
        pcls, umeth, cfunc = _get_subclass_entry(
            cls,
            ((dict, 'update', _hdf_dict_to_obj),
             (list, 'extend', _hdf_list_to_obj)),
            "Unhandled container type (got: '%(cls)s').")
        if __debug__:
            debug('HDF5', "Populating %s object." % pcls)
        getattr(obj, umeth)(cfunc(hdf, memo))
        if __debug__:
            debug('HDF5', "Loaded %i items." % len(obj))

    return obj
def _set(self, val, init=False): if self.constraints is not None: # for c in self.constraints: # val = c(val) # #val = c.validate(val) val = self.constraints(val) different_value = self._value != val isarray = isinstance(different_value, np.ndarray) if self._ro and not init: raise RuntimeError("Attempt to set read-only parameter %s to %s" \ % (self.name, val)) if (isarray and np.any(different_value)) or \ ((not isarray) and different_value): if __debug__: debug("COL", "Parameter: setting %s to %s " % (str(self), val)) self._value = val # Set 'isset' only if not called from initialization routine self._isset = not init #True elif __debug__: debug("COL", "Parameter: not setting %s since value is the same" \ % (str(self)))
def check_all_dependencies(force=False, verbosity=1): """ Test for all known dependencies. Parameters ---------- force : boolean Whether to force the test even if it has already been performed. """ # loop over all known dependencies for dep in _KNOWN: if not exists(dep, force): if verbosity: warning("%s is not available." % dep) if __debug__: debug('EXT', 'The following optional externals are present: %s' % [k[5:] for k in cfg.options('externals') if k.startswith('have') and cfg.getboolean('externals', k)])
def reverse(self, data):
    """Reverse-maps data or datasets through the chain (backwards).

    See `Mapper` for more information.
    """
    mp = data
    for m in reversed(self):
        # we ignore mappers that do not have reverse mapping implemented
        # (e.g. detrending). That might cause problems if ignoring the
        # mapper makes the data an incompatible input for the next mapper in
        # the chain. If that pops up, we have to think about a proper
        # solution.
        try:
            if __debug__:
                debug('MAP',
                      "Reversing %s-shaped input through '%s'."
                      % (mp.shape, str(m)))
            mp = m.reverse(mp)
        except NotImplementedError:
            if __debug__:
                debug('MAP', "Ignoring %s on reverse mapping." % m)
    return mp
def __init__(self, value=None, name=None, doc=None): """ Parameters ---------- value : arbitrary (see derived implementations) The actual value of this attribute. name : str Name of the collectable under which it should be available in its respective collection. doc : str Documentation about the purpose of this collectable. """ if doc is not None: # to prevent newlines in the docstring doc = re.sub('[\n ]+', ' ', doc) self.__doc__ = doc self.__name = name self._value = None if not value is None: self._set(value) if __debug__ and __mvpadebug__: debug("COL", "Initialized %r", (self, ))
def solve(self, problem=None):
    """Solve the maximization problem, check outcome and collect results.
    """
    # XXX: this method can be made more abstract in future in the
    # sense that it could work not only for
    # log_marginal_likelihood but other measures as well
    # (e.g. cross-validated error).

    if np.all(self.freeHypers == False):  # no optimization needed
        self.hyperparameters_best = self.hyp_initial_guess.copy()
        try:
            self.parametric_model.set_hyperparameters(self.hyperparameters_best)
        except InvalidHyperparameterError:
            if __debug__:
                debug("MOD_SEL", "WARNING: invalid hyperparameters!")
            self.log_marginal_likelihood_best = -np.inf
            return self.log_marginal_likelihood_best
        self.parametric_model.train(self.dataset)
        self.log_marginal_likelihood_best = \
            self.parametric_model.compute_log_marginal_likelihood()
        return self.log_marginal_likelihood_best

    result = self.problem.solve(self.optimization_algorithm)  # perform optimization!
    if result.stopcase == -1:
        # XXX: should we use debug() for the following messages?
        # If so, how can we track the missing convergence to a
        # solution?
        print "Unable to find a maximum to log_marginal_likelihood"
    elif result.stopcase == 0:
        print "Limits exceeded"
    elif result.stopcase == 1:
        self.hyperparameters_best = self.hyp_initial_guess.copy()
        if self.logscale:
            # best hyperparameters found
            # NOTE is it better to return a copy?
            self.hyperparameters_best[self.freeHypers] = np.exp(result.xf)
        else:
            self.hyperparameters_best[self.freeHypers] = result.xf
            pass
        # actual best value of log_marginal_likelihood
        self.log_marginal_likelihood_best = result.ff
        pass
    self.stopcase = result.stopcase
    return self.log_marginal_likelihood_best
def _get_default_c(self, data):
    """Compute default C

    TODO: for non-linear SVMs
    """

    if self.params.kernel.__kernel_name__ == 'linear':
        # TODO: move into a function wrapper for
        #       np.linalg.norm
        if np.issubdtype(data.dtype, np.integer):
            # we are dealing with integers and overflows are
            # possible, so assure working with floats
            def sq_func(x):
                y = x.astype(float)  # copy as float
                y *= y               # in-place square
                return y
        else:
            sq_func = np.square
        # compute it per sample so we do not double the memory
        # by calling sq_func on the full data
        # Having a list of norms here automagically resolves the issue
        # with memmapped data, operations on which would in turn
        # return another memmap
        datasetnorm = np.mean([np.sqrt(np.sum(sq_func(s))) for s in data])
        if datasetnorm == 0:
            warning("Obtained degenerate data with zero norm for training "
                    "of %s.  Scaling of C cannot be done." % self)
            return 1.0
        value = 1.0 / (datasetnorm ** 2)
        if __debug__:
            debug("SVM", "Default C computed to be %f" % value)
    else:
        warning("TODO: Computation of default C is not yet implemented" +
                " for non-linear SVMs. Assigning 1.0")
        value = 1.0

    return value
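# Hedged numpy sketch of the default-C heuristic above: scaling C by the
# inverse of the squared mean sample norm makes the penalty roughly invariant
# to the overall scaling of the training data.  Plain arrays stand in for the
# dataset here.
import numpy as np

data = np.random.randn(50, 10) * 3.0
mean_norm = np.mean([np.sqrt(np.sum(np.square(s.astype(float)))) for s in data])
default_C = 1.0 / mean_norm ** 2
print("default C: %g" % default_C)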