class MultiClassSVMLearner(Orange.core.LinearLearner):
    """
    Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.

    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, bias=1.0,
                 normalization=True,
                 multinomial_treatment=DomainContinuizer.NValues,
                 **kwargs):
        """
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        :param bias: If non negative then each instance is appended a
            constant bias term (default 1.0).
        :type bias: float

        :param normalization: Normalize the input data prior to learning
            (default True)
        :type normalization: bool

        :param multinomial_treatment: Defines how to handle multinomial
            features for learning. It can be one of the
            :class:`~.DomainContinuizer` `multinomial_treatment`
            constants (default: `DomainContinuizer.NValues`).
        :type multinomial_treatment: int

        .. versionadded:: 2.6.1
            Added `multinomial_treatment`

        """
        own_settings = (
            ("C", C),
            ("eps", eps),
            ("bias", bias),
            ("normalization", normalization),
            ("multinomial_treatment", multinomial_treatment),
        )
        for attr_name, attr_value in own_settings:
            setattr(self, attr_name, attr_value)

        # Any additional keyword argument is stored as an attribute.
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

        # Assigned last, so a `solver_type` passed through **kwargs is
        # overwritten.
        self.solver_type = self.MCSVM_CS

    def __call__(self, data, weight_id=None):
        """
        Continuize (and optionally normalize) `data`, then train on it
        with the base learner.

        :param data: training data with a discrete class variable
        :param weight_id: forwarded unchanged to the base learner

        :raises TypeError: if the class variable is not discrete
        """
        class_var = data.domain.class_var
        if not isinstance(class_var, variable.Discrete):
            raise TypeError("Can only learn a discrete class.")

        if data.domain.has_discrete_attributes(False) or self.normalization:
            continuizer = DomainContinuizer()
            continuizer.multinomial_treatment = self.multinomial_treatment
            continuizer.class_treatment = continuizer.Ignore
            if self.normalization:
                continuizer.continuous_treatment = \
                    continuizer.NormalizeBySpan
            else:
                continuizer.continuous_treatment = continuizer.Leave
            data = data.translate(continuizer(data))

        return super(MultiClassSVMLearner, self).__call__(data, weight_id)
class FeatureSelection(Preprocessor):
    """
    A preprocessor that runs feature selection using a feature scoring
    function.

    :param measure: a scoring function
        (default: orange.MeasureAttribute_relief)
    :param filter: a filter function to use for selection
        (default Preprocessor_featureSelection.bestN)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    bestN = staticmethod(_bestN)
    bestP = staticmethod(_bestP)

    # NOTE: the default `measure` instance is created once, when the class
    # body is executed, and is therefore shared by every instance that
    # relies on the default.
    def __init__(self, measure=Orange.feature.scoring.Relief(),
                 filter=None, limit=10):
        self.measure = measure
        if filter is None:
            filter = self.bestN
        self.filter = filter
        self.limit = limit

    def attrScores(self, data):
        """
        Return a sorted list of ``(score, attribute)`` pairs, one for each
        attribute in the domain of `data`.
        """
        scored = [(self.measure(feature, data), feature)
                  for feature in data.domain.attributes]
        scored.sort()
        return scored

    def __call__(self, data, weightId=None):
        """
        Return a new table restricted to the attributes chosen by
        ``self.filter``; the class variable and meta attributes are kept.
        """
        chosen = self.filter(self.attrScores(data), self.limit)
        attrs = [feature for _score, feature in chosen]
        reduced = Orange.data.Domain(attrs, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return Orange.data.Table(reduced, data)
class PreprocessorList(Preprocessor):
    """
    A preprocessor wrapping a sequence of other preprocessors.

    :param preprocessors: a list of :obj:`Preprocessor` instances

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, preprocessors=()):
        self.preprocessors = preprocessors

    def __call__(self, data, weightId=None):
        """
        Apply the wrapped preprocessors to `data` one after another.

        A preprocessor may return either the data alone or a
        ``(data, weightId)`` tuple; once a weight id is known it is passed
        on to the following preprocessors. The result is a
        ``(data, weightId)`` tuple only when the caller supplied a
        `weightId`, otherwise just the data.
        """
        caller_gave_weight = weightId is not None
        pass_weight = caller_gave_weight

        for step in self.preprocessors:
            if pass_weight:
                outcome = step(data, weightId)
            else:
                outcome = step(data)

            if not isinstance(outcome, tuple):
                data = outcome
                continue

            data, weightId = outcome
            pass_weight = True

        if not caller_gave_weight:
            return data
        return data, weightId
class Continuize(Preprocessor):
    """
    A preprocessor that continuizes a discrete domain (and optionally
    normalizes it). See :obj:`Orange.data.continuization.DomainContinuizer`
    for list of accepted arguments.

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, zeroBased=True,
                 multinomialTreatment=DomainContinuizer.NValues,
                 continuousTreatment=DomainContinuizer.Leave,
                 classTreatment=DomainContinuizer.Ignore,
                 **kwargs):
        # Extra keyword arguments are accepted but not stored here.
        self.zeroBased = zeroBased
        self.multinomialTreatment = multinomialTreatment
        self.continuousTreatment = continuousTreatment
        self.classTreatment = classTreatment

    def __call__(self, data, weightId=0):
        """
        Build a continuized domain for `data` with the stored settings and
        return `data` translated into that domain.
        """
        settings = dict(
            zeroBased=self.zeroBased,
            multinomialTreatment=self.multinomialTreatment,
            continuousTreatment=self.continuousTreatment,
            classTreatment=self.classTreatment,
        )
        domain_builder = DomainContinuizer(**settings)
        return data.translate(domain_builder(data, weightId))
class DiscretizeEntropy(Discretize):
    """
    A discretizer that uses orange.EntropyDiscretization method but,
    unlike Preprocessor_discretize class, also removes unused attributes
    from the domain.

    :param method: the entropy discretization to apply; must be an
        instance of ``Orange.feature.discretization.Entropy``

    """
    __new__ = _orange__new__(Discretize)
    __reduce__ = _orange__reduce__

    # NOTE: the default `method` instance is created once, when the class
    # body is executed, and is shared by instances that use the default.
    def __init__(self, method=Orange.feature.discretization.Entropy()):
        # Validate explicitly instead of with `assert`, which is removed
        # when Python runs with -O.
        if not isinstance(method, Orange.feature.discretization.Entropy):
            raise TypeError(
                "method must be an instance of "
                "Orange.feature.discretization.Entropy")
        self.method = method

    def __call__(self, data, weightId=0):
        """
        Return a new table in which every continuous attribute is replaced
        by its discretized version.

        Continuous attributes whose discretization has no cut-off points
        are dropped; all other attributes, the class variable and the meta
        attributes are kept unchanged.
        """
        newattr_list = []
        for attr in data.domain.attributes:
            if attr.varType != Orange.feature.Type.Continuous:
                newattr_list.append(attr)
                continue

            newattr = self.method(attr, data)
            # No cut-off points: the attribute is left out of the domain.
            if newattr.getValueFrom.transformer.points:
                newattr_list.append(newattr)

        newdomain = Orange.data.Domain(newattr_list, data.domain.classVar)
        newdomain.addmetas(data.domain.getmetas())
        return Orange.data.Table(newdomain, data)
class RemoveDiscrete(Continuize):
    """
    A Preprocessor that removes all discrete attributes from the domain.

    """
    __new__ = _orange__new__(Continuize)

    def __call__(self, data, weightId=None):
        """
        Return a new table that keeps only the continuous attributes of
        `data`, together with its class variable and meta attributes.
        """
        source = data.domain
        kept = []
        for feature in source.attributes:
            if feature.varType == Orange.feature.Type.Continuous:
                kept.append(feature)

        reduced = Orange.data.Domain(kept, source.classVar)
        reduced.addmetas(source.getmetas())
        return Orange.data.Table(reduced, data)
class RemoveContinuous(Discretize):
    """
    A preprocessor that removes all continuous features.

    """
    __new__ = _orange__new__(Discretize)
    __reduce__ = _orange__reduce__

    def __call__(self, data, weightId=None):
        """
        Return a new table that keeps only the discrete attributes of
        `data`, together with its class variable and meta attributes.
        """
        source = data.domain
        kept = []
        for feature in source.attributes:
            if feature.varType == Orange.feature.Type.Discrete:
                kept.append(feature)

        reduced = Orange.data.Domain(kept, source.classVar)
        reduced.addmetas(source.getmetas())
        return Orange.data.Table(reduced, data)
class Impute(Preprocessor):
    """
    A preprocessor that imputes unknown values using a learner.

    :param model: a learner class.

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, model=None, **kwargs):
        # Fall back to a majority learner when no model is supplied; extra
        # keyword arguments are accepted but not stored here.
        if model is None:
            model = Orange.classification.majority.MajorityLearner()
        self.model = model

    def __call__(self, data, weightId=0):
        """Return the result of ``ImputeByLearner`` run on `data` with the
        stored model as its learner."""
        learner = self.model
        return ImputeByLearner(data, learner=learner)
class Sample(Preprocessor):
    """
    A preprocessor that samples a subset of the data.

    :param filter: a filter function to use for selection
        (default Preprocessor_sample.selectNRandom)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    selectNRandom = staticmethod(_selectNRandom)
    selectPRandom = staticmethod(_selectPRandom)

    def __init__(self, filter=None, limit=10):
        if filter is None:
            filter = self.selectNRandom
        self.filter = filter
        self.limit = limit

    def __call__(self, data, weightId=None):
        """
        Return a new table, over the same domain as `data`, built from
        whatever ``self.filter(data, self.limit)`` selects.
        """
        domain = data.domain
        subset = self.filter(data, self.limit)
        return Orange.data.Table(domain, subset)
class RFE(FeatureSelection):
    """
    A preprocessor that runs RFE (Recursive Feature Elimination) using
    linear SVM derived attribute weights.

    :param filter: a filter function to use for selection
        (default Preprocessor_featureSelection.bestN)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(FeatureSelection)
    __reduce__ = _orange__reduce__

    def __init__(self, filter=None, limit=10):
        super(RFE, self).__init__(filter=filter, limit=limit)

    def __call__(self, data, weightId=None):
        """
        Run recursive feature elimination on `data` and return its result.

        ``self.filter`` is used only to turn ``self.limit`` into the number
        of features to keep: it is applied to a placeholder list with one
        entry per attribute and the length of its result is passed to the
        SVM-based RFE.
        """
        # Imported locally and aliased so it is not confused with this
        # class, which has the same name.
        from Orange.classification.svm import RFE as SVMRFE

        # One placeholder per *feature* (not per data instance), matching
        # FeatureSelection where the filter runs over per-attribute scores.
        placeholders = list(range(len(data.domain.attributes)))
        num_selected = len(self.filter(placeholders, self.limit))

        rfe = SVMRFE()
        return rfe(data, num_selected)
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model."""

    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01,
                 bias=1.0, normalization=True,
                 multinomial_treatment=DomainContinuizer.NValues,
                 **kwargs):
        """
        :param solver_type: One of the following class constants:
            ``L2R_L2LOSS_DUAL``, ``L2R_L2LOSS``,
            ``L2R_L1LOSS_DUAL``, ``L1R_L2LOSS``

            The first part (``L2R`` or ``L1R``) is the regularization term
            on the weight vector (squared or absolute norm respectively),
            the ``L1LOSS`` or ``L2LOSS`` indicate absolute or squared
            loss function ``DUAL`` means the optimization problem is
            solved in the dual space (for more information see the
            documentation on `LIBLINEAR`_).

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        :param bias: If non negative then each instance is appended a
            constant bias term (default 1.0).
        :type bias: float

        :param normalization: Normalize the input data into range [0..1]
            prior to learning (default ``True``)
        :type normalization: bool

        :param multinomial_treatment: Defines how to handle multinomial
            features for learning. It can be one of the
            :class:`~.DomainContinuizer` `multinomial_treatment`
            constants (default: `DomainContinuizer.NValues`).
        :type multinomial_treatment: int

        .. versionadded:: 2.6.1
            Added `multinomial_treatment`

        .. note:: By default if the training data contains discrete
            features they are replaced by indicator columns one for each
            value of the feature regardless of the value of
            `normalization`. This is different than in :class:`SVMLearner`
            where this is done only if `normalization` is ``True``.

        Example

            >>> linear_svm = LinearSVMLearner(
            ...     solver_type=LinearSVMLearner.L1R_L2LOSS,
            ...     C=2.0)
            ...

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        self.bias = bias
        self.normalization = normalization
        self.multinomial_treatment = multinomial_treatment

        # Any additional keyword argument is stored as an attribute (and
        # may override the values assigned above).
        for name, val in kwargs.items():
            setattr(self, name, val)

        supported_solvers = [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                             self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]
        if self.solver_type not in supported_solvers:
            # The pieces are adjacent literals, so each one must carry its
            # own separating space.
            warnings.warn(
                "Deprecated 'solver_type', use "
                "'Orange.classification.logreg.LibLinearLogRegLearner' "
                "to build a logistic regression model using LIBLINEAR.",
                DeprecationWarning
            )

    def __call__(self, data, weight_id=None):
        """
        Continuize (and optionally normalize) `data`, then train on it
        with the base learner.

        :param data: training data with a discrete class variable
        :param weight_id: forwarded unchanged to the base learner

        :raises TypeError: if the class variable is not discrete
        """
        if not isinstance(data.domain.class_var, variable.Discrete):
            raise TypeError("Can only learn a discrete class.")

        if data.domain.has_discrete_attributes(False) or self.normalization:
            dc = DomainContinuizer()
            dc.multinomial_treatment = self.multinomial_treatment
            dc.class_treatment = dc.Ignore
            dc.continuous_treatment = \
                dc.NormalizeBySpan if self.normalization else dc.Leave
            c_domain = dc(data)
            data = data.translate(c_domain)

        return super(LinearSVMLearner, self).__call__(data, weight_id)
class SVMLearner(_SVMLearner):
    """
    :param svm_type: the SVM type
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: the kernel type
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (only for ``Polynomial``)
    :type degree: int
    :param gamma: kernel parameter; if 0, it is set to 1.0/#features
        (for ``Polynomial``, ``RBF`` and ``Sigmoid``)
    :type gamma: float
    :param coef0: kernel parameter (for ``Polynomial`` and ``Sigmoid``)
    :type coef0: int
    :param kernel_func: kernel function if ``kernel_type`` is
        ``kernels.Custom``
    :type kernel_func: callable object
    :param C: C parameter (for ``C_SVC``, ``Epsilon_SVR`` and ``Nu_SVR``)
    :type C: float
    :param nu: Nu parameter (for ``Nu_SVC``, ``Nu_SVR`` and ``OneClass``)
    :type nu: float
    :param p: epsilon parameter (for ``Epsilon_SVR``)
    :type p: float
    :param cache_size: cache memory size in MB
    :type cache_size: int
    :param eps: tolerance of termination criterion
    :type eps: float
    :param probability: build a probability model
    :type probability: bool
    :param shrinking: use shrinking heuristics
    :type shrinking: bool
    :param normalization: normalize the input data prior to learning into
        range [0..1] and replace discrete features with indicator columns
        one for each value of the feature using
        :class:`~Orange.data.continuization.DomainContinuizer` class
        (default ``True``)
    :type normalization: bool
    :param weight: a list of class weights (default: an empty list)
    :type weight: list
    :param verbose: If `True` show training progress (default is `False`).
    :type verbose: bool

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> data = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], data, folds=5)
        >>> print "CA:  %.4f" % scoring.CA(results)[0]
        CA:  0.7908
        >>> print "AUC: %.4f" % scoring.AUC(results)[0]
        AUC: 0.9565

    """
    __new__ = _orange__new__(_SVMLearner)

    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.utils.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization

        # Any additional keyword argument is stored as an attribute and is
        # also handed to the wrapped core learner.
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)

        # A fresh list per instance; a mutable default in the signature
        # would be shared between all learners created without `weight`.
        self.weight = [] if weight is None else weight

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param data: data with continuous features
        :type data: Orange.data.Table
        :param weight: ignored (required due to base class signature);

        :raises ValueError: if no instance in `data` has a defined class,
            or if a custom kernel is selected without a kernel function

        .. note:: ``self.svm_type`` is changed in place when it does not
            match the type of the class variable.

        """
        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type
        # mismatch.
        # NOTE(review): presumably 0/1 are the classification types and
        # 3/4 the corresponding regression types -- confirm against the
        # _SVMLearner constants.
        if self.svm_type in [0, 1] and \
                isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3

        if self.svm_type in [3, 4] and \
                isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3

        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:
            # Check if nu is feasible
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. "
                                  "Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Copy the current settings onto the wrapped core learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu",
                     "p", "gamma", "degree", "coef0", "shrinking",
                     "probability", "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))

        # Use the (possibly lowered) feasible nu without touching self.nu.
        self.learner.nu = nu
        self.learner.set_weights(self.weight)

        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        """Train the wrapped learner on `data` (normalized first when
        ``self.normalization`` is set) and wrap the result in
        :class:`SVMClassifier`."""
        if self.normalization:
            data = self._normalize(data)
        svm = self.learner(data)
        return SVMClassifier(svm)

    @Orange.utils.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                        progress_callback=None):
        """Tune the ``parameters`` on the given ``data`` using
        internal cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: names of parameters to tune
            (default: ["nu", "C", "gamma"])
        :type parameters: list of strings
        :param folds: number of folds for internal cross validation
        :type folds: int
        :param verbose: set verbose output
        :type verbose: bool
        :param progress_callback: callback function for reporting progress
        :type progress_callback: callback function

        Here is example of tuning the `gamma` parameter using
        3-fold cross validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """
        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        normalization = self.normalization
        if normalization:
            # Normalize once up front and switch normalization off while
            # tuning; it is switched back on in the `finally` below.
            data = self._normalize(data)
            self.normalization = False

        try:
            if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
                if isinstance(data.domain.class_var, variable.Discrete):
                    max_nu = max(self.max_nu(data) - 1e-7, 0.0)
                else:
                    max_nu = 1.0
                searchParams.append(
                    ("nu", [i / 10.0 for i in range(1, 9)
                            if i / 10.0 < max_nu] + [max_nu]))
            elif "C" in parameters:
                searchParams.append(
                    ("C", [2 ** a for a in range(-5, 15, 2)]))

            # NOTE(review): 2 is presumably the RBF kernel constant --
            # confirm against `kernels.RBF`.
            if self.kernel_type == 2 and "gamma" in parameters:
                searchParams.append(
                    ("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))

            tunedLearner = orngWrap.TuneMParameters(
                object=self,
                parameters=searchParams,
                folds=folds,
                returnWhat=orngWrap.TuneMParameters.returnLearner,
                progressCallback=progress_callback
                if progress_callback else lambda i: None)
            tunedLearner(data, verbose=verbose)
        finally:
            # Restore the setting even when tuning raises, so the learner
            # is not left with normalization silently disabled.
            if normalization:
                self.normalization = normalization

    def _normalize(self, data):
        """Return `data` translated into a continuized domain: class left
        as is, continuous features normalized by span, multinomial
        features expanded with the ``NValues`` treatment."""
        dc = preprocess.DomainContinuizer()
        dc.class_treatment = preprocess.DomainContinuizer.Ignore
        dc.continuous_treatment = preprocess.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = preprocess.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)
class L(Orange.core.Learner):
    """Minimal learner whose call echoes its arguments together with
    ``self.msg``."""
    __new__ = utils._orange__new__(Orange.core.Learner)

    def __call__(self, data, weight=0):
        # `msg` is not assigned in this class; presumably it is supplied
        # when the instance is created -- verify against the caller.
        echoed = (data, weight, self.msg)
        return echoed
class A(Orange.core.OrangeBase):
    """Minimal callable object that echoes its argument together with
    ``self.name`` and ``self.msg``."""
    __new__ = utils._orange__new__(Orange.core.OrangeBase)

    def __call__(self, data):
        # `name` and `msg` are not assigned in this class; presumably they
        # are supplied when the instance is created -- verify against the
        # caller.
        echoed = (data, self.name, self.msg)
        return echoed