def _set_up_funcs(funcs, metas_ordered, Ks, dim, X_ns=None, Y_ns=None):
    # replace functions with partials of args
    def replace_func(func, info):
        needs_alpha = getattr(func, 'needs_alpha', False)

        new = None
        args = (Ks, dim)
        if needs_alpha:
            args = (info.alphas,) + args

        if hasattr(func, 'chooser_fn'):
            args += (X_ns, Y_ns)
            if (getattr(func, 'needs_all_ks', False) and
                    getattr(func.chooser_fn, 'returns_ks', False)):
                new, K = func.chooser_fn(*args)
                new.K_needed = K
            else:
                new = func.chooser_fn(*args)
        else:
            new = partial(func, *args)

        for attr in dir(func):
            if not (attr.startswith('__') or attr.startswith('func_')):
                setattr(new, attr, getattr(func, attr))

        return new

    rep_funcs = dict(
        (replace_func(f, info), info) for f, info in iteritems(funcs))
    rep_metas_ordered = OrderedDict(
        (replace_func(f, info), info) for f, info in iteritems(metas_ordered))

    return rep_funcs, rep_metas_ordered
def _clone_h2o_obj(estimator, ignore=False, **kwargs):
    # do initial clone
    est = clone(estimator)

    # set kwargs:
    if kwargs:
        for k, v in six.iteritems(kwargs):
            setattr(est, k, v)

    # check on h2o estimator
    if isinstance(estimator, H2OPipeline):
        # the last step from the original estimator
        e = estimator.steps[-1][1]
        if isinstance(e, H2OEstimator):
            last_step = est.steps[-1][1]  # so it's the last step
            for k, v in six.iteritems(e._parms):
                k, v = _kv_str(k, v)

                # if (not k in PARM_IGNORE) and (not v is None):
                #     e._parms[k] = v
                last_step._parms[k] = v
        # otherwise it's a BaseH2OFunctionWrapper

    return est
def get_params(self, deep=True):
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    if not deep:
        return super(EnsembleClassifier, self).get_params(deep=False)
    else:
        out = self.named_clfs.copy()
        for name, step in six.iteritems(self.named_clfs):
            for k, v in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, k)] = v
        return out
def get_params(self, deep=True):
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            # iterate the member classifier's params, not self.get_params,
            # which would recurse into this method indefinitely
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    if not deep:
        return super(Pipeline, self).get_params(deep=False)
    else:
        out = self.named_steps.copy()
        for name, step in six.iteritems(self.named_steps):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch"""
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['{0}__{1}'.format(name, key)] = value
        return out
def get_params(self, deep=True):
    """Return estimator parameter names for GridSearch support"""
    if not deep:
        return super(EnsembleClassifier, self).get_params(deep=False)
    else:
        out = self.named_clfs.copy()
        for name, step in six.iteritems(self.named_clfs):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch"""
    if not deep:
        return super(FWLS_Classifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Return estimator parameter names for GridSearch support."""
    if not deep:
        return super(EnsembleVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_clfs.copy()
        for name, step in six.iteritems(self.named_clfs):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
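# A minimal usage sketch (not from any of the snippets above) of how the nested
# 'name__param' keys exposed by these get_params(deep=True) implementations are
# consumed during hyperparameter search. scikit-learn's own VotingClassifier is
# used here as a stand-in for the ensemble classes above; the estimator names
# and parameter values are illustrative assumptions only.
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

clf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                   ('dt', DecisionTreeClassifier())])
# deep=True exposes nested keys such as 'lr__C', which GridSearchCV routes
# back through set_params on the corresponding member estimator
param_grid = {'lr__C': [0.1, 1.0, 10.0], 'dt__max_depth': [1, 3, 5]}
grid = GridSearchCV(clf, param_grid=param_grid, cv=3)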
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch."""
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        # six.iteritems returns an iterator over the dictionary's items
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch"""
    if not deep:
        return super(SLS_Classifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Return estimator parameter names for GridSearch support"""
    if not deep:
        return super(HybridFeatureVotingClassifier, self).get_params(deep=False)
    else:
        out = super(HybridFeatureVotingClassifier, self).get_params(deep=False)
        out.update(self.named_estimators.copy())
        for name, step in six.iteritems(self.named_estimators):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch"""
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                # use the '<name>__<param>' convention expected by GridSearch
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    if not deep:
        return super(SparkPipeline, self).get_params(deep=False)
    else:
        out = self.named_steps.copy()
        for name, step in six.iteritems(self.named_steps):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        out.update(super(SparkPipeline, self).get_params(deep=False))
        return out
def get_params(self, deep=True):
    """Return estimator parameter names for GridSearch support"""
    if not deep:
        return super(MultiLabelVotingClassifier, self).get_params(deep=False)
    else:
        out = super(MultiLabelVotingClassifier, self).get_params(deep=False)
        out.update(self.named_estimators.copy())
        for name, step in six.iteritems(self.named_estimators):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch"""
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for k, v in six.iteritems(step.get_params(deep=True)):
                out["%s__%s" % (name, k)] = v
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch."""
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        # build a dictionary keyed by '<classifier name>__<parameter name>'
        # with the parameter values as the values
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Get classifier parameter names for GridSearch.

    Useful for grid search for hyperparameter tuning: gives access to the
    parameters of the individual classifiers in the ensemble.
    """
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=False):
    """Return estimator parameter names for GridSearch support"""
    if not deep:
        return super(StackingRegressor, self).get_params(deep=False)
    else:
        # TODO: this will not work, need to implement `named_estimators`
        raise NotImplementedError("`deep` attribute not yet supported.")

        out = super(StackingRegressor, self).get_params(deep=False)
        out.update(self.named_estimators.copy())
        for name, step in six.iteritems(self.named_estimators):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_params(self, deep=True):
    """Return estimator parameter names for GridSearch support."""
    if not deep:
        return super(StackingCVClassifier, self).get_params(deep=False)
    else:
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value

        out.update(self.named_meta_classifier.copy())
        for name, step in six.iteritems(self.named_meta_classifier):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def get_feature_names(self):
    """Array mapping from feature integer indices to feature name"""
    if not hasattr(self, 'vocabulary_') or len(self.vocabulary_) == 0:
        raise ValueError("Vocabulary wasn't fitted or is empty!")

    return [t for t, i in sorted(six.iteritems(self.vocabulary_),
                                 key=itemgetter(1))]
def mypp(params, offset=0, printer=repr):
    # Do a multi-line justified repr:
    options = np.get_printoptions()
    np.set_printoptions(precision=5, threshold=64, edgeitems=2)
    params_list = list()
    this_line_length = offset
    line_sep = ',\n' + (1 + offset // 2) * ' '
    for i, (k, v) in enumerate(sorted(six.iteritems(params))):
        if type(v) is float:
            # use str for representing floating point numbers
            # this way we get consistent representation across
            # architectures and versions.
            this_repr = '%s=%s' % (k, str(v))
        else:
            # use repr of the rest
            this_repr = '%s=%s' % (k, printer(v))
        if len(this_repr) > 500000000:
            this_repr = this_repr[:300] + '...' + this_repr[-100:]
        if i > 0:
            if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr):
                params_list.append(line_sep)
                this_line_length = len(line_sep)
            else:
                params_list.append(', ')
                this_line_length += 2
        params_list.append(this_repr)
        this_line_length += len(this_repr)

    np.set_printoptions(**options)
    lines = ''.join(params_list)
    # Strip trailing space to avoid nightmare in doctests
    lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n'))
    return lines
def transform(self, raw_documents):
    """Extract token counts out of raw text documents using the vocabulary
    fitted with fit or the one provided in the constructor.

    Parameters
    ----------
    raw_documents : iterable
        An iterable which yields either str, unicode or file objects.

    Returns
    -------
    vectors : sparse matrix, [n_samples, n_features]
    """
    if not hasattr(self, 'vocabulary_') or len(self.vocabulary_) == 0:
        raise ValueError("Vocabulary wasn't fitted or is empty!")

    # raw_documents can be an iterable so we don't know its size in
    # advance

    # result of document conversion to term count arrays
    i_indices = _make_int_array()
    j_indices = _make_int_array()
    values = _make_int_array()

    analyze = self.build_analyzer()
    for n_doc, doc in enumerate(raw_documents):
        term_counts = Counter(analyze(doc))

        for term, count in six.iteritems(term_counts):
            if term in self.vocabulary_:
                i_indices.append(n_doc)
                j_indices.append(self.vocabulary_[term])
                values.append(count)

    n_doc += 1

    return self._term_counts_to_matrix(n_doc, i_indices, j_indices, values)
def _pprint(params, offset=0, printer=repr):
    """
    From:
    https://github.com/scikit-learn/scikit-learn/blob/51a765a/sklearn/base.py
    With line 142-143 removed
    """
    # Do a multi-line justified repr:
    options = np.get_printoptions()
    np.set_printoptions(precision=5, threshold=64, edgeitems=2)
    params_list = list()
    this_line_length = offset
    line_sep = ',\n' + (1 + offset // 2) * ' '
    for i, (k, v) in enumerate(sorted(six.iteritems(params))):
        if type(v) is float:
            # use str for representing floating point numbers
            # this way we get consistent representation across
            # architectures and versions.
            this_repr = '%s=%s' % (k, str(v))
        else:
            # use repr of the rest
            this_repr = '%s=%s' % (k, printer(v))
        if i > 0:
            if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr):
                params_list.append(line_sep)
                this_line_length = len(line_sep)
            else:
                params_list.append(', ')
                this_line_length += 2
        params_list.append(this_repr)
        this_line_length += len(this_repr)

    np.set_printoptions(**options)
    lines = ''.join(params_list)
    # Strip trailing space to avoid nightmare in doctests
    lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n'))
    return lines
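# A brief sketch of how a _pprint-style helper is typically driven: it is the
# machinery behind an estimator's repr, producing sorted 'key=value' pairs
# wrapped at ~75 characters and indented by `offset`. The LogisticRegression
# import is illustrative only; this mirrors how scikit-learn's
# BaseEstimator.__repr__ calls the helper, not the original callers of the
# copies above.
from sklearn.linear_model import LogisticRegression

est = LogisticRegression()
print('%s(%s)' % (est.__class__.__name__,
                  _pprint(est.get_params(deep=False),
                          offset=len(est.__class__.__name__))))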
def _new_base_estimator(est, clonable_kwargs):
    """When the grid searches are pickled, the estimator has to be dropped
    out. When we load it back in, we have to reinstate a new one; since the
    fit is predicated on being able to clone a base estimator, we've got to
    have an estimator to clone and fit.

    Parameters
    ----------
    est : str
        The type of model to build

    clonable_kwargs : dict
        The keyword args written into the new estimator's ``_parms``

    Returns
    -------
    estimator : H2OEstimator
        The cloned base estimator
    """
    est_map = {
        'dl': H2ODeepLearningEstimator,
        'gbm': H2OGradientBoostingEstimator,
        'glm': H2OGeneralizedLinearEstimator,
        # 'glrm': H2OGeneralizedLowRankEstimator,
        # 'km': H2OKMeansEstimator,
        'nb': H2ONaiveBayesEstimator,
        'rf': H2ORandomForestEstimator
    }

    estimator = est_map[est]()  # initialize the new ones

    for k, v in six.iteritems(clonable_kwargs):
        k, v = _kv_str(k, v)
        estimator._parms[k] = v

    return estimator
def test_type_of_target():
    for group, group_examples in iteritems(EXAMPLES):
        for example in group_examples:
            assert_equal(type_of_target(example), group,
                         msg=('type_of_target(%r) should be %r, got %r'
                              % (example, group, type_of_target(example))))

    for example in NON_ARRAY_LIKE_EXAMPLES:
        msg_regex = r'Expected array-like \(array or non-string sequence\).*'
        assert_raises_regex(ValueError, msg_regex, type_of_target, example)

    for example in MULTILABEL_SEQUENCES:
        msg = ('You appear to be using a legacy multi-label data '
               'representation. Sequence of sequences are no longer supported;'
               ' use a binary array or sparse matrix instead.')
        assert_raises_regex(ValueError, msg, type_of_target, example)

    try:
        from pandas import SparseSeries
    except ImportError:
        # skip rather than pass: SparseSeries would be undefined below
        raise SkipTest("Pandas not found")

    y = SparseSeries([1, 0, 0, 1, 0])
    msg = "y cannot be class 'SparseSeries'."
    assert_raises_regex(ValueError, msg, type_of_target, y)
def _fit(self, X, y=None, **fit_params):
    # self._validate_steps()
    fit_params_steps = dict(
        (name, {}) for name, step in self.steps if step is not None)
    for pname, pval in six.iteritems(fit_params):
        step, param = pname.split('__', 1)
        fit_params_steps[step][param] = pval

    Xt = X
    for name, transform in self.steps[:-1]:
        start_time = time.time()
        if transform is None:
            pass
        elif hasattr(transform, "fit_transform"):
            Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
        else:
            Xt = transform.fit(Xt, y, **fit_params_steps[name]) \
                          .transform(Xt)
        self.pipeline_info.add_preprocessor_timing(
            name, time.time() - start_time)

    if self._final_estimator is None:
        return Xt, {}
    return Xt, fit_params_steps[self.steps[-1][0]]
def test_type_of_target():
    for group, group_examples in iteritems(EXAMPLES):
        for example in group_examples:
            assert_equal(type_of_target(example), group,
                         msg=('type_of_target(%r) should be %r, got %r'
                              % (example, group, type_of_target(example))))

    for example in NON_ARRAY_LIKE_EXAMPLES:
        msg_regex = r'Expected array-like \(array or non-string sequence\).*'
        assert_raises_regex(ValueError, msg_regex, type_of_target, example)

    for example in MULTILABEL_SEQUENCES:
        msg = ('You appear to be using a legacy multi-label data '
               'representation. Sequence of sequences are no longer supported;'
               ' use a binary array or sparse matrix instead.')
        assert_raises_regex(ValueError, msg, type_of_target, example)

    try:
        from pandas import SparseSeries
    except ImportError:
        raise SkipTest("Pandas not found")

    y = SparseSeries([1, 0, 0, 1, 0])
    msg = "y cannot be class 'SparseSeries'."
    assert_raises_regex(ValueError, msg, type_of_target, y)
def transform(self, X):
    """Apply the schema normalization.

    Parameters
    ----------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The Pandas frame to transform. The operation will be applied to
        a copy of the input data, and the result will be returned.

    Returns
    -------
    X : pd.DataFrame or np.ndarray, shape=(n_samples, n_features)
        The operation is applied to a copy of ``X``, and the result set
        is returned.
    """
    check_is_fitted(self, "fit_cols_")
    X, _ = check_dataframe(X, cols=self.cols)

    # validate that fit cols in test set
    cols = self.fit_cols_
    validate_test_set_columns(cols, X.columns)

    # normalize
    for k, v in six.iteritems(self.schema):
        X[k] = X[k].astype(v)

    return X  # DataFrame
def _fit_transform(self, X, y=None, **fit_params):
    """Fit and transform X by transforming it with every step in sequence."""
    # shallow copy of steps - this should really be steps_
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)
    fit_transform_one_cached = memory.cache(_fit_transform_one)

    fit_params_steps = dict(
        (name, {}) for name, step in self.steps if step is not None)
    for pname, pval in six.iteritems(fit_params):
        step, param = pname.split('__', 1)
        fit_params_steps[step][param] = pval

    Xt = X
    for step_idx, (name, transformer) in enumerate(self.steps):
        if transformer is None:
            pass
        else:
            if hasattr(memory, 'cachedir') and memory.cachedir is None:
                # we do not clone when caching is disabled to preserve
                # backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)

            # Fit or load from cache the current transformer
            Xt, fitted_transformer = fit_transform_one_cached(
                cloned_transformer, None, Xt, y,
                **fit_params_steps[name])

            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)

    return Xt
def get_grid_results_table(search):
    """Get the grid results from a fit ``RandomizedSearchCV``.

    Parameters
    ----------
    search : RandomizedSearchCV
        The pre-fit grid search.

    Returns
    -------
    res : pd.DataFrame
        The results dataframe
    """
    # the search results
    res = search.cv_results_

    # unpack the dict
    dct = {
        k: res[k] for k in ('mean_fit_time', 'std_fit_time',
                            'mean_score_time', 'std_score_time',
                            'mean_test_score', 'std_test_score')
    }

    prefix = "param_"
    for k, v in six.iteritems(res):
        if k.startswith(prefix):
            key = k.split(prefix)[-1]
            dct[key] = v.data

    return pd.DataFrame.from_dict(dct)
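# A hedged usage sketch for get_grid_results_table. The estimator, dataset and
# search space below are illustrative assumptions, not part of the original
# code; the only requirement is a fitted RandomizedSearchCV whose cv_results_
# contains the timing/score keys unpacked above.
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
search = RandomizedSearchCV(RandomForestClassifier(),
                            param_distributions={'max_depth': randint(2, 8)},
                            n_iter=5, cv=3)
search.fit(X, y)
res = get_grid_results_table(search)  # one row per sampled candidate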
def restrict(self, support, indices=False):
    """Restrict the features to those in support.

    Parameters
    ----------
    support : array-like
        Boolean mask or list of indices (as returned by the get_support
        member of feature selectors).

    indices : boolean, optional
        Whether support is a list of indices.
    """
    if not indices:
        support = np.where(support)[0]

    names = self.feature_names_
    new_vocab = {}
    for i in support:
        new_vocab[names[i]] = len(new_vocab)

    self.vocabulary_ = new_vocab
    self.feature_names_ = [
        f for f, i in sorted(six.iteritems(new_vocab), key=itemgetter(1))
    ]
    return self
def set_params(self, **params):
    if not params:
        # Simple optimisation to gain speed (inspect is slow)
        return self

    # parameter dicts routed to each sub-model
    gmm_0_param_dict = {}
    gmm_1_param_dict = {}

    valid_params = self.get_params(deep=True)
    for key, value in six.iteritems(params):
        split = key.split('__', 1)
        if len(split) > 1:
            # combined key name
            prefix, name = split
            if prefix.find('gmm_0') >= 0:
                gmm_0_param_dict[name] = value
            elif prefix.find('gmm_1') >= 0:
                gmm_1_param_dict[name] = value
            else:
                raise ValueError('Invalid parameter %s '
                                 'for estimator %s' %
                                 (key, self.__class__.__name__))
        else:
            # simple objects case
            if key not in valid_params:
                raise ValueError('Invalid parameter %s '
                                 'for estimator %s' %
                                 (key, self.__class__.__name__))
            setattr(self, key, value)

    # route each sub-model its own parameter dict
    self.gmm_0.set_params(params=gmm_0_param_dict)
    self.gmm_1.set_params(params=gmm_1_param_dict)
    return self
def fit(self, X, y=None):
    """Learn a list of feature name -> indices mappings.

    Parameters
    ----------
    X : Mapping or iterable over Mappings
        Dict(s) or Mapping(s) from feature names (arbitrary Python
        objects) to feature values (strings or convertible to dtype).

    y : (ignored)

    Returns
    -------
    self
    """
    # collect all the possible feature names
    feature_names = set()
    for x in X:
        for f, v in six.iteritems(x):
            if isinstance(v, six.string_types):
                f_v = "%s%s%s" % (f, self.separator, v)
                if f_v not in self._onehot_dict:
                    self._onehot_dict[f_v] = [f, v]
                f = f_v
            feature_names.add(f)

    # sort the feature names to define the mapping
    feature_names = sorted(feature_names)
    self.vocabulary_ = dict((f, i) for i, f in enumerate(feature_names))
    self.feature_names_ = feature_names
    return self
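# For comparison only: scikit-learn's DictVectorizer builds the same kind of
# 'feature<separator>value' names for string-valued entries. This is a sketch
# against the standard library class, not the transformer defined above, and
# the records are illustrative assumptions.
from sklearn.feature_extraction import DictVectorizer

records = [{'city': 'London', 'temp': 12.0},
           {'city': 'Paris', 'temp': 18.0}]
vec = DictVectorizer(sparse=False).fit(records)
# string values are one-hot expanded: ['city=London', 'city=Paris', 'temp']
print(vec.feature_names_)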
def _remove_highandlow(self, cscmatrix, feature_to_pos, high, low):
    """Remove too rare or too common features.

    Prune features that are non-zero in more samples than ``high`` or in
    fewer documents than ``low``. This does not prune samples with zero
    features.
    """
    kept_indices = []
    removed_indices = set()
    for colptr in xrange(len(cscmatrix.indptr) - 1):
        len_slice = cscmatrix.indptr[colptr + 1] - cscmatrix.indptr[colptr]
        if len_slice <= high and len_slice >= low:
            kept_indices.append(colptr)
        else:
            removed_indices.add(colptr)

    s_kept_indices = set(kept_indices)
    new_mapping = dict((v, i) for i, v in enumerate(kept_indices))
    feature_to_pos = dict((k, new_mapping[v])
                          for k, v in six.iteritems(feature_to_pos)
                          if v in s_kept_indices)
    return cscmatrix[:, kept_indices], feature_to_pos, removed_indices
def transform(self, X):
    """Transform a test matrix given the already-fit transformer.

    Parameters
    ----------
    X : Pandas ``DataFrame``
        The Pandas frame to transform. The operation will be applied to
        a copy of the input data, and the result will be returned.

    Returns
    -------
    X : Pandas ``DataFrame``
        The operation is applied to a copy of ``X``, and the result set
        is returned.
    """
    check_is_fitted(self, 'sq_nms_')

    # check on state of X and cols
    X, _ = validate_is_pd(X, self.cols)
    sq_nms_ = self.sq_nms_

    # scale by norms
    for nm, the_norm in six.iteritems(sq_nms_):
        X[nm] /= the_norm

    return X if self.as_df else X.as_matrix()
def restrict(self, support, indices=False):
    """Restrict the features to those in support using feature selection.

    This function modifies the estimator in-place.
    """
    if self.has_been_restricted:
        return self

    if not indices:
        support = np.where(support)[0]

    names = self.feature_names_
    new_vocab = {}
    for i in support:
        new_vocab[names[i]] = len(new_vocab)

    self.vocabulary_ = new_vocab
    self.feature_names_ = [
        f for f, i in sorted(six.iteritems(new_vocab), key=itemgetter(1))
    ]
    self.has_been_restricted = True
    return self
def test_paired_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        S3 = func(csr_matrix(X), csr_matrix(Y))
        assert_array_almost_equal(S, S3)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y,
                          metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a value error is raised when the lengths of X and Y differ
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y)
def set_params(self, **params):
    """Set the parameters of this solver.

    Returns
    -------
    self : returns an instance of self.
    """
    if not params:
        # Simple optimization to gain speed (inspect is slow)
        return self
    valid_params = self.get_params(deep=True)

    from sklearn.externals import six
    for key, value in six.iteritems(params):
        split = key.split('__', 1)
        if len(split) > 1:
            # nested objects case
            name, sub_name = split
            if name not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (name, self))
            sub_object = valid_params[name]
            sub_object.set_params(**{sub_name: value})
        else:
            # simple objects case
            if key not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (key, self.__class__.__name__))
            setattr(self, key, value)
    return self
def most_common(d):
    '''
    Items of a defaultdict(int) with the highest values.

    Like Counter.most_common in Python >= 2.7.
    '''
    return sorted(six.iteritems(d), key=operator.itemgetter(1), reverse=True)
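# Quick sanity check of most_common (a sketch; assumes `operator` and `six`
# are imported as in the snippet above, and the input data is illustrative):
from collections import defaultdict

counts = defaultdict(int)
for token in ['a', 'b', 'a', 'c', 'a', 'b']:
    counts[token] += 1

# [('a', 3), ('b', 2), ('c', 1)]
print(most_common(counts))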
def __init__(self, cols=None, as_df=True, trans_col_name=None, **kwargs):
    super(_SelectiveTransformerWrapper, self).__init__(
        cols=cols, as_df=as_df)

    # this is a STATIC attribute of subclasses
    try:
        cls = self._cls
    except AttributeError:
        raise DeveloperError("_SelectiveTransformerWrapper subclasses "
                             "must contain a static _cls attribute that "
                             "maps to a sklearn type!")

    # get the (default) parameters for the estimator in question
    # and initialize to default
    self.estimator_ = cls()
    default_est_parms = self.estimator_.get_params(deep=True)

    # set the attributes in the estimator AND in the constructor so this
    # class behaves like sklearn in grid search
    self.estimator_.set_params(**kwargs)

    # set the kwargs here to behave like sklearn
    for k, v in six.iteritems(default_est_parms):
        if kwargs:
            v = kwargs.get(k, v)  # try to get from kwargs, fall back to default
        setattr(self, k, v)

    self.trans_col_name = trans_col_name
def test_paired_distances():
    """Test the pairwise_distance helper function."""
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check that the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y,
                          metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a value error is raised when the lengths of X and Y differ
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y)
def _move_available():
    to_delete = []
    for n, parents in iteritems(deps):
        if not parents:
            available.add(n)
            to_delete.append(n)
    for n in to_delete:
        del deps[n]
def mapper(X, separator=self.separator):
    feature_names = []
    for x in X:
        for f, v in six.iteritems(x):
            if isinstance(v, six.string_types):
                f = "%s%s%s" % (f, self.separator, v)
            feature_names.append(f)
    accum.add(set(feature_names))
def update(self, other):
    """Adds counts for elements in other"""
    if isinstance(other, self.__class__):
        for x, n in six.iteritems(other):
            self[x] += n
    else:
        for x in other:
            self[x] += 1
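# The method above mirrors the two branches of collections.Counter.update:
# summing counts for a same-class argument, and counting elements of a plain
# iterable. A quick sketch of the equivalent standard-library behaviour
# (illustrative data only):
from collections import Counter

bag = Counter(['spam', 'eggs', 'spam'])
bag.update(Counter(['spam']))   # same-class branch: counts are summed
bag.update(['eggs', 'ham'])     # plain-iterable branch: each element adds 1
# Counter({'spam': 3, 'eggs': 2, 'ham': 1})
print(bag)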
def test_is_multilabel():
    for group, group_examples in iteritems(EXAMPLES):
        if group.startswith("multilabel"):
            assert_, exp = assert_true, "True"
        else:
            assert_, exp = assert_false, "False"
        for example in group_examples:
            assert_(is_multilabel(example),
                    msg="is_multilabel(%r) should be %s" % (example, exp))
def get_params(self, deep=True):
    if not deep:
        return super(FeatureUnion, self).get_params(deep=False)
    else:
        out = dict(self.transformer_list)
        for name, trans in self.transformer_list:
            for key, value in iteritems(trans.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out
def test_is_multilabel():
    for group, group_examples in iteritems(EXAMPLES):
        if group.startswith('multilabel'):
            assert_, exp = assert_true, 'True'
        else:
            assert_, exp = assert_false, 'False'
        for example in group_examples:
            assert_(is_multilabel(example),
                    msg='is_multilabel(%r) should be %s' % (example, exp))
def test_type_of_target():
    for group, group_examples in iteritems(EXAMPLES):
        for example in group_examples:
            assert_equal(type_of_target(example), group,
                         msg='type_of_target(%r) should be %r, got %r'
                             % (example, group, type_of_target(example)))

    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, type_of_target, example)
def set_params(self, **params):
    if not params:
        return self

    for key, value in six.iteritems(params):
        split = key.split('__', 1)
        if len(split) > 1:
            print("length is greater than one ", split, value)
        else:
            print("length is one ", split, value)
            setattr(self, key, value)
def test_is_label_indicator_matrix():
    for group, group_examples in iteritems(EXAMPLES):
        if group == 'multilabel-indicator':
            assert_, exp = assert_true, 'True'
        else:
            assert_, exp = assert_false, 'False'
        for example in group_examples:
            assert_(is_label_indicator_matrix(example),
                    msg='is_label_indicator_matrix(%r) should be %s'
                        % (example, exp))
def test_is_sequence_of_sequences():
    for group, group_examples in iteritems(EXAMPLES):
        if group == 'multilabel-sequences':
            assert_, exp = assert_true, 'True'
        else:
            assert_, exp = assert_false, 'False'
        for example in group_examples:
            assert_(is_sequence_of_sequences(example),
                    msg='is_sequence_of_sequences(%r) should be %s'
                        % (example, exp))