def __init__(self, columns=None, remove=None, skip_errors=False,
             single=False, fLOG=None):
    """
    Stores the configuration of the transformer.

    @param      columns         specify a columns selection
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new categories appear (no 1)
    @param      single          use a single column per category,
                                do not multiply them for each value
    @param      fLOG            logging function

    The logging function displays a message when a new dense and big
    matrix is created when it should be sparse. A sparse matrix should
    be allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    # A list or None is kept as given; any other value is treated as a
    # single column and wrapped into a one-element list.
    if columns is None or isinstance(columns, list):
        selection = columns
    else:
        selection = [columns]
    self._p_columns = selection
    self._p_remove = remove
    self._p_skip_errors = skip_errors
    self._p_single = single
    self.fLOG = fLOG
def __init__(self, estimator=None, clus=None, **kwargs):
    """
    Stores the classifier and the clustering model.

    @param      estimator   :epkg:`sklearn:linear_model:LogisticRegression`
                            by default
    @param      clus        clustering applied on each class,
                            by default k-means with two classes
    @param      kwargs      sent to :meth:`set_params
                            <mlinsights.mlmodel.classification_kmeans.
                            ClassifierAfterKMeans.set_params>`, see its
                            documentation to understand how to
                            specify parameters

    Raises an *AttributeError* if *clus* has no method *transform*.
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    chosen_estimator = (
        LogisticRegression() if estimator is None else estimator)
    chosen_clus = KMeans(n_clusters=2) if clus is None else clus
    self.estimator = chosen_estimator
    self.clus = chosen_clus
    # The check runs after both attributes have been stored.
    if not hasattr(chosen_clus, "transform"):
        raise AttributeError(  # pragma: no cover
            "clus does not have a transform method.")
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, estimator=None, max_depth=20, min_samples_split=2,
             min_samples_leaf=2, min_weight_fraction_leaf=0.0,
             fit_improve_algo='auto', p1p2=0.09, gamma=1., verbose=0):
    """
    Stores the hyper-parameters and validates two of them.

    *estimator* defaults to a new ``LogisticRegression()`` when None.
    *max_depth* cannot be None and must be <= 1024.
    *fit_improve_algo* must belong to
    ``DecisionTreeLogisticRegression._fit_improve_algo_values``.
    Every other argument is stored unchanged under its own name.
    A *ValueError* is raised when one of the checks fails.
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    # logistic regression
    self.estimator = (
        LogisticRegression() if estimator is None else estimator)

    if max_depth is None:
        raise ValueError("'max_depth' cannot be None.")
    if max_depth > 1024:
        raise ValueError("'max_depth' must be <= 1024.")

    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_weight_fraction_leaf = min_weight_fraction_leaf
    self.fit_improve_algo = fit_improve_algo
    self.p1p2 = p1p2
    self.gamma = gamma
    self.verbose = verbose

    allowed = DecisionTreeLogisticRegression._fit_improve_algo_values
    if self.fit_improve_algo not in allowed:
        raise ValueError("fit_improve_algo='{}' not in {}".format(
            self.fit_improve_algo, allowed))
def __init__(self, n_estimators=20, max_depth=5, min_samples_split=10,
             min_samples_leaf=10, random_state=0, em_itrs=5,
             regularization=0.05, passive_dyn_func=None,
             passive_dyn_ctrl=None, passive_dyn_noise=None, verbose=False):
    """
    Stores the hyper-parameters of the model.

    n_estimators        - number of ensembled models
    ...                 - a batch of parameters used for
                          RandomTreesEmbedding, see relevant documents
    em_itrs             - maximum number of EM iterations to take
    regularization      - small positive scalar to prevent singularity of
                          matrix inversion
    passive_dyn_func    - function to evaluate passive dynamics;
                          None for MaxEnt model
    passive_dyn_ctrl    - function to return the control matrix which
                          might depend on the state...
    passive_dyn_noise   - covariance of a Gaussian noise; only applicable
                          when passive_dyn is Gaussian; None for MaxEnt
                          model. Note this implies a dynamical system with
                          constant input gain. It is extendable to have
                          state dependent input gain, then we need a
                          covariance for each data point
    verbose             - output training information
    """
    BaseEstimator.__init__(self)

    self.n_estimators = n_estimators
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.random_state = random_state
    self.em_itrs = em_itrs
    # scikit-learn's get_params()/clone() read an attribute named exactly
    # like the constructor argument, so the value is stored under
    # ``regularization``. ``reg`` is kept as well because it is the name
    # this constructor has always set.
    self.regularization = regularization
    self.reg = regularization
    self.passive_dyn_func = passive_dyn_func
    self.passive_dyn_ctrl = passive_dyn_ctrl
    self.passive_dyn_noise = passive_dyn_noise
    self.verbose = verbose
def __init__(self, transformer=None, estimator=None, normalize=True,
             keep_tsne_outputs=False, **kwargs):
    """
    Stores the transformer and the estimator.

    :param transformer: a new ``TSNE()`` when None, must expose
        a method *fit_transform*
    :param estimator: a new ``KNeighborsRegressor()`` when None,
        must expose a method *predict*
    :param normalize: stored unchanged
    :param keep_tsne_outputs: stored unchanged
    :param kwargs: forwarded to *set_params* when not empty
    :raises AttributeError: if a required method is missing
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    chosen_estimator = (
        KNeighborsRegressor() if estimator is None else estimator)
    chosen_transformer = TSNE() if transformer is None else transformer
    self.estimator = chosen_estimator
    self.transformer = chosen_transformer
    self.keep_tsne_outputs = keep_tsne_outputs

    # Both models are checked after they have been stored.
    if not hasattr(chosen_transformer, "fit_transform"):
        raise AttributeError(
            "transformer {} does not have a 'fit_transform' "
            "method.".format(type(chosen_transformer)))
    if not hasattr(chosen_estimator, "predict"):
        raise AttributeError(
            "estimator {} does not have a 'predict' "
            "method.".format(type(chosen_estimator)))

    self.normalize = normalize
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, transformer=None, estimator=None, normalize=True,
             keep_tsne_outputs=False, **kwargs):
    """
    Stores the transformer and the estimator.

    :param transformer: `TSNE` by default
    :param estimator: `KNeighborsRegressor` by default
    :param normalize: normalizes the outputs, centers and normalizes
        the output of the *t-SNE* and applies that same
        normalization to the prediction of the estimator
    :param keep_tsne_outputs: if True, keep raw outputs of
        :epkg:`TSNE`, stored in member *tsne_outputs_*
    :param kwargs: sent to :meth:`set_params <mlinsights.mlmodel.
        tsne_transformer.PredictableTSNE.set_params>`, see its
        documentation to understand how to specify parameters
    :raises AttributeError: if *transformer* has no *fit_transform*
        or *estimator* has no *predict*
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    chosen_estimator = (
        KNeighborsRegressor() if estimator is None else estimator)
    chosen_transformer = TSNE() if transformer is None else transformer
    self.estimator = chosen_estimator
    self.transformer = chosen_transformer
    self.keep_tsne_outputs = keep_tsne_outputs

    # Both models are checked after they have been stored.
    if not hasattr(chosen_transformer, "fit_transform"):
        raise AttributeError(
            "Transformer {} does not have a 'fit_transform' "
            "method.".format(type(chosen_transformer)))
    if not hasattr(chosen_estimator, "predict"):
        raise AttributeError(
            "Estimator {} does not have a 'predict' method.".format(
                type(chosen_estimator)))

    self.normalize = normalize
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, binner=None, estimator=None, n_jobs=None, verbose=False):
    """
    Stores the binner and the estimator trained on every bucket.

    @param      binner      transformer or predictor which creates
                            the buckets
    @param      estimator   predictor trained on every bucket
    @param      n_jobs      number of parallel jobs (for training
                            and predicting)
    @param      verbose     boolean or use ``'tqdm'`` to use
                            :epkg:`tqdm` to fit the estimators

    *binner* must be filled and must be:

    - ``'bins'``: the model
      :epkg:`sklearn:preprocessing:KBinsDiscretizer`
    - any instanciated model

    *estimator* must be an instanciated model. This constructor raises
    a *ValueError* when *estimator* is None and a *TypeError* when
    *binner* is None.
    NOTE(review): earlier documentation mentioned a default
    :epkg:`sklearn:linear_model:LinearRegression` for *estimator*,
    presumably applied by subclasses before calling this constructor.
    """
    BaseEstimator.__init__(self)
    if estimator is None:
        raise ValueError(  # pragma: no cover
            "estimator cannot be null.")
    if binner is None:
        raise TypeError(  # pragma: no cover
            "Unsupported options for binner=='tree' and model {}.".format(
                type(estimator)))
    # The string shortcut is replaced by a default discretizer.
    if binner == "bins":
        binner = KBinsDiscretizer()
    self.estimator = estimator
    self.binner = binner
    self.verbose = verbose
    self.n_jobs = n_jobs
def __init__(self, estimator, method=None, copy_estimator=True):
    """
    Wraps an estimator into a transformer.

    @param      estimator       estimator to wrap in a transformer,
                                it is cloned with the training data
                                (deep copy) when fitted
    @param      method          if None, guess what method should be
                                called, *transform* for a transformer,
                                *predict_proba* for a classifier,
                                *decision_function* if found,
                                *predict* otherwise
    @param      copy_estimator  copy the model instead of taking
                                a reference

    Raises an *AttributeError* if no method can be guessed or if
    the requested method does not exist on *estimator*.
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.estimator = estimator
    self.copy_estimator = copy_estimator

    if method is None:
        # The first method found in this order is selected.
        candidates = ("transform", "predict_proba",
                      "decision_function", "predict")
        for candidate in candidates:
            if hasattr(estimator, candidate):
                method = candidate
                break
        else:
            raise AttributeError(
                "Cannot find a method transform, predict_proba, decision_function, predict in object {}"
                .format(type(estimator)))

    if not hasattr(estimator, method):
        raise AttributeError("Cannot find method '{}' in object {}".format(
            method, type(estimator)))
    self.method = method
def __init__(self, embedding, m=10, analyzer=None, eqe=1, verbose=0,
             a=1, c=0, n_jobs=1):
    """
    Initializes the embedding based query language model query
    expansion technique.

    *embedding*, *analyzer*, *eqe*, *a* and *c* are stored in
    underscore-prefixed attributes, *verbose*, *m* and *n_jobs*
    under their own name. *eqe* must be 1 or 2, otherwise a
    *ValueError* is raised. *vocabulary* starts as None.
    """
    BaseEstimator.__init__(self)
    self._embedding = embedding
    self._analyzer = analyzer

    # Only two variants are accepted.
    if eqe not in (1, 2):
        raise ValueError
    self._eqe = eqe

    self.verbose = verbose
    self._a, self._c = a, c
    self.m = m
    self.n_jobs = n_jobs
    self.vocabulary = None
def __init__(self, estimator=None, threshold=0.75):
    """
    Stores the wrapped estimator and the threshold.

    :param estimator: a new ``LogisticRegression(solver='liblinear')``
        when None
    :param threshold: stored unchanged
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.estimator = (
        LogisticRegression(solver='liblinear')
        if estimator is None else estimator)
    self.threshold = threshold
def __init__(self, kind='poly', poly_degree=2,
             poly_interaction_only=False, poly_include_bias=True):
    """
    Stores the four parameters unchanged, each under its own name.

    :param kind: stored as ``self.kind``
    :param poly_degree: stored as ``self.poly_degree``
    :param poly_interaction_only: stored as
        ``self.poly_interaction_only``
    :param poly_include_bias: stored as ``self.poly_include_bias``
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.kind = kind
    self.poly_degree = poly_degree
    self.poly_interaction_only = poly_interaction_only
    self.poly_include_bias = poly_include_bias
def __init__(self, p=1, n_jobs=-1, warmup=1000, samples_per_chain=1000,
             n_chains=4, normalize=True):
    """
    Stores *p* and forwards the sampling options to ``BaseAR``.

    :param p: stored as ``self.p``
    :param n_jobs: forwarded to ``BaseAR.__init__``
    :param warmup: forwarded to ``BaseAR.__init__``
    :param samples_per_chain: forwarded to ``BaseAR.__init__``
    :param n_chains: forwarded to ``BaseAR.__init__``
    :param normalize: forwarded to ``BaseAR.__init__``
    """
    BaseEstimator.__init__(self)
    # n_chains and normalize used to be hard-coded to 4 and True here,
    # which silently discarded the values given by the caller.
    BaseAR.__init__(self, n_jobs=n_jobs, warmup=warmup,
                    samples_per_chain=samples_per_chain,
                    n_chains=n_chains, normalize=normalize)
    self.p = p
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True):
    """
    Stores the serialized model and the options.

    :param onnx_bytes: must be an instance of *bytes*
    :param output_name: stored unchanged
    :param enforce_float32: stored unchanged
    :raises TypeError: if *onnx_bytes* is not *bytes*
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.onnx_bytes = onnx_bytes
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    # The type is checked once the attributes are stored.
    if isinstance(onnx_bytes, bytes):
        return
    raise TypeError("onnx_bytes must be bytes to be pickled.")
def __init__(self, vocab, merges, padding_length=-1, opset=None):
    """
    Stores the four parameters unchanged, each under its own name.

    :param vocab: stored as ``self.vocab``
    :param merges: stored as ``self.merges``
    :param padding_length: stored as ``self.padding_length``
    :param opset: stored as ``self.opset``
    :raises ImportError: if the module-level name
        ``get_library_path`` is None
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.vocab, self.merges = vocab, merges
    self.padding_length = padding_length
    self.opset = opset
    # The availability check happens after the attributes are stored.
    if get_library_path is not None:
        return
    raise ImportError("onnxruntime_extensions is not installed.")
def __init__(self, num_inputs, mxseed=0, epochs=5000, net_type=1):
    """
    Stores the parameters unchanged and sets ``self.net`` to None.

    :param num_inputs: stored as ``self.num_inputs``
    :param mxseed: stored as ``self.mxseed``
    :param epochs: stored as ``self.epochs``
    :param net_type: stored as ``self.net_type``
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    # No network exists yet at construction time.
    self.net = None
    self.num_inputs = num_inputs
    self.mxseed = mxseed
    self.epochs = epochs
    self.net_type = net_type
def __init__(self, embedding, analyzer='word', m=10, verbose=0,
             use_idf=True, **ev_params):
    """Expand a query by the nearest known tokens to its centroid.

    *embedding* and *m* are stored unchanged. *embedding*, *analyzer*,
    *use_idf* and the extra keyword arguments *ev_params* are used to
    build an ``EmbeddedVectorizer`` stored as ``self.vect``.
    *verbose* is accepted but not used inside this constructor.
    """
    self.embedding = embedding
    self.m = m
    vectorizer = EmbeddedVectorizer(
        embedding, analyzer=analyzer, use_idf=use_idf, **ev_params)
    self.vect = vectorizer
    # The base constructor is called last.
    BaseEstimator.__init__(self)
def __init__(self, name, fct, kwargs):
    """
    Stores the function and its description.

    @param      name        function name, stored as ``self.name_fct``
    @param      fct         python function, stored as ``self._fct``
    @param      kwargs      parameters function, stored as ``self.kwargs``
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.name_fct, self._fct = name, fct
    self.kwargs = kwargs
def __init__(self, scaler_model, clf_model, hmm_model):
    """
    Stores the three models and a pre-configured discretizer.

    :param scaler_model: stored as ``self.scaler_model_``
    :param clf_model: stored as ``self.clf_model_``
    :param hmm_model: stored as ``self.hmm_model_``

    A ``KBinsDiscretizer(encode='ordinal')`` is created and its
    attributes ``n_bins_`` and ``bin_edges_`` are set by hand from a
    fixed list of edges, then stored as ``self.bins_discretizer_``.
    """
    # Fixed edges: -inf, 0.1, 0.3, 0.5, 0.7, 0.9, +inf.
    edges = np.array([-np.inf, 0.1, 0.3, 0.5, 0.7, 0.9, np.inf])
    discretizer = KBinsDiscretizer(encode='ordinal')
    # NOTE(review): n_bins_ receives the number of edges (7), not the
    # number of intervals (6) -- confirm this is intended.
    discretizer.n_bins_ = np.array([edges.shape[0]])
    discretizer.bin_edges_ = edges.reshape(1, -1)

    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.scaler_model_ = scaler_model
    self.clf_model_ = clf_model
    self.hmm_model_ = hmm_model
    self.bins_discretizer_ = discretizer
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
             runtime='python', change_batch_size=None, reshape=False):
    """
    Stores the serialized model and the options.

    :param onnx_bytes: stored unchanged unless it exposes a method
        *SerializeToString*, in which case the result of that call
        is stored instead
    :param output_name: stored unchanged
    :param enforce_float32: stored unchanged
    :param runtime: stored unchanged
    :param change_batch_size: stored unchanged
    :param reshape: stored unchanged
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if hasattr(onnx_bytes, 'SerializeToString'):
        serialized = onnx_bytes.SerializeToString()
    else:
        serialized = onnx_bytes
    self.onnx_bytes = serialized
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    self.runtime = runtime
    self.change_batch_size = change_batch_size
    self.reshape = reshape
def __init__(self, onnx_bytes, output_name=None):
    """
    Stores the serialized model and the requested output.

    :param onnx_bytes: bytes
    :param output_name: requested output name or None to request all
        and have method *transform* to store all of them
        in a dataframe
    :raises TypeError: if *onnx_bytes* is not *bytes*
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.onnx_bytes, self.output_name = onnx_bytes, output_name
    # The type is checked once the attributes are stored.
    if isinstance(onnx_bytes, bytes):
        return
    raise TypeError("onnx_bytes must be bytes to be pickled.")
def __init__(self, model='SIR', t=0, max_iter=100,
             learning_rate_init=0.1, lr_schedule='constant',
             momentum=0.9, power_t=0.5, early_th=None,
             min_threshold='auto', max_threshold='auto',
             verbose=False, init=None):
    """
    Stores the hyper-parameters and resolves the automatic thresholds.

    :param model: model name, ``'SIR'``, ``'SIRD'``, ``'SIRC'`` and
        ``'SIRDC'`` (case insensitive) are the names for which
        automatic thresholds are defined
    :param t: stored unchanged
    :param max_iter: stored unchanged
    :param learning_rate_init: stored unchanged
    :param lr_schedule: stored unchanged
    :param momentum: stored unchanged
    :param power_t: stored unchanged
    :param early_th: stored unchanged
    :param min_threshold: ``'auto'`` or an explicit value; ``'auto'``
        is replaced by a scalar (SIR, SIRD) or by an array ordered
        like ``CovidSIRDc.P0`` (SIRC, SIRDC), any other value
        is stored unchanged
    :param max_threshold: same logic as *min_threshold*
    :param verbose: stored unchanged
    :param init: None, a dictionary, or another ``EpidemicRegressor``
        whose ``coef_`` (when it exists) is copied and used
        as initial coefficients
    :raises TypeError: if *init* is neither None, a dictionary
        nor an ``EpidemicRegressor``
    """
    if init is not None:
        if isinstance(init, EpidemicRegressor):
            if hasattr(init, 'coef_'):
                init = init.coef_.copy()
            else:
                init = None  # pragma: no cover
        elif not isinstance(init, dict):
            raise TypeError(
                f"init must be a dictionary not {type(init)}.")

    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    self.t = t
    self.model = model
    self.max_iter = max_iter
    self.learning_rate_init = learning_rate_init
    self.lr_schedule = lr_schedule
    self.momentum = momentum
    self.power_t = power_t
    self.early_th = early_th
    self.verbose = verbose

    # The isinstance guard avoids comparing an array with a string,
    # which is ambiguous (or raises) when explicit thresholds are given.
    if isinstance(min_threshold, str) and min_threshold == 'auto':
        kind = model.upper()
        if kind in ('SIR', 'SIRD'):
            min_threshold = 0.0001
        elif kind in ('SIRC', ):
            pmin = dict(beta=0.001, nu=0.0001, mu=0.0001,
                        a=-1., b=0., c=0.)
            min_threshold = numpy.array(
                [pmin[k[0]] for k in CovidSIRDc.P0])
        # A one-element tuple: the previous ``('SIRDC')`` was a plain
        # string and turned this membership test into a substring test.
        elif kind in ('SIRDC', ):
            pmin = dict(beta=0.001, nu=0.001, mu=0.001,
                        a=-1., b=0., c=0.)
            min_threshold = numpy.array(
                [pmin[k[0]] for k in CovidSIRDc.P0])

    if isinstance(max_threshold, str) and max_threshold == 'auto':
        kind = model.upper()
        if kind in ('SIR', 'SIRD'):
            max_threshold = 1.
        elif kind in ('SIRC', 'SIRDC'):
            pmax = dict(beta=1., nu=0.5, mu=0.5,
                        a=0., b=4., c=2.)
            max_threshold = numpy.array(
                [pmax[k[0]] for k in CovidSIRDc.P0])

    self.min_threshold = min_threshold
    self.max_threshold = max_threshold
    self._get_model()
    self.init = init
    if init is not None:
        self.coef_ = init
def __init__(self, force_positive=False, **kwargs):
    """
    *kwargs* should contain parameters for
    :epkg:`sklearn:decomposition:NMF`, every item is stored as an
    attribute of the same name. The parameter *force_positive*
    removes all negative predictions and replaces them by zero.
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    MultiOutputMixin.__init__(self)
    # Every extra keyword becomes an attribute of the instance.
    for attribute, value in kwargs.items():
        setattr(self, attribute, value)
    self.force_positive = force_positive
def __init__(self, embedding, analyzer, m=10):
    """Initializes Embedding Based Query Expansion.

    :embedding: stored as ``self._embedding``
    :analyzer: given to a ``CountVectorizer`` stored as ``self._cv``
    :m: stored as ``self._m``
    """
    BaseEstimator.__init__(self)
    self._embedding, self._m = embedding, m
    counter = CountVectorizer(analyzer=analyzer)
    self._cv = counter
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
             runtime='onnxruntime1'):
    """
    Stores the serialized model and the options.

    :param onnx_bytes: stored unchanged unless it exposes a method
        *SerializeToString*, in which case the result of that call
        is stored instead
    :param output_name: stored unchanged
    :param enforce_float32: stored unchanged
    :param runtime: stored unchanged
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if hasattr(onnx_bytes, 'SerializeToString'):
        serialized = onnx_bytes.SerializeToString()
    else:
        serialized = onnx_bytes
    self.onnx_bytes = serialized
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    self.runtime = runtime
def __init__(self, n_jobs=-1, warmup=1000, samples_per_chain=1000,
             n_chains=4, normalize=True, max_samples_mem=500):
    """
    An interface to the following stan model

    y0 ~ cauchy(0, 1);
    nu ~ cauchy(0, 1);
    sigma ~ normal(0, 1);  // half-normal
    lam ~ exponential(1);
    theta ~ normal(0, lam);
    y ~ student_t(nu, y0 + Q * theta, sigma);

    params:
      n_jobs: Number of cores to use (not referenced inside this
        constructor)
      warmup: Number of warmup iterations for HMC, roughly
        analogous to a burnin period.
      samples_per_chain: Number of samples to draw per chain
      n_chains: Number of chains (should run at least 2)
      normalize: Whether to normalize the data before feeding it
        to stan. This is necessary as the priors in the model
        are fixed.
      max_samples_mem: A parameter to prevent blowing up all the
        memory when sampling the posterior predictive.
    """
    BaseEstimator.__init__(self)
    StanCacheMixin.__init__(self, MODEL_DIR)

    compiled = self._load_compiled_models()
    self.stan_model, self.predict_model = compiled

    # Keyword arguments kept for the fitting call.
    self.stan_fitting_kwargs = dict(
        chains=n_chains,
        iter_sampling=samples_per_chain,
        iter_warmup=warmup,
        inits=1,
        metric="diag_e",
        adapt_delta=0.8,
    )

    self._fit_results = None
    self.normalize = normalize
    self.max_samples_mem = max_samples_mem

    # The scalers only exist when normalization is requested.
    if normalize:
        self._y_ss = StandardScaler()
        self._X_ss = StandardScaler()
def __init__(self, estimator, runtime='python', enforce_float32=True,
             target_opset=None, conv_options=None, nopython=True):
    """
    Stores the six parameters unchanged, each under its own name.

    :param estimator: stored as ``self.estimator``
    :param runtime: stored as ``self.runtime``
    :param enforce_float32: stored as ``self.enforce_float32``
    :param target_opset: stored as ``self.target_opset``
    :param conv_options: stored as ``self.conv_options``
    :param nopython: stored as ``self.nopython``
    """
    BaseEstimator.__init__(self)
    self.estimator, self.runtime = estimator, runtime
    self.enforce_float32 = enforce_float32
    self.target_opset, self.conv_options = target_opset, conv_options
    self.nopython = nopython
def __init__(self, model, periods=1, freq='30min'):
    """Lags a dataset.

    Lags all features.
    Missing data is dropped for fitting, and replaced with the mean
    for predict.

    :model: wrapped model, must be an instance of ``BaseEstimator``
    :periods: Number of timesteps to lag by
    :freq: stored as ``self.freq``
        (presumably the time step of the data, confirm against the
        methods using it)
    :raises AssertionError: if *model* is not a ``BaseEstimator``
    """
    # An explicit raise instead of ``assert`` keeps the validation
    # active under ``python -O``. AssertionError is preserved so that
    # callers observe the same exception type as before.
    if not isinstance(model, BaseEstimator):
        raise AssertionError("`model` isn't a scikit-learn model")

    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)

    self.periods = periods
    self.freq = freq
    self.model = model
def __init__(self, rf_estimator=None, lasso_estimator=None):
    """
    Stores the two estimators.

    @param  rf_estimator    random forest estimator,
                            :epkg:`sklearn:ensemble:RandomForestRegressor`
                            by default
    @param  lasso_estimator Lasso estimator,
                            :epkg:`sklearn:linear_model:LassoRegression`
                            by default
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    forest = (
        RandomForestRegressor() if rf_estimator is None else rf_estimator)
    lasso = Lasso() if lasso_estimator is None else lasso_estimator
    self.rf_estimator = forest
    self.lasso_estimator = lasso
def __init__(self, n_jobs=-1, warmup=1000, samples_per_chain=1000,
             n_chains=4, normalize=True, max_samples_mem=500):
    """
    Loads the compiled stan models and prepares the fitting options.

    :param n_jobs: stored in ``stan_fitting_kwargs['n_jobs']``
    :param warmup: stored in ``stan_fitting_kwargs['warmup']`` and
        added to *samples_per_chain* to get ``'iter'``
    :param samples_per_chain: see *warmup*
    :param n_chains: stored in ``stan_fitting_kwargs['chains']``
    :param normalize: stored unchanged, two ``StandardScaler``
        are created when it is true
    :param max_samples_mem: stored unchanged
    """
    BaseEstimator.__init__(self)
    StanCacheMixin.__init__(self, MODEL_DIR)

    compiled = self._load_compiled_models()
    self.stan_model, self.predict_model = compiled

    # The control parameters for NUTS, most are left as default
    nuts_control = dict(
        metric="diag_e",  # Type of mass matrix (diag_e default)
        stepsize_jitter=0.05,  # Slight randomization of stepsizes
        adapt_engaged=True,
        adapt_gamma=0.05,  # Regularization scale
        adapt_delta=0.8,  # Target acceptance probability (.8 default)
        adapt_kappa=0.75,  # Relaxation exponent
        adapt_t0=10,  # Adaptation iteration offset
        adapt_init_buffer=75,  # First fast adapt period
        adapt_term_buffer=50,  # Last fast adapt period
        adapt_window=25,  # First slow adapt period
        max_treedepth=10,  # N_leapfrog ~ 2**max_treedepth
    )

    self.stan_fitting_kwargs = dict(
        chains=n_chains,
        iter=samples_per_chain + warmup,
        warmup=warmup,
        init="random",
        init_r=1.0,
        n_jobs=n_jobs,
        control=nuts_control,
    )

    self._fit_results = None
    self._fit_X = None
    self.normalize = normalize
    self.max_samples_mem = max_samples_mem

    # The scalers only exist when normalization is requested.
    if normalize:
        self._y_ss = StandardScaler(with_mean=True)
        self._X_ss = StandardScaler()
def __init__(self, wv, m=10, analyzer=str.split, eqe=1, verbose=0,
             a=1, c=0, n_jobs=1):
    """
    Initializes the embedding based query language model query
    expansion technique.

    *wv*, *analyzer*, *eqe*, *a* and *c* are stored in
    underscore-prefixed attributes, *verbose*, *m* and *n_jobs*
    under their own name. *eqe* must be 1 or 2, otherwise a
    *ValueError* is raised. *vocabulary* starts as None.
    """
    BaseEstimator.__init__(self)
    self._wv = wv
    self._analyzer = analyzer

    # Only two variants are accepted.
    if eqe not in (1, 2):
        raise ValueError
    self._eqe = eqe

    self.verbose = verbose
    self._a, self._c = a, c
    self.m = m
    self.n_jobs = n_jobs
    self.vocabulary = None
def __init__(self, normalizer=None, transformer=None, estimator=None,
             normalize=True, keep_tsne_outputs=False):
    """
    Stores the normalizer, the transformer and the estimator.

    @param      normalizer          None by default
    @param      transformer         :epkg:`sklearn:manifold:TSNE`
                                    by default
    @param      estimator           :epkg:`sklearn:neural_network:MLPRegressor`
                                    by default
    @param      normalize           normalizes the outputs, centers and
                                    normalizes the output of the *t-SNE*
                                    and applies that same normalization
                                    to the prediction of the estimator
    @param      keep_tsne_outputs   if True, keep raw outputs of
                                    :epkg:`TSNE`, stored in member
                                    *tsne_outputs_*

    Raises an *AttributeError* if *normalizer* (when not None) has no
    method *transform*, if *transformer* has no method *fit_transform*
    or if *estimator* has no method *predict*.
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    chosen_estimator = MLPRegressor() if estimator is None else estimator
    chosen_transformer = TSNE() if transformer is None else transformer
    self.estimator = chosen_estimator
    self.transformer = chosen_transformer
    self.normalizer = normalizer
    self.keep_tsne_outputs = keep_tsne_outputs

    # The three models are checked after they have been stored.
    if not (normalizer is None or hasattr(normalizer, "transform")):
        raise AttributeError(
            "normalizer {} does not have a 'transform' method.".format(
                type(normalizer)))
    if not hasattr(chosen_transformer, "fit_transform"):
        raise AttributeError(
            "transformer {} does not have a 'fit_transform' method.".
            format(type(chosen_transformer)))
    if not hasattr(chosen_estimator, "predict"):
        raise AttributeError(
            "estimator {} does not have a 'predict' method.".format(
                type(chosen_estimator)))
    self.normalize = normalize
def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
             alpha=1., verbose=False):
    """
    Stores the base estimator and the training options.

    @param      estimator       predictor trained on every bucket,
                                cannot be None
    @param      n_estimators    number of estimators to train
    @param      n_jobs          number of parallel jobs (for training
                                and predicting)
    @param      alpha           proportion of samples resampled
                                for each training
    @param      verbose         boolean or use ``'tqdm'`` to use
                                :epkg:`tqdm` to fit the estimators

    Raises a *ValueError* if *estimator* is None.
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    # Nothing is stored when the estimator is missing.
    if estimator is None:
        raise ValueError("estimator cannot be null.")
    self.estimator = estimator
    self.n_estimators = n_estimators
    self.n_jobs = n_jobs
    self.alpha = alpha
    self.verbose = verbose
def __init__(self, retrieval_model, matching=None, query_expansion=None,
             name='RM', labels=None):
    """Stores the components of the retrieval pipeline.

    :retrieval_model: A retrieval model satisfying fit and query.
    :matching: A matching operation satisfying fit and predict.
    :query_expansion: A query operation satisfying fit and transform
    :name: stored as ``self.name``
    :labels: Pre-defined mapping of indices to identifiers, will be
        inferred during fit, if not given. Converted with
        ``np.asarray`` when given.
    """
    BaseEstimator.__init__(self)
    self._retrieval_model = retrieval_model
    self._matching = matching
    self._query_expansion = query_expansion
    self.name = name
    # Labels are only converted to an array when they are provided.
    if labels is None:
        self.labels_ = None
    else:
        self.labels_ = np.asarray(labels)
def __init__(self, cost_func, n_class=2):
    """
    Stores both parameters unchanged, each under its own name.

    :param cost_func: stored as ``self.cost_func``
    :param n_class: stored as ``self.n_class``
    """
    BaseEstimator.__init__(self)
    self.cost_func, self.n_class = cost_func, n_class