return RegressionModelsMethods(self._df) @classmethod def _predict(cls, df, estimator, *args, **kwargs): data = df.data.values eval_MSE = kwargs.get('eval_MSE', False) if eval_MSE: y, MSE = estimator.predict(data, *args, **kwargs) if y.ndim == 1: y = df._constructor_sliced(y, index=df.index) MSE = df._constructor_sliced(MSE, index=df.index) else: y = df._constructor(y, index=df.index) MSE = df._constructor(MSE, index=df.index) return y, MSE else: y = estimator.predict(data, *args, **kwargs) if y.ndim == 1: y = df._constructor_sliced(y, index=df.index) else: y = df._constructor(y, index=df.index) return y class RegressionModelsMethods(_AccessorMethods): _module_name = 'sklearn.gaussian_process.regression_models' _regression_methods = ['constant', 'linear', 'quadratic'] _attach_methods(RegressionModelsMethods, _wrap_data_func, _regression_methods)
if is_integer_dtype(values): # integer raises an error in normalize values = values.astype(np.float) result = func(values, *args, **kwargs) result = self._constructor(result, index=self._data.index, columns=self._data.columns) else: # ModelSeries values = np.atleast_2d(self._df.values) if is_integer_dtype(values): values = values.astype(np.float) result = func(values, *args, **kwargs) result = self._constructor(result[0], index=self._df.index, name=self._df.name) return result f.__doc__ = (""" Call ``%s`` using automatic mapping. - ``X``: ``ModelFrame.data`` """ % func_name) return f _attach_methods(PreprocessingMethods, _wrap_func, _preprocessing_methods)
# Pairwise metrics @property def pairwise(self): """Not implemented""" raise NotImplementedError # y_true and y_pred _classification_methods = ['accuracy_score', 'classification_report', 'hamming_loss', 'jaccard_similarity_score', 'matthews_corrcoef', 'zero_one_loss'] _regression_methods = ['explained_variance_score', 'mean_absolute_error', 'mean_squared_error', 'r2_score'] _cluster_methods = ['mutual_info_score'] _true_pred_methods = (_classification_methods + _regression_methods + _cluster_methods) _attach_methods(MetricsMethods, _wrap_target_pred_func, _true_pred_methods) # methods which doesn't take additional arguments _cluster_methods_noargs = ['adjusted_mutual_info_score', 'adjusted_rand_score', 'completeness_score', 'homogeneity_completeness_v_measure', 'homogeneity_score', 'normalized_mutual_info_score', 'v_measure_score'] _attach_methods(MetricsMethods, _wrap_target_pred_noargs, _cluster_methods_noargs)
- ``y``: ``ModelFrame.target`` """ func = self._module.lasso_stability_path data = self._data target = self._target alpha_grid, scores_path = func(data.values, y=target.values, *args, **kwargs) scores_path = self._constructor(scores_path, index=data.columns) return alpha_grid, scores_path def orthogonal_mp_gram(self, *args, **kwargs): """ Call ``sklearn.linear_model.orthogonal_mp_gram`` using automatic mapping. - ``Gram``: ``ModelFrame.data.T.dot(ModelFrame.data)`` - ``Xy``: ``ModelFrame.data.T.dot(ModelFrame.target)`` """ func = self._module.orthogonal_mp_gram data = self._data.values target = self._target.values gram = data.T.dot(data) Xy = data.T.dot(target) coef = func(gram, Xy, *args, **kwargs) return coef _lm_methods = ['orthogonal_mp'] _attach_methods(LinearModelMethods, _wrap_data_target_func, _lm_methods)
Call ``sklearn.linear_model.lasso_stability_path`` using automatic mapping. - ``X``: ``ModelFrame.data`` - ``y``: ``ModelFrame.target`` """ func = self._module.lasso_stability_path data = self._data target = self._target alpha_grid, scores_path = func(data.values, y=target.values, *args, **kwargs) scores_path = self._constructor(scores_path, index=data.columns) return alpha_grid, scores_path def orthogonal_mp_gram(self, *args, **kwargs): """ Call ``sklearn.linear_model.orthogonal_mp_gram`` using automatic mapping. - ``Gram``: ``ModelFrame.data.T.dot(ModelFrame.data)`` - ``Xy``: ``ModelFrame.data.T.dot(ModelFrame.target)`` """ func = self._module.orthogonal_mp_gram data = self._data.values target = self._target.values gram = data.T.dot(data) Xy = data.T.dot(target) coef = func(gram, Xy, *args, **kwargs) return coef _lm_methods = ['orthogonal_mp'] _attach_methods(LinearModelMethods, _wrap_data_target_func, _lm_methods)
labels = self._constructor_sliced(labels, index=data.index) return cluster_centers, labels def spectral_clustering(self, *args, **kwargs): """ Call ``sklearn.cluster.spectral_clustering`` using automatic mapping. - ``affinity``: ``ModelFrame.data`` """ func = self._module.spectral_clustering data = self._data labels = func(data.values, *args, **kwargs) labels = self._constructor_sliced(labels, index=data.index) return labels # Biclustering @property def bicluster(self): """Property to access ``sklearn.cluster.bicluster``""" return self._bicluster @cache_readonly def _bicluster(self): return _AccessorMethods(self._df, module_name='sklearn.cluster.bicluster') _cluster_methods = ['estimate_bandwidth', 'ward_tree'] _attach_methods(ClusterMethods, _wrap_data_func, _cluster_methods)
- ``x``: ``ModelFrame.target_name`` """ % func_name) return f def _wrap_data_plot(func, func_name): """ Wrapper for plotting with data """ def f(self, *args, **kwargs): return func(data=self._df, *args, **kwargs) f.__doc__ = ( """ Call ``%s`` using automatic mapping. - ``data``: ``ModelFrame`` """ % func_name) return f _xy_plots = ['jointplot', 'lmplot', 'regplot', 'residplot'] _attach_methods(SeabornMethods, _wrap_xy_plot, _xy_plots) _categorical_plots = ['factorplot', 'boxplot', 'violinplot', 'stripplot', 'pointplot', 'barplot'] _attach_methods(SeabornMethods, _wrap_categorical_plot, _categorical_plots) _data_plots = ['pairplot'] _attach_methods(SeabornMethods, _wrap_data_plot, _data_plots)
#!/usr/bin/env python

from pandas_ml.core.accessor import (_AccessorMethods,
                                     _attach_methods,
                                     _wrap_data_func)


class NeighborsMethods(_AccessorMethods):
    """Accessor to ``sklearn.neighbors``."""

    _module_name = 'sklearn.neighbors'


# Names handed to ``_attach_methods`` together with ``_wrap_data_func``.
_neighbor_methods = [
    'kneighbors_graph',
    'radius_neighbors_graph',
]
_attach_methods(NeighborsMethods, _wrap_data_func, _neighbor_methods)
return f def _wrap_data_plot(func, func_name): """ Wrapper for plotting with data """ def f(self, *args, **kwargs): return func(data=self._df, *args, **kwargs) f.__doc__ = ( """ Call ``%s`` using automatic mapping. - ``data``: ``ModelFrame`` """ % func_name ) return f _xy_plots = ["jointplot", "lmplot", "regplot", "residplot"] _attach_methods(SeabornMethods, _wrap_xy_plot, _xy_plots) _categorical_plots = ["factorplot", "boxplot", "violinplot", "stripplot", "pointplot", "barplot"] _attach_methods(SeabornMethods, _wrap_categorical_plot, _categorical_plots) _data_plots = ["pairplot"] _attach_methods(SeabornMethods, _wrap_data_plot, _data_plots)
# Pairwise metrics @property def pairwise(self): """Not implemented""" raise NotImplementedError # y_true and y_pred _classification_methods = [ 'accuracy_score', 'classification_report', 'hamming_loss', 'jaccard_score', 'matthews_corrcoef', 'zero_one_loss' ] _regression_methods = [ 'explained_variance_score', 'mean_absolute_error', 'mean_squared_error', 'r2_score' ] _cluster_methods = ['mutual_info_score'] _true_pred_methods = (_classification_methods + _regression_methods + _cluster_methods) _attach_methods(MetricsMethods, _wrap_target_pred_func, _true_pred_methods) # methods which doesn't take additional arguments _cluster_methods_noargs = [ 'adjusted_mutual_info_score', 'adjusted_rand_score', 'completeness_score', 'homogeneity_completeness_v_measure', 'homogeneity_score', 'normalized_mutual_info_score', 'v_measure_score' ] _attach_methods(MetricsMethods, _wrap_target_pred_noargs, _cluster_methods_noargs)
#!/usr/bin/env python

from pandas_ml.core.accessor import (_AccessorMethods,
                                     _attach_methods,
                                     _wrap_data_func)


class NeighborsMethods(_AccessorMethods):
    """Accessor to ``sklearn.neighbors``."""

    _module_name = 'sklearn.neighbors'


# Names handed to ``_attach_methods`` together with ``_wrap_data_func``.
_neighbor_methods = [
    'kneighbors_graph',
    'radius_neighbors_graph',
]
_attach_methods(NeighborsMethods, _wrap_data_func, _neighbor_methods)
if isinstance(self._df, ModelFrame): values = self._data.values if is_integer_dtype(values): # integer raises an error in normalize values = values.astype(np.float) result = func(values, *args, **kwargs) result = self._constructor(result, index=self._data.index, columns=self._data.columns) else: # ModelSeries values = np.atleast_2d(self._df.values) if is_integer_dtype(values): values = values.astype(np.float) result = func(values, *args, **kwargs) result = self._constructor(result[0], index=self._df.index, name=self._df.name) return result f.__doc__ = ( """ Call ``%s`` using automatic mapping. - ``X``: ``ModelFrame.data`` """ % func_name) return f _attach_methods(PreprocessingMethods, _wrap_func, _preprocessing_methods)
#!/usr/bin/env python

from pandas_ml.core.accessor import (_AccessorMethods,
                                     _attach_methods,
                                     _wrap_data_target_func)


class FeatureSelectionMethods(_AccessorMethods):
    """Accessor to ``sklearn.feature_selection``."""

    _module_name = 'sklearn.feature_selection'


# Names handed to ``_attach_methods`` with ``_wrap_data_target_func``.
_fs_methods = [
    'chi2',
    'f_classif',
    'f_regression',
]
_attach_methods(FeatureSelectionMethods, _wrap_data_target_func, _fs_methods)
return RegressionModelsMethods(self._df) @classmethod def _predict(cls, df, estimator, *args, **kwargs): data = df.data.values eval_MSE = kwargs.get('eval_MSE', False) if eval_MSE: y, MSE = estimator.predict(data, *args, **kwargs) if y.ndim == 1: y = df._constructor_sliced(y, index=df.index) MSE = df._constructor_sliced(MSE, index=df.index) else: y = df._constructor(y, index=df.index) MSE = df._constructor(MSE, index=df.index) return y, MSE else: y = estimator.predict(data, *args, **kwargs) if y.ndim == 1: y = df._constructor_sliced(y, index=df.index) else: y = df._constructor(y, index=df.index) return y class RegressionModelsMethods(_AccessorMethods): _module_name = 'sklearn.gaussian_process.regression_models' _regression_methods = ['constant', 'linear', 'quadratic'] _attach_methods(RegressionModelsMethods, _wrap_data_func, _regression_methods)
cluster_centers, labels = func(data.values, *args, **kwargs) labels = self._constructor_sliced(labels, index=data.index) return cluster_centers, labels def spectral_clustering(self, *args, **kwargs): """ Call ``sklearn.cluster.spectral_clustering`` using automatic mapping. - ``affinity``: ``ModelFrame.data`` """ func = self._module.spectral_clustering data = self._data labels = func(data.values, *args, **kwargs) labels = self._constructor_sliced(labels, index=data.index) return labels # Biclustering @property def bicluster(self): """Property to access ``sklearn.cluster.bicluster``""" return self._bicluster @cache_readonly def _bicluster(self): return _AccessorMethods(self._df, module_name='sklearn.cluster.bicluster') _cluster_methods = ['estimate_bandwidth', 'ward_tree'] _attach_methods(ClusterMethods, _wrap_data_func, _cluster_methods)