from sparktk.loggers import log_load; log_load(__name__); del log_load from sparktk.propobj import PropertiesObject from sparktk import TkContext __all__ = ["train", "load", "KMeansModel"] def train(frame, columns, k=2, scalings=None, max_iter=20, epsilon=1e-4, seed=None, init_mode="k-means||"): """ Creates a KMeansModel by training on the given frame :param frame: (Frame) frame of training data :param columns: (List[str]) names of columns containing the observations for training :param k: (Optional(int)) number of clusters :param scalings: (Optional(List[float])) column scalings for each of the observation columns. The scaling value is multiplied by the corresponding value in the observation column :param max_iter: (Optional(int)) number of iterations for which the algorithm should run :param epsilon: (Optional(float)) distance threshold within which we consider k-means to have converged. Default is 1e-4. If all centers move less than this Euclidean distance, we stop iterating one run :param seed: (Optional(long)) seed for randomness :param init_mode: (Optional(str)) the initialization technique for the algorithm. It can be either "random" to choose random points as initial clusters or "k-means||" to use a parallel variant of k-means++. Default is "k-means||". :return: (KMeansModel) trained KMeans model """ tc = frame._tc _scala_obj = get_scala_obj(tc) if isinstance(columns, basestring): columns = [columns] scala_columns = tc.jutils.convert.to_scala_vector_string(columns)
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from sparktk.frame.ops.classification_metrics_value import ClassificationMetricsValue from sparktk.models.logistic_regression_summary_table import LogisticRegressionSummaryTable from sparktk.loggers import log_load from sparktk.propobj import PropertiesObject from sparktk import TkContext from sparktk.arguments import affirm_type log_load(__name__) del log_load __all__ = ["train", "LogisticRegressionModel"] def train(frame, observation_columns, label_column, frequency_column=None, num_classes=2, optimizer="LBFGS", compute_covariance=True, intercept=True, feature_scaling=False, threshold=0.5, reg_type="L2",
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from sparktk.loggers import log_load log_load(__name__) del log_load from sparktk.propobj import PropertiesObject from sparktk import TkContext __all__ = ["train", "load", "PcaModel"] def train(frame, columns, mean_centered=True, k=None): """ Creates a PcaModel by training on the given frame Parameters ----------
# Copyright (c) 2016 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from sparktk.loggers import log_load; log_load(__name__); del log_load from sparktk.propobj import PropertiesObject from sparktk.frame.ops.classification_metrics_value import ClassificationMetricsValue from sparktk import TkContext __all__ = ["train", "load", "SvmModel"] def train(frame, label_column, observation_columns, intercept = True, num_iterations = 100, step_size = 1.0, reg_type = None, reg_param = 0.01,