import os

import daal4py as d4p
import numpy as np


def main(readcsv=read_csv, method='defaultDense'):
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    nClasses = 5
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training(nClasses=nClasses)
    # 'weights' is an optional argument; with equal weights
    # the results must be the same as without weights
    weights = np.ones((train_data.shape[0], 1))
    train_result = train_algo.compute(train_data, train_labels, weights)

    # Now let's do some prediction
    predict_data = readcsv(predict_file, range(nFeatures))
    predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    # We expect fewer than 170 mispredicted values
    assert np.count_nonzero(predict_labels != predict_result.prediction) < 170

    return (train_result, predict_result, predict_labels)
def main():
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    train_data = read_csv(train_file, range(nFeatures))
    train_labels = read_csv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training()
    train_result = train_algo.compute(train_data, train_labels)

    # Now let's do some prediction
    predict_data = read_csv(predict_file, range(nFeatures))
    predict_labels = read_csv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    return (train_result, predict_result, predict_labels)
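# --- Hypothetical driver, not part of the original examples ---
# A minimal sketch of how the main() variants above could be exercised.
# Both variants read CSV files through a read_csv helper (main() above even
# references it as a default argument, so such a helper must be defined
# before main() in a runnable script). The pandas-based loader below and the
# printed fields are assumptions, not code from the original examples.
import pandas as pd


def read_csv(f, c, t=np.float64):
    # Load the selected columns of a headerless CSV file as an ndarray
    return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t).to_numpy()


if __name__ == "__main__":
    (train_result, predict_result, predict_labels) = main()
    # Show a few predicted labels next to the ground truth
    print("kNN classification results (first 10 rows, predicted vs. actual):")
    print(np.hstack((predict_result.prediction[:10], predict_labels[:10])))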
def fit(self, X, y):
    # Check the algorithm parameters
    if not (isinstance(self.n_neighbors, numbers.Integral)
            and self.n_neighbors > 0):
        raise ValueError('Parameter "n_neighbors" must be '
                         'a positive integer value.')
    if self.weights != 'uniform':
        warnings.warn('Value "{}" for argument "weights" not supported. '
                      'Using default "uniform".'.format(self.weights),
                      RuntimeWarning, stacklevel=2)
        self.weights = 'uniform'
    if self.algorithm != 'kd_tree':
        warnings.warn('Value "{}" for argument "algorithm" not supported. '
                      'Using default "kd_tree".'.format(self.algorithm),
                      RuntimeWarning, stacklevel=2)
        self.algorithm = 'kd_tree'
    if self.leaf_size != 31:
        warnings.warn('Value "{}" for argument "leaf_size" not supported. '
                      'Using default "31".'.format(self.leaf_size),
                      RuntimeWarning, stacklevel=2)
        self.leaf_size = 31
    if self.p != 2:
        warnings.warn('Value "{}" for argument "p" not supported. '
                      'Using default "2".'.format(self.p),
                      RuntimeWarning, stacklevel=2)
        self.p = 2
    if self.metric != 'minkowski':
        warnings.warn('Value "{}" for argument "metric" not supported. '
                      'Using default "minkowski".'.format(self.metric),
                      RuntimeWarning, stacklevel=2)
        self.metric = 'minkowski'
    if self.metric_params is not None:
        warnings.warn('Argument "metric_params" not (yet) supported. '
                      'Ignored.', RuntimeWarning, stacklevel=2)
        self.metric_params = None
    if self.n_jobs is not None:
        warnings.warn('Argument "n_jobs" not (yet) supported. '
                      'Ignored. All available processors will be used.',
                      RuntimeWarning, stacklevel=2)
        self.n_jobs = None

    # Check that X and y have correct shape
    X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])
    check_classification_targets(y)

    # Encode labels
    le = preprocessing.LabelEncoder()
    le.fit(y)
    self.classes_ = le.classes_
    y_ = le.transform(y)

    # Convert to 2d array
    y_ = y_.reshape((-1, 1))

    self.n_classes_ = len(self.classes_)
    self.n_features_ = X.shape[1]

    # Classifier can't train when only one class is present.
    # Trivial case
    if self.n_classes_ == 1:
        return self

    # Get random seed
    rs = check_random_state(None)
    self.seed_ = rs.randint(np.iinfo('i').max)

    # Define type of data
    fptype = getFPType(X)

    # Fit the model
    train_algo = d4p.kdtree_knn_classification_training(
        fptype=fptype,
        engine=d4p.engines_mcg59(seed=self.seed_))
    train_result = train_algo.compute(X, y_)

    # Store the model
    self.daal_model_ = train_result.model

    # Return the classifier
    return self
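# --- Hypothetical companion method, not taken from the library source ---
# A minimal sketch of how a predict() counterpart to the fit() above could
# consume the stored daal_model_; it mirrors the prediction calls used in the
# batch examples earlier in this section. The single-class shortcut, the use
# of sklearn.utils.validation.check_array, and the label decoding are
# assumptions about how such a method might look, under the names set by fit().
def predict(self, X):
    # Validate input and keep the floating-point dtypes accepted by fit()
    X = check_array(X, dtype=[np.single, np.double])

    # Trivial case: only one class was seen during fit()
    if self.n_classes_ == 1:
        return np.full(X.shape[0], self.classes_[0])

    # Run kd-tree kNN prediction against the stored daal4py model
    fptype = getFPType(X)
    predict_algo = d4p.kdtree_knn_classification_prediction(fptype=fptype)
    predict_result = predict_algo.compute(X, self.daal_model_)

    # Map the encoded label indices back to the original class labels
    indices = predict_result.prediction.ravel().astype(np.int64)
    return np.take(self.classes_, indices)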