def __init__(self, threshold, min_entries=10, metric='minkowski', k=1, n_jobs=1):
    """Set up the featurization pipeline and store the distance settings.

    Args:
        threshold (float): Largest distance at which a prediction is still
            considered "trustable"
        min_entries (int): Number of training entries required before the
            surrogate can be evaluated
        metric (string): Name of the distance metric
        k (int): How many nearest neighbors to consider
        n_jobs (int): Number of threads to use when computing distances
    """
    super().__init__()

    # Build the featurizer
    # TODO (wardlt): This code is duplicated in the inference engine. Maybe we should let "featurizer" be a param
    featurizer = CoulombMatrix(flatten=True)
    # The featurizer is always set to a single job; `n_jobs` is stored
    # separately below and is not passed to it.
    featurizer.set_n_jobs(1)
    steps = [
        ('featurizer', featurizer),
        ('scaler', RobustScaler()),
    ]
    self.cm = Pipeline(steps)

    # Keep the remaining settings on the instance
    self.threshold = threshold
    self.min_entries = min_entries
    self.metric = metric
    self.k = k
    self.n_jobs = n_jobs

    # Not built in the constructor; presumably assigned when the
    # nearest-neighbor model is fit elsewhere -- TODO confirm
    self.nn_ = None
def __init__(self, n_neighbors: int = 5):
    """Assemble a featurize -> scale -> k-nearest-neighbors regression pipeline.

    The assembled pipeline is handed to the parent class constructor.

    Args:
        n_neighbors (int): Number of neighboring points to use for the NN model
    """
    # Flattened Coulomb matrix featurizer, set to a single job
    featurizer = CoulombMatrix(flatten=True)
    featurizer.set_n_jobs(1)

    # Chain featurization, scaling, and the regressor into one estimator
    steps = [
        ('featurizer', featurizer),
        ('scaler', RobustScaler()),
        ('model', KNeighborsRegressor(n_neighbors)),
    ]
    pipeline = Pipeline(steps)

    super().__init__(pipeline)