def predict(self, graphs):
    """Predict a target for each graph with the wrapped classifier.

    The graphs are vectorized with ``vectorize_graphs``; the stored feature
    values are then reduced to 0/1 indicators before being handed to
    ``self.classifier.predict``.

    Args:
        graphs: graphs to vectorize and predict on.

    Returns:
        Whatever ``self.classifier.predict`` returns for the binarized
        feature matrix.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # binarize: stored values > 0 become 1, everything else becomes 0
    # (presumably `features` is a scipy sparse matrix exposing `.data`)
    features.data = np.where(features.data > 0, 1, 0)
    return self.classifier.predict(features)
def fit(self, graphs, targets):
    """Fit a 100-tree random forest regressor and cache feature importances.

    Args:
        graphs: training graphs, vectorized with ``vectorize_graphs``.
        targets: regression targets passed to the estimator's ``fit``.

    Returns:
        ``self``, with ``estimator``, ``importance_dict`` and ``intercept``
        set.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    self.estimator = RandomForestRegressor(n_estimators=100)
    self.estimator = self.estimator.fit(features, targets)
    # feature_importance() receives the raw graphs, so it vectorizes them
    # again on its own.
    importances, bias = self.feature_importance(graphs)
    self.importance_dict = importances
    self.intercept = bias
    return self
def fit(self, graphs, targets):
    """Train the wrapped classifier on binarized graph features.

    Args:
        graphs: training graphs, vectorized with ``vectorize_graphs``.
        targets: labels passed unchanged to ``self.classifier.fit``.

    Returns:
        ``self``.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # binarize: stored values > 0 become 1, everything else becomes 0
    features.data = np.where(features.data > 0, 1, 0)
    self.classifier.fit(features, targets)
    return self
def feature_importance(self, graphs):
    """Derive per-feature importances from ``ti.predict`` contributions.

    Args:
        graphs: graphs to vectorize and explain with ``self.estimator``.

    Returns:
        A ``(importance_dict, intercept)`` pair: ``importance_dict`` maps
        each feature index to its contribution averaged over axis 0 (one
        entry per graph, presumably), and ``intercept`` is the first
        element of the biases returned by ``ti.predict``.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # the first returned value (the predictions) is not needed here
    _, biases, contributions = ti.predict(self.estimator, features)
    mean_contributions = np.mean(contributions, axis=0)
    intercept = biases[0]
    importance_dict = {
        index: value for index, value in enumerate(mean_contributions)}
    return importance_dict, intercept
def decision_function(self, graphs):
    """Score graphs with the wrapped classifier's ``decision_function``.

    Args:
        graphs: graphs to vectorize and score.

    Returns:
        The output of ``self.classifier.decision_function`` on the
        binarized feature matrix.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # binarize: stored values > 0 become 1, everything else becomes 0
    features.data = np.where(features.data > 0, 1, 0)
    return self.classifier.decision_function(features)
def decision_function(self, graphs):
    """Return the probability in column 1 of ``predict_proba`` per graph.

    Args:
        graphs: graphs to vectorize and score.

    Returns:
        A flat array holding column 1 of ``self.classifier.predict_proba``
        evaluated on the binarized feature matrix.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # binarize: stored values > 0 become 1, everything else becomes 0
    features.data = np.where(features.data > 0, 1, 0)
    class_probabilities = self.classifier.predict_proba(features)
    # assuming binary classification and column 1 to represent positives
    positive_column = class_probabilities[:, 1]
    return positive_column.reshape(-1)
def fit(self, graphs, targets):
    """Accumulate per-class feature counts from binarized graph features.

    Args:
        graphs: training graphs, vectorized with ``vectorize_graphs``.
        targets: one label per graph; a label equal to 1 is treated as
            positive, any other value as negative.

    Returns:
        ``self``, with ``target_bias`` and ``classifier_mtx`` set.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # binarize: stored values > 0 become 1, everything else becomes 0
    features.data = np.where(features.data > 0, 1, 0)

    # One indicator row per graph: (0, 1) for a positive, (1, 0) otherwise,
    # so column 0 counts negatives and column 1 counts positives.
    indicator_rows = [
        (0, 1) if target == 1 else (1, 0) for target in targets]
    indicators = np.array(indicator_rows)

    # Class counts as a single row, normalized by right_stochastic.
    class_counts = indicators.sum(axis=0).reshape(1, -1)
    self.target_bias = right_stochastic(class_counts)

    # (classes x graphs) . (graphs x features): per-class feature sums.
    self.classifier_mtx = csr_matrix(indicators).T.dot(features)
    return self
def decision_function(self, graphs):
    """Return the bias-adjusted score of column 1 for each graph.

    Args:
        graphs: graphs to vectorize and score against
            ``self.classifier_mtx``.

    Returns:
        A flat array with one value per graph, taken from column 1 of the
        normalized class scores.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    # NOTE(review): the features are used as-is here; if classifier_mtx was
    # built from binarized features, confirm skipping that step is intended.
    raw_scores = features.dot(self.classifier_mtx.T).todense()
    class_scores = right_stochastic(raw_scores)

    # incorporate training set class bias
    # (`*` is presumably a matrix product because todense() yields a matrix
    # type -- TODO confirm right_stochastic preserves that type)
    bias_diagonal = np.diag(np.asarray(self.target_bias).reshape(-1))
    class_scores = right_stochastic(class_scores * bias_diagonal)

    # assuming binary classification and column 1 to represent positives
    return class_scores[:, 1].A.reshape(-1)
def feature_importance(self, pos_graphs, neg_graphs):
    """Estimate feature importances with recursive feature elimination.

    An elastic-net ``SGDClassifier`` is wrapped in ``RFECV`` (10% of the
    features dropped per step, 3-fold cross-validation) and fitted on the
    positive graphs (label 1) versus the negative graphs (label -1).

    Args:
        pos_graphs: positive examples; must support ``+`` with
            ``neg_graphs`` and ``len``.
        neg_graphs: negative examples; same requirements.

    Returns:
        A ``(importance_dict, intercept)`` pair: ``importance_dict`` maps
        every feature index to the fitted coefficient projected back to the
        full feature space by ``inverse_transform``, and ``intercept`` is
        the first intercept of the final estimator.

    Side effects:
        Stores the final fitted estimator in ``self.estimator``.
    """
    graphs = pos_graphs + neg_graphs
    y = [1] * len(pos_graphs) + [-1] * len(neg_graphs)
    x = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)

    estimator = SGDClassifier(penalty='elasticnet', tol=1e-3)
    fs = RFECV(estimator, step=.1, cv=3)
    fs.fit(x, y)
    # The original scored the training set here and discarded the result;
    # that dead computation has been removed.

    # NOTE(review): fs.estimator_ is fitted on the selected feature subset
    # only -- confirm that users of self.estimator feed it matching input.
    self.estimator = fs.estimator_

    # Map the reduced coefficient vector back onto all feature indices.
    importances = fs.inverse_transform(fs.estimator_.coef_).reshape(-1)
    intercept = fs.estimator_.intercept_[0]
    importance_dict = dict(enumerate(importances))
    return importance_dict, intercept
def decision_function(self, graphs):
    """Return ``self.estimator.predict`` evaluated on the vectorized graphs.

    Args:
        graphs: graphs to vectorize and score.

    Returns:
        The estimator's predictions, used here as decision scores.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    scores = self.estimator.predict(features)
    return scores
def classifier_predict(self, graphs):
    """Predict a target for each graph with ``self.estimator``.

    Args:
        graphs: graphs to vectorize and predict on.

    Returns:
        The output of ``self.estimator.predict`` on the feature matrix.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    return self.estimator.predict(features)
def classifier_decision_function(self, graphs):
    """Score graphs with ``self.estimator.decision_function``.

    Args:
        graphs: graphs to vectorize and score.

    Returns:
        The output of ``self.estimator.decision_function`` on the feature
        matrix.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    return self.estimator.decision_function(features)
def fit(self, graphs, targets):
    """Fit ``self.estimator`` on the vectorized graphs.

    Args:
        graphs: training graphs, vectorized with ``vectorize_graphs``.
        targets: targets passed unchanged to ``self.estimator.fit``.

    Returns:
        ``self``.
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    self.estimator.fit(features, targets)
    return self
def decision_function(self, graphs):
    """Return column 1 of ``self.estimator.predict_proba`` for each graph.

    Args:
        graphs: graphs to vectorize and score.

    Returns:
        A flat array with one value per graph, taken from column 1 of the
        predicted probabilities (presumably the positive class -- confirm
        against the estimator's class order).
    """
    features = vectorize_graphs(
        graphs,
        encoding_func=self.encoding_func,
        feature_size=self.feature_size)
    class_probabilities = self.estimator.predict_proba(features)
    positive_column = class_probabilities[:, 1]
    return positive_column.reshape(-1)