def test_contingencies(self):
    # Build a domain from the first two (continuous) attributes plus an
    # EqualWidth-discretized copy of "sepal width", with "iris" as the class,
    # then check the types of the computed contingencies.
    iris = SqlTable(self.conn, self.iris, inspect_values=True)
    iris.domain = Domain(
        iris.domain[:2] + (EqualWidth()(iris, iris.domain['sepal width']),),
        iris.domain['iris'])

    conts = get_contingencies(iris)
    self.assertEqual(len(conts), 3)
    self.assertIsInstance(conts[0], Continuous)
    self.assertIsInstance(conts[1], Continuous)
    self.assertIsInstance(conts[2], Discrete)
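# A minimal standalone sketch of the same EqualWidth/contingency pattern on the
# bundled iris dataset instead of a SQL table (an illustration only; the import
# paths below follow current Orange releases and are not part of the test above).
from Orange.data import Table, Domain
from Orange.preprocess.discretize import EqualWidth
from Orange.statistics.contingency import get_contingencies, Continuous, Discrete

iris = Table("iris")
disc_sw = EqualWidth(n=4)(iris, iris.domain["sepal width"])
domain = Domain(iris.domain.attributes[:2] + (disc_sw,), iris.domain["iris"])
disc_iris = Table.from_table(domain=domain, source=iris)
conts = get_contingencies(disc_iris)
print([type(c).__name__ for c in conts])  # expected: Continuous, Continuous, Discrete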
def get_root(self, data, xvar, yvar, zvar=None):
    """Compute the root density map item."""
    assert self.n_bins > 2
    x_disc = EqualWidth(n=self.n_bins)(data, xvar)
    y_disc = EqualWidth(n=self.n_bins)(data, yvar)

    def bins(var):
        points = list(var.compute_value.points)
        assert points[0] <= points[1]
        width = points[1] - points[0]
        return np.array([points[0] - width] + points + [points[-1] + width])

    xbins = bins(x_disc)
    ybins = bins(y_disc)

    # Extend the lower/upper bin edges to infinity.
    # (the grid_bin function has an optimization for this case).
    xbins1 = np.r_[-np.inf, xbins[1:-1], np.inf]
    ybins1 = np.r_[-np.inf, ybins[1:-1], np.inf]

    t = grid_bin(data, xvar, yvar, xbins1, ybins1, zvar=zvar)
    return t._replace(xbins=xbins, ybins=ybins)
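# Toy illustration (independent of the widget code above) of how bins() pads
# the EqualWidth cut points with one extra bin of the same width on each side,
# and how the outermost edges are then pushed to +/-inf before grid_bin is
# called. The cut points here are made up for the example.
import numpy as np

points = [4.5, 5.0, 5.5]
width = points[1] - points[0]
edges = np.array([points[0] - width] + points + [points[-1] + width])
print(edges)                                 # [4.  4.5 5.  5.5 6. ]
print(np.r_[-np.inf, edges[1:-1], np.inf])   # [-inf  4.5  5.   5.5  inf]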
""" import pandas as pd import datetime from Orange.data import Table, Domain from Orange.classification import NNClassificationLearner from Orange.evaluation import CrossValidation, scoring from Orange.preprocess import Normalize, Scale from Orange.preprocess import DomainDiscretizer from Orange.preprocess.discretize import EqualWidth raw_data_table = Table.from_file("white wine.csv") feature_vars = list(raw_data_table.domain.variables[1:11]) #Bucket the classifier into distinct bins discretizer = DomainDiscretizer() discretizer.method = EqualWidth(n=9) discretizer_domain = discretizer(raw_data_table) class_label_var = discretizer_domain[0] print(class_label_var.values) wine_domain = Domain(feature_vars, class_label_var) data_table = Table.from_table(domain=wine_domain, source=raw_data_table) Table.save(data_table, "data_table normal2.csv") def normalize_table(table_to_process): norm = Normalize(norm_type=Normalize.NormalizeBySpan) norm.transform_class = False norm_data_table = norm(table_to_process) norm_data_table.shuffle() return norm_data_table #Normalise the feature values norm_data_table = normalize_table(data_table) print("Applying learner on total data records {}".format(len(norm_data_table)))