def test_filter_values_covered_by_single_interval(filter_values):
    """Check that a cover made of one single interval contains every value
    in ``filter_values``."""
    # TODO: Extend to inputs with shape (n_samples, 1)
    single_cover = OneDimensionalCover(n_intervals=1)
    masks = single_cover.fit_transform(filter_values)
    # TODO: Generate filter_values with desired shape
    # Add a trailing axis so the values can be indexed by the boolean masks.
    values_as_column = filter_values[:, None]
    covered_values = values_as_column[masks]
    assert_almost_equal(covered_values, filter_values)
def test_two_dimensional_tensor(pts):
    """Verify that OneDimensionalCover fails for an input with more than one
    dimension, and that CubicalCover does not."""
    one_d = OneDimensionalCover()
    # Fitting the one-dimensional cover on multi-dimensional input must raise.
    with pytest.raises(ValueError):
        one_d.fit(pts)

    # The cubical cover must fit the very same input without raising.
    cubical = CubicalCover()
    _ = cubical.fit(pts)
def test_cubical_fit_transform_consistent_with_OneD(filter, kind, n_intervals, overlap_fraction):
    """On one-dimensional data, CubicalCover and OneDimensionalCover built
    with the same arguments must produce the same ``fit_transform`` output.
    """
    # Both covers receive exactly the same positional arguments.
    shared_args = (kind, n_intervals, overlap_fraction)
    one_d_cover, cubical_cover = (
        cover_cls(*shared_args)
        for cover_cls in (OneDimensionalCover, CubicalCover)
    )

    masks_one_d = one_d_cover.fit_transform(filter)
    masks_cubical = cubical_cover.fit_transform(filter)

    assert_almost_equal(masks_one_d, masks_cubical)
def test_fit_transform_limits_not_computed():
    """With `kind`= `'balanced'`, calling only ``fit_transform`` must not
    make fitted intervals available; they are computed only when ``fit`` is
    called explicitly."""
    balanced_cover = OneDimensionalCover(
        n_intervals=10,
        kind='balanced',
        overlap_frac=0.3,
    )
    values = np.arange(0, 30)

    balanced_cover.fit_transform(values)

    # Asking for the intervals afterwards must report an unfitted estimator.
    with pytest.raises(NotFittedError):
        balanced_cover.get_fitted_intervals()
def test_balanced_is_balanced(balanced_cover):
    """Check that a balanced cover puts every point in exactly one interval
    and that every interval holds ``nb_in_each_interval`` points."""
    points, nb_in_each_interval, nb_intervals = balanced_cover
    cover = OneDimensionalCover(
        kind='balanced',
        n_intervals=nb_intervals,
        overlap_frac=0.01,
    )
    membership = cover.fit_transform(points)

    # Column sums: how many points fall in each interval.
    points_per_interval = np.sum(membership, axis=0)
    assert all(count == nb_in_each_interval for count in points_per_interval)

    # Row sums: how many intervals each point belongs to.
    intervals_per_point = np.sum(membership, axis=1)
    assert all(count == 1 for count in intervals_per_point)
def test_filter_values_covered_by_interval_union(filter_values, n_intervals):
    """Test that each value is in at least one interval (that is, the cover
    is a true cover)."""
    # TODO: Extend to inputs with shape (n_samples, 1)
    cover = OneDimensionalCover(n_intervals=n_intervals)
    interval_masks = cover.fit_transform(filter_values)

    # One array of covered values per column (interval) of the mask.
    intervals = [filter_values[interval_masks[:, i]]
                 for i in range(interval_masks.shape[1])]
    intervals_union = reduce(np.union1d, intervals)

    # np.isin replaces the deprecated np.in1d; both give the same boolean
    # membership mask for one-dimensional input.
    filter_values_union = filter_values[np.isin(filter_values,
                                                intervals_union)]
    assert_almost_equal(filter_values_union, filter_values)
def _runMapper(self):
    """
    creates mapper graphs based on train data

    One mapper pipeline is built per projection component
    (``self.n_components`` in total) and fitted to produce one graph each.
    Graphs and pipelines are cached as pickle files under ``TEMP_DATA``;
    when ``self.remake`` is false and the graphs cache file exists, both are
    loaded from disk instead of being recomputed.

    Sets ``self.graphs`` and ``self.mapper_pipes``.

    :return: None
    """
    graphs_path = TEMP_DATA + "%s_firstsimplegap_graphs" % self.label
    pipes_path = TEMP_DATA + "%s_mapper_pipes" % self.label

    log.debug("--->creating mappers...")
    if not self.remake and os.path.exists(graphs_path):
        # NOTE: pickle.load executes arbitrary code from the file; these
        # cache files must only ever come from a trusted location.
        with open(graphs_path, "rb") as fgin:
            self.graphs = pickle.load(fgin)
        with open(pipes_path, "rb") as fpin:
            self.mapper_pipes = pickle.load(fpin)
        return

    clusterer = FirstSimpleGap()
    self.mapper_pipes = []

    log.debug("------> creating projection components...")
    for k in range(self.n_components):
        log.debug("---------> on component {}/{}...".format(k + 1, self.n_components))
        proj = Projection(columns=k)
        filter_func = Pipeline(steps=[('pca', self.rep), ('proj', proj)])
        # NOTE(review): the argument of this call was missing in the reviewed
        # source; ``self.data`` is assumed to hold the training data - confirm.
        # The transformed values are not used; the call is kept because it
        # presumably fits the pipeline steps (including ``self.rep``).
        filter_func.fit_transform(self.data)
        cover = OneDimensionalCover(n_intervals=self.n_intervals,
                                    overlap_frac=self.overlap_frac,
                                    kind='balanced')
        mapper_pipe = make_mapper_pipeline(
            scaler=None,
            filter_func=filter_func,
            cover=cover,
            clusterer=clusterer,
            verbose=(log.getEffectiveLevel() == logging.DEBUG),
            n_jobs=1)
        mapper_pipe.set_params(filter_func__proj__columns=k)
        self.mapper_pipes.append(("PCA%d" % (k + 1), mapper_pipe))

    # try parallelization
    # (a thread-based joblib ``Parallel`` variant of the loop below was tried
    # and left disabled; the graphs are computed sequentially)
    log.debug("------> entering parallelization...")
    # NOTE(review): same missing argument as above; ``self.data`` assumed.
    self.graphs = [mapper_pipe.fit_transform(self.data)
                   for _, mapper_pipe in self.mapper_pipes]

    with open(graphs_path, "wb") as fg:
        pickle.dump(self.graphs, fg)
    with open(pipes_path, "wb") as fp:
        pickle.dump(self.mapper_pipes, fp)
def test_equal_interval_length(filter_values, n_intervals, overlap_frac):
    """Test that all the intervals have the same length, up to an additive
    constant of 0.1."""
    cover = OneDimensionalCover(kind="uniform", n_intervals=n_intervals,
                                overlap_frac=overlap_frac)
    # The cover must be fitted before its intervals can be queried.
    cover = cover.fit(filter_values)

    # Transpose the (lower, upper) pairs into two arrays. The first and last
    # intervals are excluded by the [1:-1] slice (presumably because their
    # outer limits are unbounded - confirm).
    lower_limits, upper_limits = np.array(
        list(map(tuple, zip(*cover.get_fitted_intervals()[1:-1])))
    )

    # rounding precision: lengths are compared after flooring length * 10,
    # i.e. they must fall into the same bin of width 0.1
    decimals = 10
    assert len(set(np.floor((upper_limits - lower_limits) * decimals).tolist())) == 1
def test_one_dimensional_cover_shape(filter_values, n_intervals):
    """Check the shape of the mask returned by ``fit_transform``: it must
    have one row per sample and no more columns than ``n_intervals``.

    When ``fit_transform`` raises a ValueError instead, check that the message
    is the "only one unique filter value" one, that more than one interval was
    requested and that the filter values contain a single unique value."""
    # TODO: Extend to inputs with shape (n_samples, 1)
    cover = OneDimensionalCover(n_intervals=n_intervals)
    n_samples = len(filter_values)
    n_intervals = cover.n_intervals

    try:
        masks = cover.fit_transform(filter_values)
    except ValueError as ve:
        expected_message = (f"Only one unique filter value found, cannot "
                            f"fit {n_intervals} > 1 intervals.")
        assert ve.args[0] == expected_message
        n_unique_values = len(np.unique(filter_values))
        assert (n_intervals > 1) and (n_unique_values == 1)
    else:
        assert masks.shape[0] == n_samples
        assert masks.shape[1] <= n_intervals
def test_fit_transform_against_fit_and_transform(
    pts, n_intervals, kind, overlap_frac
):
    """Fitting and transforming should give the same result as fit_transform"""
    cover = OneDimensionalCover(n_intervals=n_intervals, kind=kind,
                                overlap_frac=overlap_frac)
    x_fit_transf = cover.fit_transform(pts)

    # A second, identically configured cover goes through the two-step path:
    # an explicit fit followed by a separate transform.
    cover2 = OneDimensionalCover(n_intervals=n_intervals, kind=kind,
                                 overlap_frac=overlap_frac)
    cover2 = cover2.fit(pts)
    x_fit_and_transf = cover2.transform(pts)

    assert_almost_equal(x_fit_transf, x_fit_and_transf)
def test_contract_nodes():
    """Test that, on a pathological dataset, we generate a graph without edges
    when `contract_nodes` is set to True and with edges when it is set to
    False."""
    X = make_circles(n_samples=2000)[0]

    filter_func = Projection()
    cover = OneDimensionalCover(n_intervals=5, overlap_frac=0.4)
    p = filter_func.fit_transform(X)
    m = cover.fit_transform(p)

    # For each pair of consecutive intervals, drop the points whose filter
    # value lies within `gap` of either end of the overlap region.
    gap = 0.1
    idx_to_remove = []
    for i in range(m.shape[1] - 1):
        inters = np.logical_and(m[:, i], m[:, i + 1])
        inters_idx = np.flatnonzero(inters)
        p_inters = p[inters_idx]
        min_p, max_p = np.min(p_inters), np.max(p_inters)
        idx_to_remove += list(np.flatnonzero((min_p <= p) & (p <= min_p + gap)))
        idx_to_remove += list(np.flatnonzero((max_p - gap <= p) & (p <= max_p)))

    # A set gives constant-time membership tests while filtering the rows.
    removed = set(idx_to_remove)
    X_f = X[[x for x in range(len(X)) if x not in removed]]

    clusterer = DBSCAN(eps=0.05)
    pipe = make_mapper_pipeline(filter_func=filter_func,
                                cover=cover,
                                clusterer=clusterer,
                                contract_nodes=True)
    graph = pipe.fit_transform(X_f)
    # `graph.es` is the edge sequence of the returned graph.
    assert not len(graph.es)

    pipe.set_params(contract_nodes=False)
    graph = pipe.fit_transform(X_f)
    assert len(graph.es)