class GENDISFeatures(BaseEstimator, TransformerMixin):
    """Transformer that wraps a ``GeneticExtractor`` with fixed settings.

    After ``fit`` the instance exposes:

    * ``genetic_extractor`` -- the fitted ``GeneticExtractor``.
    * ``names`` -- one ``'dist_shap_<i>'`` entry per discovered shapelet,
      followed by one ``'loc_shap_<i>'`` entry per shapelet.
      NOTE(review): these presumably label the columns returned by
      ``transform`` -- confirm against ``GeneticExtractor.transform``.
    """

    def __init__(self):
        pass

    def fit(self, X, y):
        """Fit the wrapped extractor on ``X``/``y`` and build ``names``.

        Returns ``self`` so calls can be chained.
        """
        # Debug output of the input shape. Plain lists have no ``.shape``;
        # skip the print for them instead of raising AttributeError.
        shape = getattr(X, 'shape', None)
        if shape is not None:
            print(shape)

        self.genetic_extractor = GeneticExtractor(
            verbose=True,
            population_size=25,
            iterations=10,
            wait=5,
            max_len=50,
            plot=None,
            location=True,
            n_jobs=4,
            fitness=auc_fitness_location,
        )
        self.genetic_extractor.fit(X, y)

        indices = [i for i, _ in enumerate(self.genetic_extractor.shapelets)]
        self.names = (
            ['dist_shap_{}'.format(i) for i in indices]
            + ['loc_shap_{}'.format(i) for i in indices]
        )
        return self

    def transform(self, X):
        """Return the fitted extractor's ``transform`` output for ``X``."""
        return self.genetic_extractor.transform(X)

    def fit_transform(self, X, y):
        """Fit on ``X``/``y`` and then return ``transform(X)``."""
        self.fit(X, y)
        return self.transform(X)
def test_accept_string_labels():
    """Fitting must not raise when the class labels are strings."""
    # Four all-zero series followed by four all-one series, each of length 8.
    X = [[0] * 8 for _ in range(4)] + [[1] * 8 for _ in range(4)]
    y = ['a'] * 4 + ['b'] * 4

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(X, y)
def test_accept_float_labels():
    """Fitting must not raise when the class labels are floats."""
    # Four all-zero series followed by four all-one series, each of length 8.
    X = [[0] * 8 for _ in range(4)] + [[1] * 8 for _ in range(4)]
    y = [1.0] * 4 + [2.0] * 4

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(X, y)
def test_accept_list():
    """Fitting must not raise when X and y are plain Python lists."""
    # Four all-zero series followed by four all-one series, each of length 8.
    X = [[0] * 8 for _ in range(4)] + [[1] * 8 for _ in range(4)]
    y = [0] * 4 + [1] * 4

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(X, y)
def test_accept_variable_length_arrays():
    """Fitting must not raise when the series have differing lengths."""
    zero_lengths = (10, 6, 8, 6)
    one_lengths = (8, 7, 8, 5)

    # Constant series: zeros for class 0, ones for class 1.
    X = [[0] * n for n in zero_lengths] + [[1] * n for n in one_lengths]
    y = [0] * len(zero_lengths) + [1] * len(one_lengths)

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(X, y)
def test_accept_pd_DataFrame():
    """Fitting must not raise for a pandas DataFrame / Series pair."""
    # Four all-zero rows followed by four all-one rows, each of length 8.
    rows = [[0] * 8 for _ in range(4)] + [[1] * 8 for _ in range(4)]
    labels = [0] * 4 + [1] * 4

    frame = pd.DataFrame(rows)
    targets = pd.Series(labels)

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(frame, targets)
def fit(self, X, y):
    """Fit a ``GeneticExtractor`` on ``X``/``y`` and build ``self.names``.

    ``self.names`` receives one ``'dist_shap_<i>'`` entry per discovered
    shapelet, followed by one ``'loc_shap_<i>'`` entry per shapelet.
    Returns ``self`` so calls can be chained.
    """
    # Debug output of the input shape. Plain lists have no ``.shape``;
    # skip the print for them instead of raising AttributeError.
    shape = getattr(X, 'shape', None)
    if shape is not None:
        print(shape)

    self.genetic_extractor = GeneticExtractor(
        verbose=True,
        population_size=25,
        iterations=10,
        wait=5,
        max_len=50,
        plot=None,
        location=True,
        n_jobs=4,
        fitness=auc_fitness_location,
    )
    self.genetic_extractor.fit(X, y)

    indices = [i for i, _ in enumerate(self.genetic_extractor.shapelets)]
    self.names = (
        ['dist_shap_{}'.format(i) for i in indices]
        + ['loc_shap_{}'.format(i) for i in indices]
    )
    return self
def teast_pipeline():
    """Fit a Pipeline chaining a GeneticExtractor and LogisticRegression.

    NOTE(review): the name is spelled ``teast_`` rather than ``test_``, so
    pytest's default discovery will likely not collect it -- confirm
    whether this is an intentional way of disabling the test.
    """
    X, y = random_walk_blobs(n_ts_per_blob=20, sz=64, noise_level=0.1)

    # Reshape to 2-D using the sizes of the first two axes.
    # Presumably this drops a trailing singleton dimension -- TODO confirm.
    n_series, series_len = X.shape[0], X.shape[1]
    X = np.reshape(X, (n_series, series_len))

    steps = [
        ('shapelets', GeneticExtractor(iterations=5, n_jobs=1, population_size=10)),
        ('log_reg', LogisticRegression()),
    ]
    Pipeline(steps).fit(X, y)
def test_accept_np_array():
    """Fitting must not raise when X and y are NumPy arrays."""
    # Four all-zero rows followed by four all-one rows, each of length 8.
    rows = [[0] * 8 for _ in range(4)] + [[1] * 8 for _ in range(4)]
    labels = [0] * 4 + [1] * 4

    # Convert each row to an array first, then stack them into one array.
    np_X = np.array([np.array(row) for row in rows])
    np_y = np.array(labels)

    extractor = GeneticExtractor(population_size=5, iterations=5)
    extractor.fit(np_X, np_y)
def test_serialization():
    """A saved and re-loaded extractor must reproduce the same transform.

    Fits an extractor, saves it to ``temp.p``, loads it back and checks
    that both extractors yield equal ``transform`` output on the same data.
    """
    X, y = random_walk_blobs(n_ts_per_blob=20, sz=64, noise_level=0.1)
    # Reshape to 2-D using the sizes of the first two axes.
    X = np.reshape(X, (X.shape[0], X.shape[1]))

    extractor = GeneticExtractor(iterations=5, n_jobs=1, population_size=10)
    distances = extractor.fit_transform(X, y)

    path = 'temp.p'
    try:
        extractor.save(path)
        new_extractor = GeneticExtractor.load(path)
        new_distances = new_extractor.transform(X)
        np.testing.assert_array_equal(distances, new_distances)
    finally:
        # Always clean up, even if save/load/assert fails, so a failing run
        # does not leave a stale file behind. The existence check avoids
        # masking the real error when save() never created the file.
        if os.path.exists(path):
            os.remove(path)