def test_boston(self):
    """Search, final-train and evaluate a HyperDT regressor on the Boston data.

    The data set is loaded into a DataFrame/Series pair, split 80/20 with a
    fixed ``random_state`` and stored on ``self`` (``X``, ``y``, ``X_train``,
    ``X_test``, ``y_train``, ``y_test``).  A three-trial random search is run
    with ``RootMeanSquaredError`` as the reward metric, the best trial is
    re-trained on the full data, and the resulting estimator is checked.
    """
    print("Loading datasets...")
    # NOTE(review): assuming this is scikit-learn's ``load_boston``, it was
    # removed in scikit-learn 1.2 -- confirm the pinned version still has it.
    boston_dataset = load_boston()
    self.X = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
    self.y = pd.Series(boston_dataset.target)
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.X, self.y, test_size=0.2, random_state=42
    )

    # RMSE is an error metric (lower is better), so the searcher has to
    # minimize the reward; maximizing would pick the worst trial as "best".
    rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Minimize)
    hdt = HyperDT(
        rs,
        callbacks=[
            SummaryCallback(),
            FileStorageLoggingCallback(rs, output_dir='hotexamples_com/hyn_logs'),
        ],
        reward_metric='RootMeanSquaredError',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
    )
    hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)
    best_trial = hdt.get_best_trial()

    # The final model is fitted on the complete data set (train + test rows).
    estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
    score = estimator.predict(self.X_test)
    result = estimator.evaluate(self.X_test, self.y_test)

    # One prediction per held-out row (same check as test_bankdata uses).
    assert len(score) == len(self.y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_fit_one_shot_model_epoch(self):
    """Smoke-test ``HyperKeras.fit_one_shot_model_epoch`` in one-shot mode.

    The same random searcher drives the search and is also passed as the
    one-shot train sampler.  The test has no assertions: it passes as long
    as the call completes without raising.
    """
    searcher = RandomSearcher(
        self.get_space_simple,
        optimize_direction=OptimizeDirection.Maximize,
    )
    hyper_model = HyperKeras(
        searcher,
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        callbacks=[SummaryCallback()],
        one_shot_mode=True,
        one_shot_train_sampler=searcher,
    )
    features, labels = self.get_x_y_1()
    hyper_model.fit_one_shot_model_epoch(features, labels)
def test_build_dataset_iter(self):
    """Check batch sizes and batch counts produced by ``build_dataset_iter``.

    Three configurations are exercised on the same ``(x, y)`` pair:

    * ``batch_size=10``: every batch has 10 items, 10 batches in total;
    * ``batch_size=32``: three batches of 32, then one of 4 (4 batches);
    * ``batch_size=32, repeat_count=2``: six batches of 32, then one of 8
      (7 batches).

    These numbers are consistent with ``get_x_y_1`` returning 100 samples
    (presumably -- verify against the fixture).
    """
    searcher = RandomSearcher(self.get_space, optimize_direction=OptimizeDirection.Maximize)
    hyper_model = HyperKeras(
        searcher,
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        callbacks=[SummaryCallback()],
    )
    x, y = self.get_x_y_1()

    def _verify(batches, size_at, expected_total):
        # Walk the iterator, checking both halves of every batch against the
        # size expected at that position, then check the number of batches.
        seen = 0
        for seen, (x_b, y_b) in enumerate(batches, start=1):
            wanted = size_at(seen - 1)
            assert len(x_b) == wanted
            assert len(y_b) == wanted
        assert seen == expected_total

    _verify(
        hyper_model.build_dataset_iter(x, y, batch_size=10),
        lambda idx: 10,
        10,
    )
    _verify(
        hyper_model.build_dataset_iter(x, y, batch_size=32),
        lambda idx: 32 if idx < 3 else 4,
        4,
    )
    _verify(
        hyper_model.build_dataset_iter(x, y, batch_size=32, repeat_count=2),
        lambda idx: 32 if idx < 6 else 8,
        7,
    )
def test_model_with_hp(self):
    """Run a short HyperKeras search, final-train the best sample and check it.

    The same ``(x, y)`` pair is used for training and evaluation.  The test
    expects 100 predictions and a truthy evaluation result.
    """
    searcher = RandomSearcher(self.get_space, optimize_direction=OptimizeDirection.Maximize)
    hyper_model = HyperKeras(
        searcher,
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        callbacks=[SummaryCallback()],
    )
    x, y = self.get_x_y()

    # NOTE(review): ``max_trails`` / ``get_best_trail`` are spelled "trail"
    # here, while other tests use ``max_trials`` / ``get_best_trial`` --
    # confirm which spelling the installed Hypernets version exposes.
    hyper_model.search(x, y, x, y, max_trails=3)
    winner = hyper_model.get_best_trail()

    estimator = hyper_model.final_train(winner.space_sample, x, y)
    predictions = estimator.predict(x)
    evaluation = estimator.evaluate(x, y)

    assert len(predictions) == 100
    assert evaluation
def test_bankdata(self):
    """Search, final-train and evaluate HyperDT on a 2000-row bank sample.

    The searcher maximizes ``AUC`` over three trials.  The sample has its
    ``id`` column dropped, is split 80/20 with a fixed ``random_state``, and
    column ``y`` is popped off both parts as the target.  The best trial is
    then re-trained on the training part and checked on the test part.
    """
    searcher = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Maximize)
    search_callbacks = [
        SummaryCallback(),
        FileStorageLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
    ]
    network_params = {
        'hidden_units': ((256, 0, False), (256, 0, False)),
        'dnn_activation': 'relu',
    }
    hdt = HyperDT(
        searcher,
        callbacks=search_callbacks,
        reward_metric='AUC',
        dnn_params=network_params,
    )

    bank = dsutils.load_bank().sample(n=2000, random_state=9527)
    bank.drop(['id'], axis=1, inplace=True)
    train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)
    # pop() removes the target column from the frames in place.
    train_target = train_part.pop('y')
    test_target = test_part.pop('y')

    hdt.search(train_part, train_target, test_part, test_target, max_trials=3)
    winner = hdt.get_best_trial()
    assert winner

    estimator = hdt.final_train(winner.space_sample, train_part, train_target)
    predictions = estimator.predict(test_part)
    evaluation = estimator.evaluate(test_part, test_target)

    assert len(predictions) == len(test_target)
    assert evaluation
    assert isinstance(estimator.model, DeepTable)
def test_hyper_dt(self):
    """Exercise HyperDT end to end on a small random data set.

    A 100-row frame with one integer column, two string columns and one
    normally distributed column is generated, together with a 0/1 integer
    target.  After a three-trial, one-epoch search the test checks both the
    estimator reloaded from the best trial's model file and the estimator
    returned by ``final_train``.
    """
    searcher = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(
        searcher,
        callbacks=[SummaryCallback()],
        reward_metric='accuracy',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
        cache_preprocessed_data=True,
    )

    n_rows = 100
    # Draws are made in the same order as the columns are listed, then y.
    columns = {
        'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
        'x2': np.random.randint(0, 2, size=n_rows).astype('str'),
        'x3': np.random.randint(0, 2, size=n_rows).astype('str'),
        'x4': np.random.normal(0.0, 1.0, size=n_rows),
    }
    y = np.random.randint(0, 2, size=n_rows, dtype='int')
    df = pd.DataFrame(columns)

    def _check_estimator(candidate):
        # Shared checks: one prediction per row, a truthy evaluation result,
        # and the expected estimator type.
        predictions = candidate.predict(df)
        evaluation = candidate.evaluate(df, y)
        assert len(predictions) == 100
        assert evaluation
        assert isinstance(candidate, DTEstimator)

    hdt.search(df, y, df, y, max_trials=3, epochs=1)
    winner = hdt.get_best_trial()

    reloaded = hdt.load_estimator(winner.model_file)
    assert reloaded
    _check_estimator(reloaded)

    retrained = hdt.final_train(winner.space_sample, df, y, epochs=1)
    _check_estimator(retrained)
    assert isinstance(retrained.model, DeepTable)