def test_optimize_modeling(self):
    """Test optimizing modeling based on engineered features"""
    _feature_engineer: FeatureEngineer = FeatureEngineer(df=DATA_SET,
                                                         target_feature='AveragePrice',
                                                         temp_dir=DATA_DIR,
                                                         auto_typing=False)
    _feature_engineer.set_predictors(exclude_original_data=False)
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=_feature_engineer.get_target(),
        input_file_path=None,
        train_data_file_path=None, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=_feature_engineer.get_predictors(),
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['cat'], model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=2, pop_size=4,
        mutation_rate=0.1, mutation_prob=0.85, parents_ratio=0.5,
        early_stopping=0, convergence=True, convergence_measure='median',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False, verbose=False, checkpoint=True,
        feature_engineer=_feature_engineer,
        sampling_function=None
    )
    _ga.optimize()
    # The maximum fitness score should not decrease from the first to the last generation
    self.assertTrue(expr=_ga.evolution_gradient.get('max')[0] <= _ga.evolution_gradient.get('max')[-1])
def test_optimize_modeling_text_clustering(self):
    """Test optimizing modeling for text clustering"""
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=None,
        input_file_path=None,
        train_data_file_path=DATA_FILE_PATH_CLUSTER, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=[PREDICTOR_CLUSTER],
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['gsdmm'],  # [np.random.choice(a=list(CLUSTER_ALGORITHMS.keys()))]
        model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=2, pop_size=3,
        mutation_prob=0.5, mutation_rate=0.85,
        early_stopping=0, convergence=True, convergence_measure='median',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False,
        feature_engineer=None,
        sampling_function=None,
        **dict(sep=',', tokenize=True)
    )
    _ga.optimize()
    # The maximum fitness score should not decrease from the first to the last generation
    self.assertTrue(expr=_ga.evolution_gradient.get('max')[0] <= _ga.evolution_gradient.get('max')[-1])
def test_optimize_modeling_text_classification(self):
    """Test optimizing modeling for text classification"""
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=TARGET_TEXT,
        input_file_path=None,
        train_data_file_path=TRAIN_DATA_PATH_TEXT,
        test_data_file_path=TEST_DATA_PATH_TEXT,
        valid_data_file_path=VALIDATION_DATA_PATH_TEXT,
        df=None, data_set=None,
        features=PREDICTORS_TEXT,
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['trans'], model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=1, pop_size=4,
        mutation_rate=0.5, mutation_prob=0.85,
        early_stopping=0, convergence=True, convergence_measure='median',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=UNIQUE_LABELS,
        log=False,
        feature_engineer=None,
        sampling_function=None,
        **dict(sep=',')
    )
    _ga.optimize()
    # The maximum fitness score should not decrease from the first to the last generation
    self.assertTrue(expr=_ga.evolution_gradient.get('max')[0] <= _ga.evolution_gradient.get('max')[-1])
def test_save_evolution(self):
    """Test saving of the evolution results as pickle files"""
    _feature_engineer: FeatureEngineer = FeatureEngineer(df=DATA_SET, target_feature='AveragePrice')
    _feature_engineer.set_predictors(exclude_original_data=False)
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=_feature_engineer.get_target(),
        input_file_path=None,
        train_data_file_path=None, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=_feature_engineer.get_predictors(),
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['cat'], model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=5, pop_size=64,
        mutation_rate=0.1, mutation_prob=0.15, parents_ratio=0.5,
        early_stopping=0, convergence=True, convergence_measure='median',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False,
        feature_engineer=_feature_engineer,
        sampling_function=None
    )
    _ga.optimize()
    _ga.save_evolution(ga=True,
                       model=True,
                       evolution_history=True,
                       generation_history=True,
                       final_generation=True)
    # Check that every requested pickle file has been written to disk
    _found_pickles: List[bool] = []
    for pickle in ['model', 'GeneticAlgorithm', 'evolution_history', 'generation_history', 'final_generation']:
        if os.path.isfile('data/{}.p'.format(pickle)):
            _found_pickles.append(True)
        else:
            _found_pickles.append(False)
    self.assertTrue(expr=all(_found_pickles))
def test_visualize_reg(self):
    """Test visualizing of the evolution of a regression model"""
    _feature_engineer: FeatureEngineer = FeatureEngineer(df=DATA_SET, target_feature='AveragePrice')
    _feature_engineer.set_predictors(exclude_original_data=False)
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=_feature_engineer.get_target(),
        input_file_path=None,
        train_data_file_path=None, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=_feature_engineer.get_predictors(),
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=None, model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=5, pop_size=4,
        mutation_rate=0.1, mutation_prob=0.15, parents_ratio=0.5,
        early_stopping=0, convergence=True, convergence_measure='median',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False,
        feature_engineer=_feature_engineer,
        sampling_function=None
    )
    _ga.optimize()
    _ga.visualize(results_table=True,
                  model_distribution=True,
                  model_evolution=True,
                  param_distribution=True,
                  train_time_distribution=True,
                  breeding_map=True,
                  breeding_graph=True,
                  fitness_distribution=True,
                  fitness_evolution=True,
                  fitness_dimensions=True,
                  per_generation=True,
                  prediction_of_best_model=True,
                  epoch_stats=True)
    # Check that every expected plot has been exported as an html file
    _found_plot: List[bool] = []
    for plot in ['ga_metadata_table',
                 'ga_model_evolution',
                 'ga_model_distribution',
                 #'ga_parameter_treemap',
                 'ga_training_time_distribution',
                 #'ga_breeding_heatmap',
                 #'ga_breeding_graph',
                 'ga_fitness_score_distribution_per_generation',
                 'ga_metadata_evolution_coords_actor_only',
                 'ga_evolution_fitness_score',
                 'ga_prediction_evaluation_coords',
                 'ga_prediction_scatter_contour'
                 ]:
        if os.path.isfile('data/{}.html'.format(plot)):
            _found_plot.append(True)
        else:
            _found_plot.append(False)
    self.assertTrue(expr=all(_found_plot))
def test_ga_clf(self):
    """Test whether a genetic algorithm using evolved features scores at least as well as one using the original features"""
    _feature_engineer: FeatureEngineer = FeatureEngineer(df=DATA_SET, target_feature='type')
    _feature_engineer.set_predictors(exclude=None, exclude_original_data=False)
    _ga: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=_feature_engineer.get_target(),
        input_file_path=None,
        train_data_file_path=None, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=_feature_engineer.get_predictors(),
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['cat'], model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=10, pop_size=64,
        mutation_rate=0.1, mutation_prob=0.15, parents_ratio=0.5,
        early_stopping=0, convergence=True, convergence_measure='min',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False,
        feature_engineer=_feature_engineer,
        sampling_function=None
    )
    _ga.optimize()
    _feature_learning: FeatureLearning = FeatureLearning(feature_engineer=_feature_engineer,
                                                         df=None,
                                                         file_path=None,
                                                         target=_feature_engineer.get_target(),
                                                         force_target_type=None,
                                                         max_features=-1,
                                                         keep_fittest_only=True,
                                                         train_categorical_critic=False,
                                                         train_continuous_critic=False,
                                                         engineer_time_disparity=True,
                                                         engineer_categorical=True,
                                                         engineer_text=True,
                                                         output_path='data')
    _feature_learning_engineer = _feature_learning.ga()
    _ga_using_new_features: GeneticAlgorithm = GeneticAlgorithm(
        mode='model',
        target=_feature_learning_engineer.get_target(),
        input_file_path=None,
        train_data_file_path=None, test_data_file_path=None, valid_data_file_path=None,
        df=None, data_set=None,
        features=_feature_learning_engineer.get_predictors(),
        re_split_data=False, re_sample_cases=False, re_sample_features=False, re_populate=True,
        max_trials=2, max_features=-1, labels=None,
        models=['cat'], model_params=None,
        burn_in_generations=-1,
        warm_start=True, warm_start_strategy='monotone',
        warm_start_constant_hidden_layers=0, warm_start_constant_category='very_small',
        max_generations=10, pop_size=64,
        mutation_rate=0.1, mutation_prob=0.15, parents_ratio=0.5,
        early_stopping=0, convergence=True, convergence_measure='min',
        timer_in_seconds=43200,
        force_target_type=None,
        plot=False,
        output_file_path='data',
        deep_learning_type='batch', deep_learning_output_size=None,
        log=False,
        feature_engineer=_feature_learning_engineer,
        sampling_function=None
    )
    _ga_using_new_features.optimize()
    # The best individual trained on evolved features should score at least as high as the baseline run
    self.assertTrue(
        expr=_ga_using_new_features.final_generation[_ga_using_new_features.best_individual_idx]['fitness_score'] >=
             _ga.final_generation[_ga.best_individual_idx]['fitness_score']
    )
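# ---------------------------------------------------------------------------------------------------------
# NOTE: The tests above pass a nearly identical set of keyword arguments to GeneticAlgorithm and only vary
# a handful of them (data sources, models, population size, mutation settings, generations). A minimal
# sketch of a shared-defaults helper that could reduce this duplication is given below. The helper name
# `_shared_ga_kwargs`, its default values, and the override-merging idea are assumptions for illustration
# only; they are not part of the GeneticAlgorithm API shown above.
# ---------------------------------------------------------------------------------------------------------
def _shared_ga_kwargs(**overrides) -> dict:
    """Collect the GeneticAlgorithm keyword arguments shared by the test cases above (hypothetical helper)"""
    _kwargs: dict = dict(mode='model',
                         input_file_path=None,
                         train_data_file_path=None,
                         test_data_file_path=None,
                         valid_data_file_path=None,
                         df=None,
                         data_set=None,
                         re_split_data=False,
                         re_sample_cases=False,
                         re_sample_features=False,
                         re_populate=True,
                         max_trials=2,
                         max_features=-1,
                         labels=None,
                         model_params=None,
                         burn_in_generations=-1,
                         warm_start=True,
                         warm_start_strategy='monotone',
                         warm_start_constant_hidden_layers=0,
                         warm_start_constant_category='very_small',
                         early_stopping=0,
                         convergence=True,
                         convergence_measure='median',
                         timer_in_seconds=43200,
                         force_target_type=None,
                         plot=False,
                         output_file_path='data',
                         deep_learning_type='batch',
                         deep_learning_output_size=None,
                         log=False,
                         sampling_function=None
                         )
    # Test-specific arguments (e.g. target, features, models, pop_size) override the shared defaults
    _kwargs.update(overrides)
    return _kwargs
# Hypothetical usage inside a test:
#   _ga: GeneticAlgorithm = GeneticAlgorithm(**_shared_ga_kwargs(target=_feature_engineer.get_target(),
#                                                                features=_feature_engineer.get_predictors(),
#                                                                models=['cat'],
#                                                                max_generations=2,
#                                                                pop_size=4,
#                                                                mutation_rate=0.1,
#                                                                mutation_prob=0.85,
#                                                                feature_engineer=_feature_engineer))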