Exemplo n.º 1
0
 def test_optimize_modeling(self):
     """
     Run a short model optimization on engineered features and check the evolution gradient.

     A FeatureEngineer is built on DATA_SET with 'AveragePrice' as target and
     handed to a GeneticAlgorithm in 'model' mode that is restricted to the
     'cat' model, two generations and a population of four. The test passes
     when the first entry of evolution_gradient['max'] is not greater than
     the last entry.
     """
     _feature_engineer: FeatureEngineer = FeatureEngineer(
         df=DATA_SET,
         target_feature='AveragePrice',
         temp_dir=DATA_DIR,
         auto_typing=False)
     _feature_engineer.set_predictors(exclude_original_data=False)
     _ga: GeneticAlgorithm = GeneticAlgorithm(
         mode='model',
         target=_feature_engineer.get_target(),
         # No file or data frame inputs: the data comes from the feature engineer
         input_file_path=None,
         train_data_file_path=None,
         test_data_file_path=None,
         valid_data_file_path=None,
         df=None,
         data_set=None,
         features=_feature_engineer.get_predictors(),
         re_split_data=False,
         re_sample_cases=False,
         re_sample_features=False,
         re_populate=True,
         max_trials=2,
         max_features=-1,
         labels=None,
         models=['cat'],
         model_params=None,
         burn_in_generations=-1,
         warm_start=True,
         warm_start_strategy='monotone',
         warm_start_constant_hidden_layers=0,
         warm_start_constant_category='very_small',
         # Small evolution budget keeps the test run short
         max_generations=2,
         pop_size=4,
         mutation_rate=0.1,
         mutation_prob=0.85,
         parents_ratio=0.5,
         early_stopping=0,
         convergence=True,
         convergence_measure='median',
         timer_in_seconds=43200,
         force_target_type=None,
         plot=False,
         output_file_path='data',
         deep_learning_type='batch',
         deep_learning_output_size=None,
         log=False,
         verbose=False,
         checkpoint=True,
         feature_engineer=_feature_engineer,
         sampling_function=None)
     _ga.optimize()
     # assertLessEqual reports both compared values when the check fails
     _max_gradient = _ga.evolution_gradient.get('max')
     self.assertLessEqual(_max_gradient[0], _max_gradient[-1])
Exemplo n.º 2
0
 def test_optimize_modeling_text_clustering(self):
     """
     Run a short model optimization for text clustering and check the evolution gradient.

     The GeneticAlgorithm reads its training data from DATA_FILE_PATH_CLUSTER,
     uses PREDICTOR_CLUSTER as the only feature, has no target and is
     restricted to the 'gsdmm' model. The test passes when the first entry of
     evolution_gradient['max'] is not greater than the last entry.
     """
     _ga: GeneticAlgorithm = GeneticAlgorithm(
         mode='model',
         target=None,
         input_file_path=None,
         train_data_file_path=DATA_FILE_PATH_CLUSTER,
         test_data_file_path=None,
         valid_data_file_path=None,
         df=None,
         data_set=None,
         features=[PREDICTOR_CLUSTER],
         re_split_data=False,
         re_sample_cases=False,
         re_sample_features=False,
         re_populate=True,
         max_trials=2,
         max_features=-1,
         labels=None,
         # Disabled alternative: [np.random.choice(a=list(CLUSTER_ALGORITHMS.keys()))]
         models=['gsdmm'],
         model_params=None,
         burn_in_generations=-1,
         warm_start=True,
         warm_start_strategy='monotone',
         warm_start_constant_hidden_layers=0,
         warm_start_constant_category='very_small',
         max_generations=2,
         pop_size=3,
         # NOTE(review): the sibling tests pass the larger value (0.85) as
         # mutation_prob - these two values may be swapped here; confirm.
         mutation_prob=0.5,
         mutation_rate=0.85,
         early_stopping=0,
         convergence=True,
         convergence_measure='median',
         timer_in_seconds=43200,
         force_target_type=None,
         plot=False,
         output_file_path='data',
         deep_learning_type='batch',
         deep_learning_output_size=None,
         log=False,
         feature_engineer=None,
         sampling_function=None,
         # Extra keyword arguments (presumably forwarded to data loading /
         # text processing - TODO confirm against GeneticAlgorithm)
         sep=',',
         tokenize=True)
     _ga.optimize()
     # assertLessEqual reports both compared values when the check fails
     _max_gradient = _ga.evolution_gradient.get('max')
     self.assertLessEqual(_max_gradient[0], _max_gradient[-1])
Exemplo n.º 3
0
 def test_optimize_modeling_text_classification(self):
     """
     Run a short model optimization for text classification and check the evolution gradient.

     The GeneticAlgorithm reads separate train, test and validation files,
     predicts TARGET_TEXT from PREDICTORS_TEXT and is restricted to the
     'trans' model with an output size of UNIQUE_LABELS. The test passes when
     the first entry of evolution_gradient['max'] is not greater than the
     last entry.
     """
     _ga: GeneticAlgorithm = GeneticAlgorithm(
         mode='model',
         target=TARGET_TEXT,
         input_file_path=None,
         train_data_file_path=TRAIN_DATA_PATH_TEXT,
         test_data_file_path=TEST_DATA_PATH_TEXT,
         valid_data_file_path=VALIDATION_DATA_PATH_TEXT,
         df=None,
         data_set=None,
         features=PREDICTORS_TEXT,
         re_split_data=False,
         re_sample_cases=False,
         re_sample_features=False,
         re_populate=True,
         max_trials=2,
         max_features=-1,
         labels=None,
         models=['trans'],
         model_params=None,
         burn_in_generations=-1,
         warm_start=True,
         warm_start_strategy='monotone',
         warm_start_constant_hidden_layers=0,
         warm_start_constant_category='very_small',
         # NOTE(review): with a single generation the gradient may hold only
         # one entry, which would make the final check trivially true - confirm.
         max_generations=1,
         pop_size=4,
         mutation_rate=0.5,
         mutation_prob=0.85,
         early_stopping=0,
         convergence=True,
         convergence_measure='median',
         timer_in_seconds=43200,
         force_target_type=None,
         plot=False,
         output_file_path='data',
         deep_learning_type='batch',
         deep_learning_output_size=UNIQUE_LABELS,
         log=False,
         feature_engineer=None,
         sampling_function=None,
         # Extra keyword argument (presumably the column separator of the
         # input files - TODO confirm against GeneticAlgorithm)
         sep=',')
     _ga.optimize()
     # assertLessEqual reports both compared values when the check fails
     _max_gradient = _ga.evolution_gradient.get('max')
     self.assertLessEqual(_max_gradient[0], _max_gradient[-1])
 def test_save_evolution(self):
     """
     Optimize a model and check that save_evolution writes every requested pickle file.

     After optimizing, save_evolution is called with all five flags enabled
     and the test expects one '.p' file per artefact inside the 'data'
     directory (the same value that is passed as output_file_path).
     """
     _feature_engineer: FeatureEngineer = FeatureEngineer(
         df=DATA_SET, target_feature='AveragePrice')
     _feature_engineer.set_predictors(exclude_original_data=False)
     _ga: GeneticAlgorithm = GeneticAlgorithm(
         mode='model',
         target=_feature_engineer.get_target(),
         input_file_path=None,
         train_data_file_path=None,
         test_data_file_path=None,
         valid_data_file_path=None,
         df=None,
         data_set=None,
         features=_feature_engineer.get_predictors(),
         re_split_data=False,
         re_sample_cases=False,
         re_sample_features=False,
         re_populate=True,
         max_trials=2,
         max_features=-1,
         labels=None,
         models=['cat'],
         model_params=None,
         burn_in_generations=-1,
         warm_start=True,
         warm_start_strategy='monotone',
         warm_start_constant_hidden_layers=0,
         warm_start_constant_category='very_small',
         max_generations=5,
         pop_size=64,
         mutation_rate=0.1,
         mutation_prob=0.15,
         parents_ratio=0.5,
         early_stopping=0,
         convergence=True,
         convergence_measure='median',
         timer_in_seconds=43200,
         force_target_type=None,
         plot=False,
         output_file_path='data',
         deep_learning_type='batch',
         deep_learning_output_size=None,
         log=False,
         feature_engineer=_feature_engineer,
         sampling_function=None)
     _ga.optimize()
     _ga.save_evolution(ga=True,
                        model=True,
                        evolution_history=True,
                        generation_history=True,
                        final_generation=True)
     _expected_pickles: List[str] = [
         'model', 'GeneticAlgorithm', 'evolution_history',
         'generation_history', 'final_generation'
     ]
     # Collect the names of missing files so a failure says which artefact is absent
     # NOTE(review): files left over from an earlier run would also satisfy
     # this check - confirm whether 'data' is cleaned between runs.
     _missing_pickles: List[str] = [
         _name for _name in _expected_pickles
         if not os.path.isfile('data/{}.p'.format(_name))
     ]
     self.assertEqual([], _missing_pickles)
 def test_visualize_reg(self):
     """
     Optimize models for the 'AveragePrice' target and check that visualize writes the plots.

     After optimizing (no restriction on model types, models=None), visualize
     is called with every option enabled and the test expects one '.html'
     file per listed plot inside the 'data' directory (the same value that is
     passed as output_file_path).
     """
     _feature_engineer: FeatureEngineer = FeatureEngineer(
         df=DATA_SET, target_feature='AveragePrice')
     _feature_engineer.set_predictors(exclude_original_data=False)
     _ga: GeneticAlgorithm = GeneticAlgorithm(
         mode='model',
         target=_feature_engineer.get_target(),
         input_file_path=None,
         train_data_file_path=None,
         test_data_file_path=None,
         valid_data_file_path=None,
         df=None,
         data_set=None,
         features=_feature_engineer.get_predictors(),
         re_split_data=False,
         re_sample_cases=False,
         re_sample_features=False,
         re_populate=True,
         max_trials=2,
         max_features=-1,
         labels=None,
         models=None,
         model_params=None,
         burn_in_generations=-1,
         warm_start=True,
         warm_start_strategy='monotone',
         warm_start_constant_hidden_layers=0,
         warm_start_constant_category='very_small',
         max_generations=5,
         pop_size=4,
         mutation_rate=0.1,
         mutation_prob=0.15,
         parents_ratio=0.5,
         early_stopping=0,
         convergence=True,
         convergence_measure='median',
         timer_in_seconds=43200,
         force_target_type=None,
         plot=False,
         output_file_path='data',
         deep_learning_type='batch',
         deep_learning_output_size=None,
         log=False,
         feature_engineer=_feature_engineer,
         sampling_function=None)
     _ga.optimize()
     _ga.visualize(results_table=True,
                   model_distribution=True,
                   model_evolution=True,
                   param_distribution=True,
                   train_time_distribution=True,
                   breeding_map=True,
                   breeding_graph=True,
                   fitness_distribution=True,
                   fitness_evolution=True,
                   fitness_dimensions=True,
                   per_generation=True,
                   prediction_of_best_model=True,
                   epoch_stats=True)
     # Plot names that are commented out below are deliberately not checked
     _expected_plots: List[str] = [
         'ga_metadata_table',
         'ga_model_evolution',
         'ga_model_distribution',
         #'ga_parameter_treemap',
         'ga_training_time_distribution',
         #'ga_breeding_heatmap',
         #'ga_breeding_graph',
         'ga_fitness_score_distribution_per_generation',
         'ga_metadata_evolution_coords_actor_only',
         'ga_evolution_fitness_score',
         'ga_prediction_evaluation_coords',
         'ga_prediction_scatter_contour'
     ]
     # Collect the names of missing files so a failure says which plot is absent
     _missing_plots: List[str] = [
         _name for _name in _expected_plots
         if not os.path.isfile('data/{}.html'.format(_name))
     ]
     self.assertEqual([], _missing_plots)
 def test_ga_clf(self):
     """
     Compare model optimization before and after feature learning for the 'type' target.

     A first GeneticAlgorithm is optimized on the predictors of a plain
     FeatureEngineer. FeatureLearning.ga() then returns a new engineer, on
     which a second, identically configured GeneticAlgorithm is optimized.
     The test passes when the fitness score of the best individual of the
     second run is at least that of the first run.
     """

     def _build_ga(feature_engineer) -> GeneticAlgorithm:
         """Create the optimizer used by both runs; only the engineer differs."""
         return GeneticAlgorithm(
             mode='model',
             target=feature_engineer.get_target(),
             input_file_path=None,
             train_data_file_path=None,
             test_data_file_path=None,
             valid_data_file_path=None,
             df=None,
             data_set=None,
             features=feature_engineer.get_predictors(),
             re_split_data=False,
             re_sample_cases=False,
             re_sample_features=False,
             re_populate=True,
             max_trials=2,
             max_features=-1,
             labels=None,
             models=['cat'],
             model_params=None,
             burn_in_generations=-1,
             warm_start=True,
             warm_start_strategy='monotone',
             warm_start_constant_hidden_layers=0,
             warm_start_constant_category='very_small',
             max_generations=10,
             pop_size=64,
             mutation_rate=0.1,
             mutation_prob=0.15,
             parents_ratio=0.5,
             early_stopping=0,
             convergence=True,
             convergence_measure='min',
             timer_in_seconds=43200,
             force_target_type=None,
             plot=False,
             output_file_path='data',
             deep_learning_type='batch',
             deep_learning_output_size=None,
             log=False,
             feature_engineer=feature_engineer,
             sampling_function=None)

     _feature_engineer: FeatureEngineer = FeatureEngineer(
         df=DATA_SET, target_feature='type')
     _feature_engineer.set_predictors(exclude=None,
                                      exclude_original_data=False)
     # Baseline run on the predictors of the plain feature engineer
     _ga: GeneticAlgorithm = _build_ga(_feature_engineer)
     _ga.optimize()
     _feature_learning: FeatureLearning = FeatureLearning(
         feature_engineer=_feature_engineer,
         df=None,
         file_path=None,
         target=_feature_engineer.get_target(),
         force_target_type=None,
         max_features=-1,
         keep_fittest_only=True,
         train_categorical_critic=False,
         train_continuous_critic=False,
         engineer_time_disparity=True,
         engineer_categorical=True,
         engineer_text=True,
         output_path='data')
     _feature_learning_engineer = _feature_learning.ga()
     # Second run on the engineer returned by feature learning
     _ga_using_new_features: GeneticAlgorithm = _build_ga(
         _feature_learning_engineer)
     _ga_using_new_features.optimize()
     _baseline_fitness = _ga.final_generation[
         _ga.best_individual_idx]['fitness_score']
     _new_fitness = _ga_using_new_features.final_generation[
         _ga_using_new_features.best_individual_idx]['fitness_score']
     # assertGreaterEqual reports both scores when the check fails
     self.assertGreaterEqual(_new_fitness, _baseline_fitness)