def test_best_creature_3d(self):
    '''
    Evolve a regression for 'z' on surface_3d_data and show it in 3D.

    Both the source points and the model's predictions are drawn in a
    single 3D scatter plot. Nothing is asserted; the fit is judged by eye.
    '''
    evolution = evogression.Evolution(
        'z',
        surface_3d_data,
        num_creatures=30000,
        num_cycles=10,
        num_cpu=3,
    )

    # One prediction per source point, read back from the 'pred' key.
    z_test = []
    for point_dict in surface_3d_data:
        z_test.append(evolution.predict(point_dict, 'pred')['pred'])

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Split the point dicts into one coordinate list per axis.
    x, y, z = (
        [point_dict[axis] for point_dict in surface_3d_data]
        for axis in ('x', 'y', 'z')
    )

    ax.scatter3D(x, y, z)
    ax.scatter3D(x, y, z_test)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.title('Surface Regression - Evolution Test')
    plt.show()
def main():
    '''
    Build an Evolution that regresses 'y' on parabola_data.

    The resulting object is only bound to a local name; nothing is returned.
    '''
    settings = {
        'num_creatures': 10000,
        'num_cycles': 10,
        'num_cpu': 1,
        'initial_creature_creation_multip': False,
        'optimize': 3,
    }
    evolution = evogression.Evolution('y', parabola_data, **settings)
def test_load_and_predict(self):
    '''
    Test ability of Evolution class to handle pandas DataFrames.

    Trains on a DataFrame built from surface_3d_data, then checks that:
      * predicting on the DataFrame returns a DataFrame with four columns,
        one of which is 'z_PREDICTED';
      * predicting on a single dict with a custom key ('z_test') returns a
        three-entry result containing that key.
    '''
    df = pandas.DataFrame(surface_3d_data)
    evolution = evogression.Evolution(
        'z',
        df,
        num_creatures=500,
        num_cycles=5,
        use_multip=False,
        optimize=3,
    )

    # Dedicated assert methods report the offending value when they fail,
    # unlike assertTrue(<comparison>) which only says "False is not true".
    predicted = evolution.predict(df)
    self.assertIsInstance(predicted, pandas.DataFrame)
    self.assertEqual(len(predicted.columns), 4)
    self.assertIn('z_PREDICTED', predicted.columns)

    single_prediction = evolution.predict({'x': 0, 'y': 0}, 'z_test')
    self.assertIn('z_test', single_prediction)
    self.assertEqual(len(single_prediction), 3)
def test_none_fill(self):
    '''
    Test ability of Evolution class to handle None and NaN values in
    input data without raising TypeError.

    NOTE(review): the original note says missing values are filled with the
    median of populated values; that behaviour lives inside Evolution and is
    not verified here - this test only checks that construction succeeds.
    '''
    # Copy the rows instead of aliasing linear_data: the fixture is shared,
    # so writing the None/NaN rows into it would corrupt every other test
    # that reads linear_data afterwards.
    test_data = [dict(row) for row in linear_data]
    test_data[1] = {'x': 5, 'y': None}
    test_data[4] = {'x': None, 'y': 11.2}
    test_data[5] = {'x': float('nan'), 'y': 11.2}
    test_data[6] = {'x': None, 'y': float('nan')}

    try:
        evogression.Evolution(
            'y',
            test_data,
            num_creatures=100,
            num_cycles=1,
            use_multip=False,
        )
    except TypeError as err:
        # Same outcome as the old assertTrue(False), but with the cause.
        self.fail(f'Evolution raised TypeError on None/NaN input: {err}')
def test_breast_cancer_detection(self):
    '''
    Run an evolution on the Wisconsin breast cancer data file.

    Reads 'breast-cancer-wisconsin.data', drops the 'id_num' column, turns
    '?' placeholders into None, converts every column to float where
    possible, evolves a regression for 'benign2_or_malignant4', writes the
    best creature out as a Python module and saves the predictions to
    'BreastCancerPredictions.xlsx'. Nothing is asserted.
    '''
    df = pandas.read_csv('breast-cancer-wisconsin.data')
    df.drop('id_num', axis=1, inplace=True)

    # Dict form on purpose: with a scalar to_replace and value=None, older
    # pandas versions pad-fill from the previous row instead of inserting
    # None. The {'?': None} mapping means "replace '?' with None" everywhere.
    df = df.replace({'?': None})

    for col in df.columns:
        vals = df[col].tolist()
        if '?' in vals:
            # Diagnostic: a placeholder survived the replace above.
            print(vals)
        try:
            df[col] = df[col].map(lambda x: float(x) if x is not None else x)
        except (TypeError, ValueError):
            # float() rejected a value; leave the column as it was.
            print(f'ERROR column: {col}')

    regression_data = df.to_dict('records')

    evolution = evogression.Evolution(
        'benign2_or_malignant4',
        regression_data,
        target_num_creatures=5000,
        num_cycles=10,
    )
    evolution.best_creature.output_python_regression_module()

    output_data = evolution.add_predictions_to_data(regression_data)
    output_df = pandas.DataFrame(output_data)
    output_df.to_excel('BreastCancerPredictions.xlsx')
def test_best_creature_evolution(self):
    '''
    Evolve a regression for 'y' on linear_data and plot the fitted line.

    Prints the best creature, predicts 'y' for x = 3.0 .. 12.0 in steps of
    0.5, and draws those predictions as a dashed green line over a scatter
    of the source data. Nothing is asserted; the fit is judged by eye.
    '''
    evolution = evogression.Evolution(
        'y',
        linear_data,
        num_creatures=10000,
        num_cycles=3,
        optimize=5,
    )

    fittest = evolution.best_creature
    print('\nBest creature found!')
    print(fittest)

    sample_points = [{'x': half_step / 2} for half_step in range(6, 25)]
    predicted_points = evolution.predict(sample_points)

    line_x = []
    line_y = []
    for point in predicted_points:
        line_x.append(point['x'])
    for point in predicted_points:
        line_y.append(point['y_PREDICTED'])

    data_x = [d['x'] for d in linear_data]
    data_y = [d['y'] for d in linear_data]
    plt.scatter(data_x, data_y)
    plt.plot(line_x, line_y, 'g--')

    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Linear Regression - Evolution Test')
    plt.show()
def test_best_creature_parabola_regression_evolution(self):
    '''
    Evolve a regression for 'y' on parabola_data and plot the fitted curve.

    Evaluates the best creature for integer x in [-20, 20]. When the
    evolution exposes a standardizer, each input is standardized before
    calc_target() and the result is converted back to the original scale;
    otherwise the creature is evaluated on the raw input. The predictions
    are drawn as a dashed green line over a scatter of the source data.
    Nothing is asserted; the fit is judged by eye.
    '''
    evolution = evogression.Evolution(
        'y',
        parabola_data,
        num_creatures=5000,
        num_cycles=7,
        force_num_layers=0,
        standardize=True,
    )
    best_creature = evolution.best_creature

    # None when the evolution has no standardizer attribute, so the loop can
    # branch explicitly instead of relying on a NameError being swallowed.
    standardizer = getattr(evolution, 'standardizer', None)

    calculation_x_values = list(range(-20, 21))
    calculated_y_values = []
    for x in calculation_x_values:
        if standardizer is None:
            # Raw evaluation; the input must be a dict keyed by parameter
            # name (previously the set literal {'x'} was passed by mistake).
            calculated_y_values.append(best_creature.calc_target({'x': x}))
            continue

        standardized_dict = standardizer.convert_parameter_dict_to_standardized({'x': x})
        standardized_value = best_creature.calc_target(standardized_dict)
        calculated_y_values.append(
            standardizer.unstandardize_value('y', standardized_value)
        )

    plt.scatter([d['x'] for d in parabola_data], [d['y'] for d in parabola_data])
    plt.plot(calculation_x_values, calculated_y_values, 'g--')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Parabola Regression - Evolution Test')
    plt.show()
def test_evolution_memory(self):
    '''
    Report how large each part of an Evolution is once serialized to JSON.

    Builds one Evolution on parabola_data per (num_creatures, num_cycles)
    pair in `options`, measures the JSON string length of its creatures,
    all_data, all_data_error_sums and the first creature of every
    best_creatures entry, prints each size with its share of the total, and
    finally drops into the debugger. Nothing is asserted.
    '''
    options = [
        (500, 5),
        (1000, 5),
        (5000, 5),
        (5000, 10),
        (5000, 15),
        (50000, 5),
        (50000, 10),
        (50000, 30),
    ]

    evolutions = [
        evogression.Evolution(
            'y',
            parabola_data,
            num_creatures=num_creatures,
            num_cycles=num_cycles,
            optimize=False,
            use_multip=True,
            clear_creatures=True,
        )
        for num_creatures, num_cycles in options
    ]

    # Each component is serialized exactly once. An earlier version first
    # built one concatenated string per evolution and then discarded it.
    memory_strings = [
        {
            'creatures': len(json.dumps([cr.__dict__ for cr in ev.creatures])),
            'all_data': len(json.dumps(ev.all_data)),
            'error_sums': len(json.dumps(ev.all_data_error_sums)),
            'best_creatures': len(
                json.dumps([crlist[0].__dict__ for crlist in ev.best_creatures])
            ),
        }
        for ev in evolutions
    ]
    for sizes in memory_strings:
        # sum() is evaluated before 'total' is inserted, so the total does
        # not count itself.
        sizes['total'] = sum(sizes.values())

    components = ('creatures', 'all_data', 'error_sums', 'best_creatures')

    print('\n\nString sizes of jsoned evolutions:')
    for option, ms in zip(options, memory_strings):
        print(f' {option} -> ' + '{:.2E}'.format(ms['total']))
        for key in components:
            share = round(100 * ms[key] / ms['total'], 1)
            # A blank line follows the last component of each option.
            trailer = '\n' if key == components[-1] else ''
            print(f' {key}: ' + '{:.2E}'.format(ms[key]) + f' ({share}%)' + trailer)
    print('\n\n')

    # Deliberate stop so `evolutions` can be inspected interactively.
    # Set PYTHONBREAKPOINT=0 in the environment to skip it.
    breakpoint()