def main():
    # TODO List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # TODO Load data
    observations = None
    print('Observation columns: {}'.format(list(observations.columns)))

    # TODO Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # TODO List out variable types
    data_type_dict = {'numerical': [],
                      'categorical': [],
                      'text': [],
                      'timeseries': []}
    output_var = None

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data
    train_X, train_y = auto.fit_transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # TODO Create and fit keras (deep learning) model.
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        pickle.dump(train_X, open(os.path.join(temp_dir, 'train_X.pkl'), 'wb'))
        pickle.dump(train_y, open(os.path.join(temp_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(test_X, open(os.path.join(temp_dir, 'test_X.pkl'), 'wb'))
        pickle.dump(test_y, open(os.path.join(temp_dir, 'test_y.pkl'), 'wb'))
        pickle.dump(pred_test_y, open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'wb'))
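# The skeleton above references Dense, Model, train_test_split, Automater, lib, os, and pickle without
# showing its imports. Below is a plausible, hedged import block inferred from those calls; the exact
# module paths (keras_pandas.Automater, keras_pandas.lib) are assumptions, not confirmed repository layout.
import os
import pickle

from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater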
def test_transform_with_response(self):
    train_df = lib.load_mushroom()

    # Two categorical variables, df_out = False
    test_columns = ['odor', 'habitat']
    auto = Automater(categorical_vars=test_columns, df_out=False, response_var='habitat')
    auto.fit(train_df)

    (X, y) = auto.transform(train_df)
    self.assertEqual((8124,), X[0].shape)

    # Two categorical variables, df_out = True
    test_columns = ['odor', 'habitat']
    auto = Automater(categorical_vars=test_columns, df_out=True, response_var='habitat')
    auto.fit(train_df)

    transformed = auto.transform(train_df)
    self.assertEqual(8124, len(transformed.index))
    self.assertEqual((8124, 2), transformed.shape)
    self.assertCountEqual(test_columns, transformed.columns)

    # Test w/ response var unavailable
    test_columns = ['odor']
    test_df = train_df[test_columns]
    transformed = auto.transform(test_df)
    self.assertEqual(8124, len(transformed.index))
    self.assertEqual((8124, 1), transformed.shape)
    self.assertCountEqual(test_columns, transformed.columns)
def test_unsupervised(self):
    observations = lib.load_lending_club()

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Unsupervised
    data_type_dict = {'numerical': ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs',
                                    'inq_last_6mths', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
                                    'revol_util', 'total_acc', 'pub_rec_bankruptcies'],
                      'categorical': ['term', 'grade', 'emp_length', 'home_ownership', 'loan_status',
                                      'addr_state', 'application_type'],
                      'text': ['desc', 'purpose', 'title']}

    auto = Automater(data_type_dict=data_type_dict)
    self.assertFalse(auto.supervised)

    expected_input_vars = reduce(lambda x, y: x + y, data_type_dict.values())
    self.assertCountEqual(expected_input_vars, auto.input_vars)
    self.assertEqual(None, auto.output_var)
    self.assertTrue(isinstance(auto.input_mapper, DataFrameMapper))
    self.assertIsNone(auto.output_mapper)
    self.assertFalse(auto.fitted)
    self.assertRaises(AssertionError, auto._check_has_response_var)

    # Test fit
    auto.fit(train_observations)
    self.assertTrue(auto.fitted)
    self.assertIsNotNone(auto.input_mapper.built_features)
    self.assertTrue(isinstance(auto.input_layers, list))
    self.assertEqual(len(expected_input_vars), len(auto.input_layers))
    self.assertIsNotNone(auto.input_nub)
    self.assertIsNone(auto.output_nub)
    self.assertIsNone(auto.output_mapper)

    # Test transform, df_out=False
    X, y = auto.transform(test_observations)
    self.assertTrue(isinstance(X, list))
    self.assertIsNone(y)
    self.assertEqual(test_observations.shape[0], X[0].shape[0])  # Correct number of rows back

    # Test transform, df_out=True
    transformed_observations = auto.transform(test_observations, df_out=True)
    self.assertTrue(isinstance(transformed_observations, pandas.DataFrame))
    self.assertEqual(test_observations.shape[0], transformed_observations.shape[0])  # Correct number of rows back
def main():
    # List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # Load data
    observations = lib.load_lending_club()
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(observations['loan_status'].value_counts()))

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs',
                                    'inq_last_6mths', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
                                    'revol_util', 'total_acc', 'pub_rec_bankruptcies'],
                      'categorical': ['term', 'grade', 'emp_length', 'home_ownership', 'loan_status',
                                      'addr_state', 'application_type', 'disbursement_method'],
                      'text': ['desc', 'purpose', 'title']}
    output_var = 'loan_status'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data
    train_X, train_y = auto.fit_transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model.
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        pickle.dump(train_X, open(os.path.join(temp_dir, 'train_X.pkl'), 'wb'))
        pickle.dump(train_y, open(os.path.join(temp_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(test_X, open(os.path.join(temp_dir, 'test_X.pkl'), 'wb'))
        pickle.dump(test_y, open(os.path.join(temp_dir, 'test_y.pkl'), 'wb'))
        pickle.dump(pred_test_y, open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'wb'))
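# A hedged follow-up to the example above: scoring the inverse-transformed predictions against the
# held-out labels. This assumes inverse_transform_output returns labels in the original 'loan_status'
# space, aligned row-for-row with test_observations; accuracy_score comes from scikit-learn, not keras-pandas.
from sklearn.metrics import accuracy_score

def score_predictions(auto, model, test_X, test_observations, output_var='loan_status'):
    # Predict, map network output back to label space, and compare against the ground-truth column
    pred_test_y = model.predict(test_X)
    pred_labels = auto.inverse_transform_output(pred_test_y)
    return accuracy_score(test_observations[output_var], pred_labels)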
def main():
    # Load data
    observations = lib.load_mushroom()
    # observations = lib.load_lending_club(test_run=False)
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(observations['class'].value_counts()))

    # List out variable types
    numerical_vars = []
    categorical_vars = ['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
                        'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape',
                        'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
                        'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type', 'veil-color',
                        'ring-number', 'ring-type', 'spore-print-color', 'population', 'habitat']
    text_vars = []

    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars, categorical_vars=categorical_vars, text_vars=text_vars,
                     response_var='class')
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model
    # The auto.transform, auto.input_nub, auto.input_layers, and auto.loss are provided by keras-pandas, and
    # everything else is core Keras
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    test_y_pred = model.predict(test_X)

    # Inverse transform model output, to get usable results and save all results
    test_observations[auto.response_var + '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var + '_pred']))

    pass
def test_create_input_nub(self):
    data = lib.load_titanic()

    # One variable
    text_vars = ['name']
    auto = Automater(text_vars=text_vars)
    auto.fit(data)
    self.assertEqual(1, len(auto.input_layers))
def main():
    # Load data
    observations = lib.load_titanic()
    # observations = lib.load_lending_club(test_run=False)
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(observations['survived'].value_counts()))

    # List out variable types
    numerical_vars = ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare']
    categorical_vars = ['survived', 'pclass', 'sex']
    text_vars = ['name']

    for var in categorical_vars:
        observations[var] = observations[var].astype(str)

    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars, categorical_vars=categorical_vars, text_vars=text_vars,
                     response_var='survived')
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model
    # The auto.transform, auto.input_nub, auto.input_layers, and auto.loss are provided by keras-pandas, and
    # everything else is core Keras
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    test_y_pred = model.predict(test_X)

    # Inverse transform model output, to get usable results and save all results
    test_observations[auto.response_var + '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var + '_pred']))

    pass
def test_fit(self):
    train_df = lib.load_mushroom()

    # Two variables
    mushroom_categorical_cols = ['odor', 'habitat']
    auto = Automater(categorical_vars=mushroom_categorical_cols)
    auto.fit(train_df)

    self.assertEqual(Automater, type(auto))
    self.assertEqual(mushroom_categorical_cols, auto._user_provided_variables)
    self.assertTrue(auto.fitted)

    # Assert that transformation pipeline has been built / trained
    self.assertEqual([['odor'], ['habitat']], list(map(lambda x: x[0], auto.input_mapper.built_features)))
def test_fit(self):
    data = lib.load_titanic()

    # One variable
    text_vars = ['name']
    auto = Automater(text_vars=text_vars)
    auto.fit(data)

    self.assertEqual(Automater, type(auto))
    self.assertEqual(text_vars, auto._user_provided_variables)
    self.assertTrue(auto.fitted)
    self.assertEqual([['name']], list(map(lambda x: x[0], auto.input_mapper.built_features)))
def test_fit(self):
    iris_df = lib.load_iris()

    # One variable
    iris_numerical_cols = ['sepal_length']
    auto = Automater(numerical_vars=iris_numerical_cols)
    auto.fit(iris_df)

    self.assertEqual(Automater, type(auto))
    self.assertEqual(iris_numerical_cols, auto._user_provided_variables)
    self.assertTrue(auto.fitted)

    # Assert that transformation pipeline has been built / trained
    self.assertEqual([['sepal_length']], list(map(lambda x: x[0], auto.input_mapper.built_features)))
def main():
    # Load data
    observations = lib.load_instanbul_stocks(as_ts=True)
    print('Observation columns: {}'.format(list(observations.columns)))

    # Heuristic data transformations

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    timeseries_vars = ['ise_lagged', 'ise.1_lagged', 'sp_lagged', 'dax_lagged']
    numerical_vars = ['ise']

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars, timeseries_vars=timeseries_vars, response_var='ise')
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model.
    # The auto.transform, auto.input_nub, auto.input_layers, auto.output_nub, and auto.loss are provided by
    # keras-pandas, and everything else is core Keras
    x = auto.input_nub
    x = Dense(16)(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(16)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.loss)

    train_X, train_y = auto.transform(train_observations)
    model.fit(train_X, train_y)

    # Inverse transform model output, to get usable results
    test_X, test_y = auto.transform(test_observations)
    test_y_pred = model.predict(test_X)
    test_observations[auto.response_var + '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var + '_pred']))

    # TODO Save all results
    pass
def test_transform(self):
    iris_df = lib.load_iris()

    # Two numerical variables, df_out = False
    iris_numerical_cols = ['sepal_length', 'sepal_width']
    auto = Automater(numerical_vars=iris_numerical_cols, df_out=False)
    auto.fit(iris_df)

    (X, y) = auto.transform(iris_df)
    self.assertEqual((150,), X[0].shape)

    # Two numerical variables, df_out = True
    iris_numerical_cols = ['sepal_length', 'sepal_width']
    auto = Automater(numerical_vars=iris_numerical_cols, df_out=True)
    auto.fit(iris_df)

    transformed = auto.transform(iris_df)
    self.assertEqual(150, len(transformed.index))
    self.assertEqual((150, 2), transformed.shape)
    self.assertCountEqual(iris_numerical_cols, transformed.columns)
def test_transform_no_response(self):
    train_df = lib.load_mushroom()

    # Two categorical variables, df_out = False
    test_columns = ['odor', 'habitat']
    auto = Automater(categorical_vars=test_columns, df_out=False)
    auto.fit(train_df)

    (X, y) = auto.transform(train_df)
    self.assertEqual((8124,), X[0].shape)
    self.assertEqual(None, y)

    # Two categorical variables, df_out = True
    test_columns = ['odor', 'habitat']
    auto = Automater(categorical_vars=test_columns, df_out=True)
    auto.fit(train_df)

    transformed = auto.transform(train_df)
    self.assertEqual(8124, len(transformed.index))
    self.assertEqual((8124, 2), transformed.shape)
    self.assertCountEqual(test_columns, transformed.columns)
def test_boolean(self):
    observations = lib.load_mushroom()
    observations['population_bool'] = observations['population'] == 's'

    msk = numpy.random.rand(len(observations)) < 0.95
    mushroom_train = observations[msk]
    mushroom_test = observations[~msk]

    categorical_vars = ['odor', 'habitat', 'class']
    boolean_vars = ['population_bool']

    auto = Automater(categorical_vars=categorical_vars, boolean_vars=boolean_vars, response_var='class')
    auto.fit(mushroom_train)
    X_train, y_train = auto.transform(mushroom_train)

    # Extract input_nub from auto
    input_nub = auto.input_nub

    # Extract output_nub from auto
    output_nub = auto.output_nub

    # Create DL model
    x = input_nub
    x = Dense(30)(x)
    x = output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss)

    # Train DL model
    model.fit(X_train, y_train)

    # Transform test set
    mushroom_test = mushroom_test.drop('class', axis=1)
    X_test, y_test = auto.transform(mushroom_test)
    model.predict(X_test)
def test_categorical_whole(self):
    # Set up data set
    mushroom_df = lib.load_mushroom()
    msk = numpy.random.rand(len(mushroom_df)) < 0.95
    mushroom_train = mushroom_df[msk]
    mushroom_test = mushroom_df[~msk]
    categorical_vars = ['odor', 'habitat', 'population', 'class']

    # Create auto
    auto = Automater(categorical_vars=categorical_vars, response_var='class')

    # Train auto
    auto.fit(mushroom_train)
    X_train, y_train = auto.transform(mushroom_train)

    # Extract input_nub from auto
    input_nub = auto.input_nub

    # Extract output_nub from auto
    output_nub = auto.output_nub

    # Create DL model
    x = input_nub
    x = Dense(30)(x)
    x = output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss)

    # Train DL model
    model.fit(X_train, y_train)

    # Transform test set
    mushroom_test = mushroom_test.drop('class', axis=1)
    X_test, y_test = auto.transform(mushroom_test)
    model.predict(X_test)

    pass
def test_numerical_whole(self):
    # Set up data set
    iris = lib.load_iris()
    iris_train = iris[:100]
    iris_test = iris[101:]
    iris_numerical_cols = ['sepal_length', 'petal_length']

    # Create auto
    auto = Automater(numerical_vars=iris_numerical_cols, response_var='sepal_length')

    # Train auto
    auto.fit(iris_train)
    X_train, y_train = auto.transform(iris_train)

    # Extract input_nub from auto
    input_nub = auto.input_nub

    # Extract output_nub from auto
    output_nub = auto.output_nub

    # Create DL model
    x = input_nub
    x = Dense(30)(x)
    x = output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss)

    # Train DL model
    model.fit(X_train, y_train)

    # Transform test set
    iris_test = iris_test.drop('sepal_length', axis=1)
    X_test, y_test = auto.transform(iris_test)
    model.predict(X_test)

    pass
def test_transform_no_response(self):
    data = pandas.DataFrame(data=['john clark', 'sue fox', 'mary lastname'], columns=['name'])

    # One variable
    text_vars = ['name']
    auto = Automater(text_vars=text_vars)
    auto.fit(data)

    (X, y) = auto.transform(data)

    # Find correct shape
    self.assertEqual((3, 2), X[0].shape)

    # Test output values
    self.assertEqual(None, y)

    # Test with unseen terms
    test_data = pandas.DataFrame(data=['Brendan Herger'], columns=['name'])
    (X_test, y_test) = auto.transform(test_data)
    self.assertTrue(numpy.array_equal([[0, 0]], X_test[0]))

    pass
def test_whole(self):
    data = lib.load_titanic()
    msk = numpy.random.rand(len(data)) < 0.95
    data_train = data[msk]
    data_test = data[~msk]

    text_vars = ['name']
    categorical_vars = ['survived']

    # Create auto
    auto = Automater(text_vars=text_vars, categorical_vars=categorical_vars, response_var='survived')

    # Train auto
    auto.fit(data_train)
    X_train, y_train = auto.transform(data_train)

    # Create model
    x = auto.input_nub
    x = Dense(30, activation='relu')(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss)

    # Train DL model
    model.fit(X_train, y_train)

    # Transform test set
    data_test = data_test.drop('survived', axis=1)
    X_test, y_test = auto.transform(data_test)
    model.predict(X_test)

    pass
def test_timeseries_whole(self):
    observations = lib.load_instanbul_stocks(as_ts=True)

    # Train test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create data type lists
    timeseries_vars = ['ise_lagged', 'sp_lagged']
    numerical_vars = ['ise']

    # Create automater
    auto = Automater(numerical_vars=numerical_vars, timeseries_vars=timeseries_vars, response_var='ise')

    # Fit automater
    auto.fit(train_observations)

    # Create model
    x = auto.input_nub
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.loss)

    # Train model
    train_X, train_y = auto.transform(train_observations)
    print(len(train_X))
    print(train_X[0].shape)
    model.fit(train_X, train_y)

    # TODO Use model to predict
    pass
def main():
    # Load data
    observations = lib.load_lending_club()
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(observations['loan_status'].value_counts()))

    # Heuristic data transformations
    for var in ['int_rate', 'revol_util']:
        # Strip out percent signs
        observations[var] = observations[var].apply(lambda x: str(x).replace('%', ''))
        observations[var] = pandas.to_numeric(observations[var], errors='coerce')

    for var in ['mths_since_last_delinq', 'annual_inc_joint']:
        # Heuristic null filling for some variables
        observations[var] = observations[var].fillna(0)

    # List out variable types
    numerical_vars = ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs', 'inq_last_6mths',
                      'mths_since_last_delinq', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc',
                      'pub_rec_bankruptcies']
    categorical_vars = ['term', 'grade', 'emp_length', 'home_ownership', 'addr_state', 'application_type',
                        'disbursement_method']
    text_vars = ['desc', 'purpose', 'title']

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars, categorical_vars=categorical_vars, text_vars=text_vars,
                     response_var='loan_amnt')
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model
    # The auto.transform, auto.input_nub, auto.input_layers, and auto.loss are provided by keras-pandas, and
    # everything else is core Keras
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    test_y_pred = model.predict(test_X)

    # Inverse transform model output, to get usable results and save all results
    test_observations[auto.response_var + '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var + '_pred']))

    pass
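# A minimal sketch of the percent-string cleanup used in the heuristic transformation step above, run on
# toy data. The column name 'int_rate' mirrors the example; the values are invented for illustration only.
import pandas

toy = pandas.DataFrame({'int_rate': ['13.56%', '7.90%', None]})
toy['int_rate'] = pandas.to_numeric(toy['int_rate'].astype(str).str.replace('%', ''), errors='coerce')
# Result: 13.56, 7.90, NaN -- non-parseable entries become NaN rather than raising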
def test_supervised(self):
    observations = lib.load_lending_club()

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Supervised
    data_type_dict = {'numerical': ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs',
                                    'inq_last_6mths', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
                                    'revol_util', 'total_acc', 'pub_rec_bankruptcies'],
                      'categorical': ['term', 'grade', 'emp_length', 'home_ownership', 'loan_status',
                                      'addr_state', 'application_type'],
                      'text': ['desc', 'purpose', 'title']}
    output_var = 'loan_status'

    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    self.assertTrue(auto.supervised)

    expected_input_vars = reduce(lambda x, y: x + y, data_type_dict.values())
    expected_input_vars.remove(output_var)
    self.assertCountEqual(expected_input_vars, auto.input_vars)
    self.assertEqual(output_var, auto.output_var)
    self.assertTrue(isinstance(auto.input_mapper, DataFrameMapper))
    self.assertTrue(isinstance(auto.output_mapper, DataFrameMapper))
    self.assertFalse(auto.fitted)
    self.assertRaises(AssertionError, auto._check_fitted)

    # Test fit
    auto.fit(train_observations)
    self.assertTrue(auto.fitted)
    self.assertIsNotNone(auto.input_mapper.built_features)
    self.assertTrue(isinstance(auto.input_layers, list))
    self.assertEqual(len(expected_input_vars), len(auto.input_layers))
    self.assertIsNotNone(auto.input_nub)
    self.assertIsNotNone(auto.output_nub)
    self.assertIsNotNone(auto.output_mapper.built_features)

    # Test transform, df_out=False
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)
    self.assertTrue(isinstance(test_X, list))
    self.assertTrue(isinstance(test_y, numpy.ndarray))
    self.assertEqual(test_observations.shape[0], test_X[0].shape[0])  # Correct number of rows back
    self.assertEqual(test_observations.shape[0], test_y.shape[0])  # Correct number of rows back

    # Test transform, df_out=True
    transformed_observations = auto.transform(test_observations, df_out=True)
    self.assertTrue(isinstance(transformed_observations, pandas.DataFrame))
    self.assertEqual(test_observations.shape[0], transformed_observations.shape[0])  # Correct number of rows back

    # Test suggest_loss
    suggested_loss = auto.suggest_loss()
    self.assertTrue(callable(suggested_loss))

    # Test model building
    x = auto.input_nub
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)
    pred_y = model.predict(test_X)

    # Test inverse_transform_output
    inv_transformed_pred_y = auto.inverse_transform_output(pred_y)
    self.assertEqual(test_observations.shape[0], inv_transformed_pred_y.shape[0])
def main():
    # TODO List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = False

    # TODO Load data
    observations = lib.load_titanic()
    print('Observation columns: {}'.format(list(observations.columns)))

    # TODO Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # TODO List out variable types
    data_type_dict = {'numerical': ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'],
                      'categorical': ['survived', 'pclass', 'sex'],
                      'text': ['name'],
                      'timeseries': []}
    output_var = 'survived'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data
    train_X, train_y = auto.fit_transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # TODO Create and fit keras (deep learning) model.
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    print(f'Suggested loss: {auto.suggest_loss()}\n\n')
    model.compile(optimizer='adam', loss=auto.suggest_loss(), metrics=['acc'])
    # model.fit(train_X, train_y)
    model.summary()

    print('\n\n' + '^' * 21)
    print(train_X)
    print('\n\n' + '^' * 21)
    print(train_y)

    model.fit(train_X, train_y, batch_size=32, epochs=1, validation_split=0.1)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        pickle.dump(train_X, open(os.path.join(temp_dir, 'train_X.pkl'), 'wb'))
        pickle.dump(train_y, open(os.path.join(temp_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(test_X, open(os.path.join(temp_dir, 'test_X.pkl'), 'wb'))
        pickle.dump(test_y, open(os.path.join(temp_dir, 'test_y.pkl'), 'wb'))
        pickle.dump(pred_test_y, open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'wb'))
def main():
    # List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # Load data
    observations = lib.load_instanbul_stocks(as_ts=True)
    print('Observation columns: {}'.format(list(observations.columns)))

    # Notice that the lagged variables are an array of values
    print('One of the lagged variables: \n{}'.format(observations['ise_lagged']))

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['ise', 'ise.1', 'sp', 'dax', 'ftse', 'nikkei', 'bovespa', 'eu', 'em'],
                      'categorical': [],
                      'text': [],
                      'timeseries': ['ise_lagged', 'ise.1_lagged', 'sp_lagged', 'dax_lagged']}
    output_var = 'ise'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data
    train_X, train_y = auto.fit_transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model.
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        pickle.dump(train_X, open(os.path.join(temp_dir, 'train_X.pkl'), 'wb'))
        pickle.dump(train_y, open(os.path.join(temp_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(test_X, open(os.path.join(temp_dir, 'test_X.pkl'), 'wb'))
        pickle.dump(test_y, open(os.path.join(temp_dir, 'test_y.pkl'), 'wb'))
        pickle.dump(pred_test_y, open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'wb'))
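# A hedged sketch of how lagged-window columns like 'ise_lagged' could be derived from a plain numerical
# series with pandas. This is for illustration only and is not necessarily how lib.load_instanbul_stocks
# (as_ts=True) builds them; the window length of 4 is an arbitrary choice for the example.
import pandas

def make_lagged(series, window=4):
    # Each row receives an array of the preceding `window` values (shorter near the start of the series)
    values = series.values
    return pandas.Series([values[max(0, i - window):i] for i in range(len(values))], index=series.index)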