import os
import pickle

from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    # TODO List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # TODO Load data
    observations = None  # TODO Replace with a pandas DataFrame
    print('Observation columns: {}'.format(list(observations.columns)))

    # TODO Train/test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # TODO List out variable types
    data_type_dict = {'numerical': [],
                      'categorical': [],
                      'text': [],
                      'timeseries': []}
    output_var = None

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data (the Automater is already fitted, so transform rather than fit_transform)
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # TODO Create and fit keras (deep learning) model
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    inv_transformed_pred_test_y = auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        for name, obj in [('train_X', train_X), ('train_y', train_y),
                          ('test_X', test_X), ('test_y', test_y),
                          ('pred_test_y', pred_test_y)]:
            with open(os.path.join(temp_dir, name + '.pkl'), 'wb') as f:
                pickle.dump(obj, f)


if __name__ == '__main__':
    main()
import os
import pickle

from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    # List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # Load data
    observations = lib.load_lending_club()
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(observations['loan_status'].value_counts()))

    # Train/test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs',
                                    'inq_last_6mths', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
                                    'revol_util', 'total_acc', 'pub_rec_bankruptcies'],
                      'categorical': ['term', 'grade', 'emp_length', 'home_ownership', 'loan_status',
                                      'addr_state', 'application_type', 'disbursement_method'],
                      'text': ['desc', 'purpose', 'title']}
    output_var = 'loan_status'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data (the Automater is already fitted, so transform rather than fit_transform)
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    inv_transformed_pred_test_y = auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        for name, obj in [('train_X', train_X), ('train_y', train_y),
                          ('test_X', test_X), ('test_y', test_y),
                          ('pred_test_y', pred_test_y)]:
            with open(os.path.join(temp_dir, name + '.pkl'), 'wb') as f:
                pickle.dump(obj, f)


if __name__ == '__main__':
    main()
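# A minimal sketch of reading the saved artifacts back and sanity-checking the predictions
# from the lending club example above. evaluate_saved_results is illustrative and not part
# of keras_pandas; it assumes the files were written by main() above, that the model's loss
# deserializes without custom_objects, and that test_y / pred_test_y are one-hot class arrays.
import os
import pickle

import numpy
from keras.models import load_model

from keras_pandas import lib


def evaluate_saved_results():
    temp_dir = lib.get_temp_dir()
    model = load_model(os.path.join(temp_dir, 'model.h5py'))
    model.summary()

    with open(os.path.join(temp_dir, 'test_y.pkl'), 'rb') as f:
        test_y = pickle.load(f)
    with open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'rb') as f:
        pred_test_y = pickle.load(f)

    # Compare predicted and true class indices; adjust the argmax calls if the output
    # encoding turns out to be integer labels rather than one-hot vectors
    accuracy = numpy.mean(numpy.argmax(pred_test_y, axis=1) == numpy.argmax(test_y, axis=1))
    print('Test accuracy: {}'.format(accuracy))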
import unittest
from functools import reduce

import numpy
import pandas
from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn_pandas import DataFrameMapper

from keras_pandas import lib
from keras_pandas.Automater import Automater


# Minimal test-case scaffolding (the class name is assumed) so the test can run standalone
class TestAutomater(unittest.TestCase):

    def test_supervised(self):
        observations = lib.load_lending_club()

        # Train/test split
        train_observations, test_observations = train_test_split(observations)
        train_observations = train_observations.copy()
        test_observations = test_observations.copy()

        # Supervised
        data_type_dict = {'numerical': ['loan_amnt', 'annual_inc', 'open_acc', 'dti', 'delinq_2yrs',
                                        'inq_last_6mths', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
                                        'revol_util', 'total_acc', 'pub_rec_bankruptcies'],
                          'categorical': ['term', 'grade', 'emp_length', 'home_ownership', 'loan_status',
                                          'addr_state', 'application_type'],
                          'text': ['desc', 'purpose', 'title']}
        output_var = 'loan_status'

        auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
        self.assertTrue(auto.supervised)

        expected_input_vars = reduce(lambda x, y: x + y, data_type_dict.values())
        expected_input_vars.remove(output_var)
        self.assertCountEqual(expected_input_vars, auto.input_vars)
        self.assertEqual(output_var, auto.output_var)
        self.assertTrue(isinstance(auto.input_mapper, DataFrameMapper))
        self.assertTrue(isinstance(auto.output_mapper, DataFrameMapper))
        self.assertFalse(auto.fitted)
        self.assertRaises(AssertionError, auto._check_fitted)

        # Test fit
        auto.fit(train_observations)
        self.assertTrue(auto.fitted)
        self.assertIsNotNone(auto.input_mapper.built_features)
        self.assertTrue(isinstance(auto.input_layers, list))
        self.assertEqual(len(expected_input_vars), len(auto.input_layers))
        self.assertIsNotNone(auto.input_nub)
        self.assertIsNotNone(auto.output_nub)
        self.assertIsNotNone(auto.output_mapper.built_features)

        # Test transform, df_out=False
        train_X, train_y = auto.transform(train_observations)
        test_X, test_y = auto.transform(test_observations)
        self.assertTrue(isinstance(test_X, list))
        self.assertTrue(isinstance(test_y, numpy.ndarray))
        self.assertEqual(test_observations.shape[0], test_X[0].shape[0])  # Correct number of rows back
        self.assertEqual(test_observations.shape[0], test_y.shape[0])  # Correct number of rows back

        # Test transform, df_out=True
        transformed_observations = auto.transform(test_observations, df_out=True)
        self.assertTrue(isinstance(transformed_observations, pandas.DataFrame))
        self.assertEqual(test_observations.shape[0], transformed_observations.shape[0])  # Correct number of rows back

        # Test suggest_loss
        suggested_loss = auto.suggest_loss()
        self.assertTrue(callable(suggested_loss))

        # Test model building
        x = auto.input_nub
        x = Dense(32)(x)
        x = auto.output_nub(x)

        model = Model(inputs=auto.input_layers, outputs=x)
        model.compile(optimizer='Adam', loss=auto.suggest_loss())
        model.fit(train_X, train_y)
        pred_y = model.predict(test_X)

        # Test inverse_transform_output
        inv_transformed_pred_y = auto.inverse_transform_output(pred_y)
        self.assertEqual(test_observations.shape[0], inv_transformed_pred_y.shape[0])
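# A tiny worked example of the expected_input_vars computation in the test above: reduce
# concatenates the per-type variable lists into one flat list, and the output variable is
# then removed, since the Automater treats it as y rather than an input. The column names
# here are made up for illustration.
from functools import reduce

data_type_dict = {'numerical': ['a', 'b'],
                  'categorical': ['c', 'target'],
                  'text': ['d']}
flat_vars = reduce(lambda x, y: x + y, data_type_dict.values())
# flat_vars == ['a', 'b', 'c', 'target', 'd']
flat_vars.remove('target')
# flat_vars == ['a', 'b', 'c', 'd'], matching what the test expects from auto.input_vars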
import os
import pickle

from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    # List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = True

    # Load data
    observations = lib.load_instanbul_stocks(as_ts=True)
    print('Observation columns: {}'.format(list(observations.columns)))

    # Notice that the lagged variables are an array of values
    print('One of the lagged variables: \n{}'.format(observations['ise_lagged']))

    # Train/test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['ise', 'ise.1', 'sp', 'dax', 'ftse', 'nikkei', 'bovespa', 'eu', 'em'],
                      'categorical': [],
                      'text': [],
                      'timeseries': ['ise_lagged', 'ise.1_lagged', 'sp_lagged', 'dax_lagged']}
    output_var = 'ise'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data (the Automater is already fitted, so transform rather than fit_transform)
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='adam', loss=auto.suggest_loss())
    model.fit(train_X, train_y)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    inv_transformed_pred_test_y = auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        for name, obj in [('train_X', train_X), ('train_y', train_y),
                          ('test_X', test_X), ('test_y', test_y),
                          ('pred_test_y', pred_test_y)]:
            with open(os.path.join(temp_dir, name + '.pkl'), 'wb') as f:
                pickle.dump(obj, f)


if __name__ == '__main__':
    main()
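# The timeseries columns in the example above hold a trailing window of values per row.
# A minimal sketch of constructing such a list-valued column with pandas, illustrating the
# shape that load_instanbul_stocks(as_ts=True) appears to return; make_lagged is a
# hypothetical helper, not part of keras_pandas.
import pandas


def make_lagged(series, n_lags=4):
    # Collect the previous n_lags values of the series into one list-valued cell per row
    lagged_columns = [series.shift(i) for i in range(1, n_lags + 1)]
    frame = pandas.concat(lagged_columns, axis=1)
    return frame.apply(lambda row: row.tolist(), axis=1)


toy = pandas.DataFrame({'ise': [0.1, 0.2, 0.15, 0.3, 0.25]})
toy['ise_lagged'] = make_lagged(toy['ise'])
print(toy['ise_lagged'])  # Early rows contain NaNs, since they have no full history yet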
import os
import pickle

from keras.layers import Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    # List out which components are supplied by Automater
    # In this example, we're utilizing X and y generated by the Automater, auto.input_nub, auto.input_layers,
    # auto.output_nub, and auto.suggest_loss
    save_results = False

    # Load data
    observations = lib.load_titanic()
    print('Observation columns: {}'.format(list(observations.columns)))

    # Train/test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'],
                      'categorical': ['survived', 'pclass', 'sex'],
                      'text': ['name'],
                      'timeseries': []}
    output_var = 'survived'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data (the Automater is already fitted, so transform rather than fit_transform)
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    print(f'Suggested loss: {auto.suggest_loss()}')
    model.compile(optimizer='adam', loss=auto.suggest_loss(), metrics=['acc'])
    model.summary()
    model.fit(train_X, train_y, batch_size=32, epochs=1, validation_split=0.1)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    inv_transformed_pred_test_y = auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        for name, obj in [('train_X', train_X), ('train_y', train_y),
                          ('test_X', test_X), ('test_y', test_y),
                          ('pred_test_y', pred_test_y)]:
            with open(os.path.join(temp_dir, name + '.pkl'), 'wb') as f:
                pickle.dump(obj, f)


if __name__ == '__main__':
    main()
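# A minimal sketch of scoring a new observation with the fitted Automater and model from
# the titanic example above. score_new_passenger and its field values are hypothetical;
# note that in supervised mode transform() may expect the output column to be present, so
# a placeholder 'survived' value is included.
import pandas


def score_new_passenger(auto, model):
    new_passenger = pandas.DataFrame([{'age': 29, 'siblings_spouses_aboard': 0,
                                       'parents_children_aboard': 0, 'fare': 7.25,
                                       'pclass': 3, 'sex': 'male', 'name': 'John Doe',
                                       'survived': 0}])
    new_X, _ = auto.transform(new_passenger)
    pred = model.predict(new_X)
    # Map the raw network output back into the output variable's original space
    return auto.inverse_transform_output(pred)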