def main():
    """Train a demo deep-learning model on the Titanic data set using keras-pandas."""
    observations = load_titanic()

    # Describe the data set for the Automater
    categorical_vars = ['pclass', 'sex', 'survived']
    numerical_vars = [
        'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
    ]
    text_vars = ['name']

    auto = Automater(categorical_vars=categorical_vars,
                     numerical_vars=numerical_vars,
                     text_vars=text_vars,
                     response_var='survived')
    X, y = auto.fit_transform(observations)

    # The provided input nub starts the network
    layer = auto.input_nub

    # Custom hidden layers
    layer = Dense(256)(layer)
    layer = Dense(256, activation='relu')(layer)
    layer = Dense(256)(layer)

    # The provided output nub ends the network
    layer = auto.output_nub(layer)

    model = Model(inputs=auto.input_layers, outputs=layer)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

    # Train model
    model.fit(X, y, epochs=15, validation_split=.2)
def test_whole(self):
    """End-to-end check: a Boolean variable can be transformed and wired into a
    Keras model that compiles with the datatype's suggested loss.
    """
    # Create datatype
    datatype = Boolean()

    # Load observations
    observations = lib.load_titanic()
    variable_name = 'survived'

    # Transform observations
    mapper = DataFrameMapper(
        [([variable_name], datatype.default_transformation_pipeline)],
        df_out=True)
    transformed_df = mapper.fit_transform(observations)

    # Create network: input nub feeds straight into the datatype's output nub
    input_layer, input_nub = datatype.input_nub_generator(
        variable_name, transformed_df)
    output_nub = datatype.output_nub_generator(variable_name, transformed_df)

    x = input_nub
    x = output_nub(x)

    # Compiling succeeds only if the generated nubs are shape-compatible
    model = Model(input_layer, x)
    model.compile(optimizer='adam', loss=datatype.output_suggested_loss())
def test_empty_strings(self):
    """An EmbeddingVectorizer fits cleanly on a single free-text column."""
    name_column = lib.load_titanic()[['name']]
    vectorizer = EmbeddingVectorizer()
    vectorizer.fit(name_column)
def test_create_input_nub(self):
    """Fitting an Automater with a single text variable yields exactly one input layer."""
    observations = lib.load_titanic()

    # One variable
    automater = Automater(text_vars=['name'])
    automater.fit(observations)

    self.assertEqual(1, len(automater.input_layers))
def main():
    """Train and evaluate a Titanic survival model, with keras-pandas handling
    all data transformation and the network's input/output layers.
    """
    # Load data
    observations = lib.load_titanic()
    # Alternate, larger data set: observations = lib.load_lending_club(test_run=False)

    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(
        observations['survived'].value_counts()))

    # List out variable types
    numerical_vars = [
        'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
    ]
    categorical_vars = ['survived', 'pclass', 'sex']
    text_vars = ['name']

    # Categorical variables are fed to the Automater as strings
    for var in categorical_vars:
        observations[var] = observations[var].astype(str)

    # Train/test split; copies avoid pandas SettingWithCopy warnings on later writes
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars,
                     categorical_vars=categorical_vars,
                     text_vars=text_vars,
                     response_var='survived')
    auto.fit(train_observations)

    # Create and fit keras (deep learning) model
    # The auto.transform, auto.input_nub, auto.input_layers, and auto.loss are provided by keras-pandas, and
    # everything else is core Keras
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])
    model.fit(train_X, train_y)

    test_y_pred = model.predict(test_X)

    # Inverse transform model output, to get usable results
    test_observations[auto.response_var + '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var + '_pred']))
def test_mapper(self):
    """An EmbeddingVectorizer inside a DataFrameMapper indexes the first name as expected."""
    observations = lib.load_titanic()

    pipeline = [(['name'], [EmbeddingVectorizer(max_sequence_length=12)])]
    mapper = DataFrameMapper(pipeline, df_out=True)

    mapper.fit(observations)
    transformed = mapper.transform(observations)

    # Expected index sequence for the first observation's name
    assert_array_equal([2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1],
                       transformed.values[0, :])
def test_fit(self):
    """Fitting an Automater on one text variable records its state correctly."""
    observations = lib.load_titanic()

    # One variable
    text_vars = ['name']
    automater = Automater(text_vars=text_vars)
    automater.fit(observations)

    self.assertEqual(Automater, type(automater))
    self.assertEqual(text_vars, automater._user_provided_variables)
    self.assertTrue(automater.fitted)
    self.assertEqual([['name']],
                     [feature[0] for feature in automater.input_mapper.built_features])
def test_whole(self):
    """End-to-end check: a Numerical variable can be transformed and wired into a
    Keras model that compiles with the datatype's suggested loss.
    """
    # Create datatype
    datatype = Numerical()

    # Load observations
    observations = lib.load_titanic()

    # Transform observations
    mapper = DataFrameMapper(
        [(['fare'], datatype.default_transformation_pipeline)], df_out=True)
    transformed_df = mapper.fit_transform(observations)

    # Create network: input nub feeds straight into the datatype's output nub
    input_layer, input_nub = datatype.input_nub_generator('fare', transformed_df)
    output_nub = datatype.output_nub_generator('fare', transformed_df)

    network = output_nub(input_nub)

    # Compiling succeeds only if the generated nubs are shape-compatible
    model = Model(input_layer, network)
    model.compile(optimizer='adam', loss=datatype.output_suggested_loss())
def test_whole(self):
    """End-to-end check: fit an Automater + Keras model on text data, then
    predict on a held-out partition with the response column removed.
    """
    data = lib.load_titanic()

    # Random ~95/5 train/test split
    msk = numpy.random.rand(len(data)) < 0.95
    data_train = data[msk]
    data_test = data[~msk]

    text_vars = ['name']
    categorical_vars = ['survived']

    # Create auto
    auto = Automater(text_vars=text_vars, categorical_vars=categorical_vars,
                     response_var='survived')

    # Train auto
    auto.fit(data_train)

    # BUGFIX: transform the training partition only (previously transformed the
    # full data set, leaking the held-out rows into model training)
    X_train, y_train = auto.transform(data_train)

    # Create model
    x = auto.input_nub
    x = Dense(30, activation='relu')(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss)

    # Train DL model
    model.fit(X_train, y_train)

    # Transform test set, with the response dropped as it would be at inference time
    data_test = data_test.drop('survived', axis=1)
    X_test, y_test = auto.transform(data_test)
    model.predict(X_test)
def test_whole(self):
    """End-to-end check: a Text variable can be transformed and wired into a
    Keras model that compiles.
    """
    datatype = Text()

    # Load observations
    observations = lib.load_titanic()

    # Transform observations; 'fare' is passed through untransformed alongside
    # the text pipeline
    mapper = DataFrameMapper(
        [(['name'], datatype.default_transformation_pipeline),
         (['fare'], None)],
        df_out=True)
    transformed_df = mapper.fit_transform(observations)

    # Create network; a plain Dense(1) stands in for an output nub
    input_layer, input_nub = datatype.input_nub_generator(
        'name', transformed_df)
    output_nub = Dense(1)

    x = input_nub
    x = output_nub(x)

    # Compiling succeeds only if the generated input nub is well-formed
    model = Model(input_layer, x)
    model.compile(optimizer='adam', loss='mse')
def main():
    """End-to-end Titanic example. keras-pandas supplies X/y, auto.input_nub,
    auto.input_layers, auto.output_nub, and auto.suggest_loss; everything else
    is core Keras.
    """
    save_results = False

    # Load data
    observations = lib.load_titanic()
    print('Observation columns: {}'.format(list(observations.columns)))

    # Train / test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types
    data_type_dict = {'numerical': ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'],
                      'categorical': ['survived', 'pclass', 'sex'],
                      'text': ['name'],
                      'timeseries': []
                      }
    output_var = 'survived'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data. BUGFIX: the Automater is already fitted above, so use
    # transform() here instead of fit_transform() to avoid a redundant refit.
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model
    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    print(f'Suggested loss: {auto.suggest_loss()}\n\n')
    model.compile(optimizer='adam', loss=auto.suggest_loss(), metrics=['acc'])
    model.summary()

    # Debug dump of the transformed training inputs/targets
    print('\n\n' + '^' * 21)
    print(train_X)
    print('\n\n' + '^' * 21)
    print(train_y)

    model.fit(train_X, train_y, batch_size=32, epochs=1, validation_split=0.1)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results. BUGFIX: files are opened via `with` so the handles are
    # closed (the originals were opened inline and never closed).
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5py'))
        artifacts = [('train_X', train_X), ('train_y', train_y),
                     ('test_X', test_X), ('test_y', test_y),
                     ('pred_test_y', pred_test_y)]
        for artifact_name, artifact in artifacts:
            with open(os.path.join(temp_dir, artifact_name + '.pkl'), 'wb') as file_handle:
                pickle.dump(artifact, file_handle)
# README-style example: build a Titanic survival model where keras-pandas
# supplies the data transformation and the network's input/output nubs.
# NOTE(review): this chunk appears truncated — the model is never compiled or
# fit in the visible code; confirm against the full file.
from keras import Model
from keras.layers import Dense

from keras_pandas.Automater import Automater
from keras_pandas.lib import load_titanic

observations = load_titanic()

# Transform the data set, using keras_pandas
categorical_vars = ['pclass', 'sex', 'survived']
numerical_vars = [
    'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
]
text_vars = ['name']

auto = Automater(categorical_vars=categorical_vars,
                 numerical_vars=numerical_vars,
                 text_vars=text_vars,
                 response_var='survived')
X, y = auto.fit_transform(observations)

# Start model with provided input nub
x = auto.input_nub

# Fill in your own hidden layers
x = Dense(32)(x)
x = Dense(32, activation='relu')(x)
x = Dense(32)(x)

# End model with provided output nub
x = auto.output_nub(x)