return ds def customLoss(yTrue, yPred): return tf.reduce_mean( tf.nn.weighted_cross_entropy_with_logits(yTrue, yPred, pos_weight=10) ) ## upweight pos error due to 9:1 ratio of neg:pos samples def customMonitor(yTrue, yPred): out = tf.nn.sigmoid(yPred) temp = tf.keras.metrics.binary_accuracy(yTrue, out) return tf.reduce_mean(temp) df, labels, pos_weight = preproc_data.return_data(orig=False) df = (df - df.mean(axis=0)) / df.std( axis=0 ) ## L-BFGS won't converge to good results without normalization, but SGD is OK without it split = 0.2 # batch_size = 50000 # train_ds = df_to_dataset(df.iloc[:np.int(df.shape[0]*(1-2*split)), :], labels[:np.int(df.shape[0]*(1-2*split))], batch_size=batch_size) # val_ds = df_to_dataset(df.iloc[np.int(-2*df.shape[0]*split):np.int(-df.shape[0]*split), :], labels[np.int(-2*df.shape[0]*split):np.int(-df.shape[0]*split)], shuffle=False, batch_size=batch_size) # test_ds = df_to_dataset(df.iloc[np.int(-df.shape[0]*split):, :], labels[np.int(-df.shape[0]*split):], shuffle=False, batch_size=batch_size) # actfun = tf.nn.tanh # model = tf.keras.Sequential() # model.add(tf.keras.layers.Dense(4, activation=actfun, input_dim=df.shape[1]))
import pandas as pd
import preproc_data
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

########### general investigative plots ##############
# NOTE(review): orig=True presumably returns the unprocessed data set --
# confirm against preproc_data.return_data.
df, _, _ = preproc_data.return_data(orig=True)

# Box plot of 'age' grouped by 'job', with the axes title cleared.
ax = df.boxplot(column="age", by="job", rot=45)
ax.set_title("")
ax.set_ylabel("age")
# NOTE(review): the savefig target is an empty string in this file; a real
# output path has to be filled in before running.
plt.savefig(r"", bbox_inches="tight")

# Stacked bar chart of row counts per month, one bar segment per 'y' value.
counts_by_month_and_y = df.groupby(["month", "y"]).size().unstack()
counts_by_month_and_y.plot(kind="bar", stacked=True)
plt.gca().set_ylabel("count")
plt.savefig(r"", bbox_inches="tight")

# df['pdays'].plot.hist(bins=50)
# plt.gca().set_ylabel('count')
# plt.savefig(r'', bbox_inches='tight')

# Quantiles of 'pdays'. The result is not stored, so it is only visible when
# this statement is evaluated interactively.
pdays_probabilities = [0, 0.005, 0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975, 0.995, 1]
df["pdays"].quantile(q=pdays_probabilities)
df.boxplot(column="pdays")

###################### ANN Results ###################################
# NOTE(review): `labels`, `model` and `tf` are not defined anywhere in this
# section; presumably they come from the training code or an interactive
# session -- verify before running this part on its own.
features = df.values.astype(np.float32)
# The model output is passed through a sigmoid before plotting.
predicted = tf.nn.sigmoid(model(features)).numpy()
# assumes labels is an (n, 1) array so that hstack yields two columns --
# TODO confirm
results = pd.DataFrame(
    np.hstack((labels, predicted)),
    columns=["result", "predicted"],
)
# Box plot of the predicted values, grouped by the 'result' column.
ax = results.boxplot(column="predicted", by="result")
current_axes = plt.gca()
current_axes.set_title("")
current_axes.set_ylabel("predicted")