示例#1
0
    return ds


def customLoss(yTrue, yPred):
    """Return the mean positively-weighted sigmoid cross-entropy.

    ``yTrue`` is passed as the labels and ``yPred`` as the logits of
    ``tf.nn.weighted_cross_entropy_with_logits``; the element-wise losses
    are then averaged into a single scalar.
    """
    ## upweight pos error due to 9:1 ratio of neg:pos samples
    weighted_losses = tf.nn.weighted_cross_entropy_with_logits(
        yTrue, yPred, pos_weight=10
    )
    return tf.reduce_mean(weighted_losses)


def customMonitor(yTrue, yPred):
    """Return the mean binary accuracy of the sigmoid-activated predictions.

    ``yPred`` is squashed through a sigmoid before being compared with
    ``yTrue`` by ``tf.keras.metrics.binary_accuracy``.
    """
    probabilities = tf.nn.sigmoid(yPred)
    return tf.reduce_mean(
        tf.keras.metrics.binary_accuracy(yTrue, probabilities)
    )


# Load the preprocessed (orig=False) features, labels and positive-class weight.
df, labels, pos_weight = preproc_data.return_data(orig=False)

## L-BFGS won't converge to good results without normalization, but SGD is OK without it
# Standardise every column: subtract its mean, then divide by its standard deviation.
# NOTE(review): a constant column would have zero std and turn into NaN/inf — confirm none exist.
column_means = df.mean(axis=0)
centered = df - column_means
column_stds = df.std(axis=0)
df = centered / column_stds

# Split fraction; the commented-out dataset code below uses it for the validation and test slices.
split = 0.2

# batch_size = 50000
# train_ds = df_to_dataset(df.iloc[:np.int(df.shape[0]*(1-2*split)), :], labels[:np.int(df.shape[0]*(1-2*split))], batch_size=batch_size)
# val_ds = df_to_dataset(df.iloc[np.int(-2*df.shape[0]*split):np.int(-df.shape[0]*split), :], labels[np.int(-2*df.shape[0]*split):np.int(-df.shape[0]*split)], shuffle=False, batch_size=batch_size)
# test_ds = df_to_dataset(df.iloc[np.int(-df.shape[0]*split):, :], labels[np.int(-df.shape[0]*split):], shuffle=False, batch_size=batch_size)

# actfun = tf.nn.tanh
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(4, activation=actfun, input_dim=df.shape[1]))
示例#2
0
import pandas as pd
import preproc_data
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

########### general investigative plots ##############
# Exploratory plots on the data returned by preproc_data.return_data(orig=True).
# NOTE(review): every plt.savefig call below is given an empty path (r'') —
# presumably a scrubbed placeholder; supply real file names before relying on
# the saved output.
df, _, _ = preproc_data.return_data(orig=True)

# Box plot of 'age' grouped by 'job', with rotated tick labels.
ax = df.boxplot(column='age', by='job', rot=45)
ax.set_title('')  # clear the per-axes title
ax.set_ylabel('age')
plt.savefig(r'', bbox_inches='tight')

# Stacked bar chart of row counts per 'month', split by the value of 'y'.
df.groupby(['month', 'y']).size().unstack().plot(kind='bar', stacked=True)
plt.gca().set_ylabel('count')
plt.savefig(r'', bbox_inches='tight')

# df['pdays'].plot.hist(bins=50)
# plt.gca().set_ylabel('count')
# plt.savefig(r'', bbox_inches='tight')

# Print the quantiles of 'pdays': as a bare expression the result would be
# silently discarded whenever this file is run as a script.
print(df['pdays'].quantile(
    q=[0, 0.005, 0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975, 0.995, 1]))

# Without `by` or `ax`, DataFrame.boxplot draws on the current axes, which
# would be the bar chart above; open a fresh figure so the plots stay separate.
plt.figure()
df.boxplot(column='pdays')

###################### ANN Results ###################################
# Box plot of the model's sigmoid outputs, grouped by the label value.
# NOTE(review): `labels`, `model` and `tf` are not defined in the visible part
# of this script, and `df` was reloaded with orig=True above — confirm the
# intended session state before running this section.
ann_inputs = df.values.astype(np.float32)
ann_scores = tf.nn.sigmoid(model(ann_inputs)).numpy()
ann_results = pd.DataFrame(
    np.hstack((labels, ann_scores)),
    columns=['result', 'predicted'],
)
ax = ann_results.boxplot(column='predicted', by='result')
plt.gca().set_title('')
plt.gca().set_ylabel('predicted')