from src.data_interface import d, L
from src.utils import get_path

# L is used below as a list of column labels (L.index(name) gives the column
# position of a label in the data matrix), so make sure it is a real list.
L = list(L)
D = d.view()

# Output base directory: one level above whatever get_path(__file__) returns.
path = get_path(__file__) + '/..'
# Placeholders: {0} = base path, {1} / {2} = the two feature names plotted.
savepath_template = '{0}/plots/scatterplots/{1}-{2}.pdf'

# Draw 400 row indices (with replacement) from 1..200000 inclusive.
# np.random.random_integers is deprecated; randint with an exclusive upper
# bound of high + 1 is its documented equivalent.  The upper bound is also
# clamped to the last valid row of D so a sampled index can never fall
# outside the data.
# NOTE(review): sampling starts at 1, so row 0 is never selected - confirm
# that this is intended.
# NOTE(review): np is not imported in the visible part of this file.
max_row = min(200000, D.shape[0] - 1)
rows = np.random.randint(1, max_row + 1, 400)

data = D[rows,:]

def is_alert_colors(is_alert):
    """Return the marker colour for an ``IsAlert`` value.

    Gives 'blue' when the value compares equal to 1 and 'red' for any
    other value.
    """
    if is_alert == 1:
        return 'blue'
    return 'red'

# One colour per sampled row, taken from the 'IsAlert' column.  This must be
# a concrete list: the same colours are passed to every scatter() call in the
# loop below, and on Python 3 a map() object is a one-shot iterator that
# would be unusable as a colour sequence and exhausted after the first use.
colors = [is_alert_colors(flag) for flag in data[:,L.index('IsAlert')]]

#features = ['P6', 'V1', 'V3', 'V6']
features = L[4:]

# Save one scatter plot per unordered pair of features.
for f1, f2 in it.combinations(features, 2):
    idx1, idx2 = L.index(f1), L.index(f2)
    plt.title('Feature {0} vs {1}'.format(f1, f2), {'size': 20})
    plt.scatter(data[:,idx1], data[:,idx2], c=colors)
    plt.gca().set_xlabel(f1, {'size': 18})
    plt.gca().set_ylabel(f2, {'size': 18})
    # papertype is no longer passed: it only ever applied to PostScript
    # output and current Matplotlib releases reject it as an unknown keyword.
    plt.savefig(savepath_template.format(path,f1,f2), format='pdf')
    # Clear the current axes so the next pair starts from an empty plot.
    plt.cla()

# Example #2
0
# NOTE(review): pca, X and bool_to_color are defined outside the visible part
# of this snippet.
pca.fit(X)

# Plot the running total of the explained-variance ratios, one point per
# principal component.
plt.plot(np.cumsum(pca.explained_variance_ratio_), marker='o')
ax = plt.gca()
plt.title('Cumulative percentage of total variation explained by principal components')
ax.set_xlabel('Principal component')
ax.set_ylabel('% of total variation')
# papertype is no longer passed: it only ever applied to PostScript output
# and current Matplotlib releases reject it as an unknown keyword.
plt.savefig('{0}/plots/pca-variation-explained.pdf'.format(path), format='pdf')
plt.cla()

# Project X using the first three columns of pca.components_.
# NOTE(review): this assumes components_ is laid out as
# (n_features, n_components).  Current scikit-learn stores it as
# (n_components, n_features), in which case the projection matrix should be
# pca.components_[0:3].T - confirm against the PCA implementation in use.
W = pca.components_[:,0:3]
X_p = np.dot(X,W)

# Sample 120 row indices (with replacement).  randint's upper bound is
# exclusive, so every index is a valid row of X; the previous
# random_integers call included X.shape[0] itself and could index one past
# the end.
rnd_rows = np.random.randint(0, X.shape[0], 120)

# Concrete list rather than a map() object: on Python 3 map() returns a
# one-shot iterator, and these colours are used by scatter() here and again
# later in the script.
colors = [bool_to_color(flag) for flag in d.view()[rnd_rows,L.index('IsAlert')]]
plt.scatter(X_p[rnd_rows,0], X_p[rnd_rows,1], c=colors)
plt.title('Scatter plot of 1. and 2. pricipal component')
ax = plt.gca()
ax.set_xlabel('1. Pricipal component')
ax.set_ylabel('2. Principal component')
plt.savefig('{0}/plots/scatter-principal-components.pdf'.format(path), format='pdf')
plt.cla()

for i in range(8):
    rnd_rows = np.random.random_integers(0, X.shape[0], 120)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_p[rnd_rows,0], X_p[rnd_rows,1], X_p[rnd_rows,2], c=colors)
    plt.title('Scatter of 1., 2. and 3. pricipal component')
    ax = plt.gca()