import seaborn as sns
import matplotlib.pyplot as plt

from loader import load_training, pull_features, pull_lots

# Restrict the training data to the rows whose "era" column equals "era1"
# before handing it to pull_lots (a project helper; its exact column
# selection is defined in loader -- TODO confirm what it returns).
training_data = load_training()
era1_rows = training_data[training_data["era"] == "era1"]
dataset = pull_lots(era1_rows)

# Each entry is (extra pairplot keyword arguments, output image path).
# The first plot is the plain scatter matrix; the second colours the points
# by the "target_bernie" column.
pairplot_jobs = (
    ({}, "images/scatter/scatter_matrix_seaborn.png"),
    ({"hue": "target_bernie"}, "images/scatter/scatter_matrix_seaborn_hue.png"),
)

for extra_kwargs, output_path in pairplot_jobs:
    sns.pairplot(dataset, aspect=1, **extra_kwargs)
    # pairplot leaves its new figure as pyplot's current figure, which is
    # the one plt.savefig writes out.
    plt.savefig(output_path)
import matplotlib.pyplot as plt

from loader import load_training, pull_features, pull_lots

# Bounds of the driver loop below: groups start at feature 1 and advance by
# FEATURES_PER_FIGURE while the start index is below LAST_GROUP_START_BOUND.
FEATURES_PER_FIGURE = 9
LAST_GROUP_START_BOUND = 50 - FEATURES_PER_FIGURE


def create_histogram(dataset, selected_features, group):
    """Save a grid of histograms, one panel per selected column.

    Args:
        dataset: DataFrame-like object; ``dataset[selected_features]`` must
            support ``.hist(...)`` (pandas ``DataFrame.hist``).
        selected_features: list of column names to plot, e.g.
            ``['feature1', 'feature2', ..., 'feature9']``.
        group: value embedded in the output file name; the image is written
            to ``images/other/histogram<group>.png``.

    The figure is closed once it has been saved (or if saving fails) so that
    repeated calls do not accumulate open pyplot figures.
    """
    axes = dataset[selected_features].hist(ylabelsize=6, xlabelsize=5, bins=20)
    # DataFrame.hist draws on a fresh figure, which is now pyplot's current
    # figure; keep a handle so it can be saved and closed explicitly.
    figure = plt.gcf()
    try:
        for axes_row in axes:
            for panel in axes_row:
                # Drop every frame line except the bottom one.
                for side in ('right', 'top', 'left'):
                    panel.spines[side].set_visible(False)
                panel.title.set_fontsize(7)
        # Extra spacing keeps neighbouring titles and tick labels apart.
        figure.subplots_adjust(hspace=0.5, wspace=0.5)
        figure.savefig('images/other/histogram{}.png'.format(group))
    finally:
        plt.close(figure)


dataset = load_training()

# Group starts are 1, 10, 19, 28, 37, so columns feature1..feature45 are
# plotted, nine per image.
# NOTE(review): any columns named feature46 and above are not plotted with
# these bounds -- confirm that is intended.
for group in range(1, LAST_GROUP_START_BOUND, FEATURES_PER_FIGURE):
    selected_features = [
        'feature' + str(feature)
        for feature in range(group, group + FEATURES_PER_FIGURE)
    ]
    create_histogram(dataset, selected_features, group)
from loader import load_training, pull_features

# Load the training data, pass it through pull_features (project helper from
# loader), and print the describe() summary of the result.
training_frame = load_training()
X = pull_features(training_frame)
summary = X.describe()
print(summary)
import pandas as pd
import matplotlib.pyplot as plt

from loader import load_training, pull_features


def create_correlation_matrix(dataset, filename, filepath='images/correlation/',
                              cmap=plt.cm.viridis):
    """Save a heatmap of the pairwise column correlations of ``dataset``.

    Args:
        dataset: object providing ``.corr()`` (pandas ``DataFrame.corr``).
        filename: name of the image file to write.
        filepath: prefix that ``filename`` is appended to verbatim (plain
            string concatenation, so include any trailing separator).
        cmap: matplotlib colormap used for the heatmap.

    The figure is closed after saving (or on failure) so that repeated calls
    do not accumulate open pyplot figures.
    """
    correlation = dataset.corr()
    # Take the labels from the correlation result, not from the input frame:
    # corr() may return fewer columns than the input, and ticks and labels
    # must line up with the image actually drawn.
    labels = correlation.columns
    tick_marks = list(range(len(labels)))

    fig, ax = plt.subplots()
    try:
        matrix = ax.imshow(correlation, cmap=cmap, interpolation='nearest')
        fig.colorbar(matrix)

        # Address the heatmap axes explicitly instead of relying on
        # pyplot's notion of the current axes.
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(labels, rotation='vertical')
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(labels)

        # Keep the axes legible: hide every second tick label.
        for axis in (ax.xaxis, ax.yaxis):
            for label in axis.get_ticklabels()[1::2]:
                label.set_visible(False)

        fig.savefig(filepath + filename)
    finally:
        plt.close(fig)


create_correlation_matrix(pull_features(load_training()),
                          'correlation_matrix_all_eras.png')