示例#1
0

def create_scatter_plot(dataset,
                        featureX,
                        featureY,
                        filename,
                        filepath='images/scatter/',
                        color='red'):
    """Save a scatter plot of two columns of ``dataset`` as an image file.

    Args:
        dataset: Object whose ``plot(kind='scatter', ...)`` method draws onto
            the supplied axes (presumably a pandas DataFrame -- confirm
            against the loader).
        featureX: Column name plotted on the x axis.
        featureY: Column name plotted on the y axis.
        filename: Name of the image file to write.
        filepath: Prefix prepended verbatim to ``filename``; it must already
            end with a path separator.
        color: Marker color forwarded to ``dataset.plot``.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1)
    try:
        dataset.plot(kind='scatter',
                     x=featureX,
                     y=featureY,
                     color=color,
                     ax=ax)
        # Save through the figure handle so the output does not depend on
        # which figure pyplot currently treats as "current".
        fig.savefig(filepath + filename)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)


# The correlated feature pair: once without an era filter, once for era 1.
create_scatter_plot(dataset=pull_features(training_data),
                    featureX='feature14',
                    featureY='feature44',
                    filename='scatter_correlation_all_eras.png')
create_scatter_plot(dataset=pull_features(training_data, for_era=1),
                    featureX='feature14',
                    featureY='feature44',
                    filename='scatter_correlation_era1.png',
                    color='orange')

# The independent feature pair.
create_scatter_plot(dataset=pull_features(training_data),
                    featureX='feature2',
                    featureY='feature40',
                    filename='scatter_independent_all_eras.png',
                    color='deepskyblue')
create_scatter_plot(pull_features(training_data, for_era=1),
                    'feature2',
示例#2
0
from pandas.plotting import scatter_matrix
from picipixi import create_gif

from loader import load_training, pull_features, pull_features_and_era_label


def create_scatter_plot(dataset,
                        filename,
                        featureX='feature14',
                        featureY='feature44',
                        filepath='images/scatter/animation/',
                        color='red'):
    """Save a scatter plot with both axes fixed to the range 0.0-1.0.

    Args:
        dataset: Object whose ``plot(kind='scatter', ...)`` method returns
            the axes it drew on (presumably a pandas DataFrame -- confirm
            against the loader).
        filename: Name of the image file to write.
        featureX: Column name plotted on the x axis.
        featureY: Column name plotted on the y axis.
        filepath: Prefix prepended verbatim to ``filename``; it must already
            end with a path separator.
        color: Marker color forwarded to ``dataset.plot``.
    """
    era_plot = dataset.plot(kind='scatter',
                            x=featureX,
                            y=featureY,
                            color=color)
    # The plot call made its own figure; keep a handle so it can be closed.
    figure = era_plot.figure
    try:
        # Identical limits on every image keep the frames comparable.
        era_plot.set_ylim(0.0, 1.0)
        era_plot.set_xlim(0.0, 1.0)
        figure.savefig(filepath + filename)
    finally:
        # Without this every call would leave one more figure open.
        plt.close(figure)


training_data = load_training()

# Render one scatter-plot frame for each era number from 1 through 120.
for era in range(1, 121):
    X = pull_features(training_data, for_era=era)
    frame_name = ''.join(('scatter_correlation_frame_era', str(era), '.png'))
    create_scatter_plot(X, frame_name)

# TODO: combine the frames into an animation
# create_gif('images/scatter/scatter_correlation_all_eras.gif', 'images/scatter/animation', 'scatter_correlation_frame_era')
示例#3
0
from loader import load_training, pull_features

# Load the training set and pull its features.
training_data = load_training()
X = pull_features(training_data)

# Print whatever summary X.describe() produces.
print(X.describe())
示例#4
0
    mpimg.imsave(filepath + diff_filename, diff_image)


def calc_mean_squared_diff(imageA_filename,
                           imageB_filename,
                           filepath='images/correlation/'):
    """Return the summed squared difference of two images per pixel of A.

    Both files are read from ``filepath`` (prepended verbatim to each file
    name). Every element-wise difference is squared and summed, and the total
    is divided by the product of the first two dimensions of image A.
    """
    pixels_a = mpimg.imread(filepath + imageA_filename)
    pixels_b = mpimg.imread(filepath + imageB_filename)
    delta = pixels_a.astype("float") - pixels_b.astype("float")
    squared_error = np.sum(delta**2)
    # Normalise by the first two dimensions of image A only.
    pixel_count = float(pixels_a.shape[0] * pixels_a.shape[1])
    return squared_error / pixel_count


training_data = load_training()

# Pull the era-1 features separately for target_bernie_value 0 and 1.
negative = pull_features(training_data, target_bernie_value=0, for_era=1)
positive = pull_features(training_data, target_bernie_value=1, for_era=1)

positive_png = 'correlation_matrix_positive.png'
negative_png = 'correlation_matrix_negative.png'

# One correlation-matrix image per subset, positive first.
create_correlation_matrix(positive, positive_png)
create_correlation_matrix(negative, negative_png)

# Difference image of the two matrices just written.
calc_image_diff(positive_png, negative_png, 'correlation_diff_era1.png')

mean_squared_difference = calc_mean_squared_diff(positive_png, negative_png)
print('Mean squared difference', mean_squared_difference)

# show image diff for all eras
示例#5
0
import pandas as pd
import matplotlib.pyplot as plt

from loader import load_training, pull_features


def create_correlation_matrix(dataset,
                              filename,
                              filepath='images/correlation/',
                              cmap=plt.cm.viridis):
    """Draw ``dataset.corr()`` as a colored matrix and save it as an image.

    Args:
        dataset: Object providing ``corr()`` (presumably a pandas DataFrame
            -- confirm against the loader).
        filename: Name of the image file to write.
        filepath: Prefix prepended verbatim to ``filename``; it must already
            end with a path separator.
        cmap: Colormap passed to ``imshow``.
    """
    correlation = dataset.corr()
    fig, ax = plt.subplots()
    try:
        matrix = ax.imshow(correlation, cmap=cmap, interpolation='nearest')
        fig.colorbar(matrix)
        # Label from the matrix that is actually drawn, so ticks and labels
        # stay aligned even if corr() returns fewer columns than the input.
        labels = list(correlation.columns)
        tick_marks = list(range(len(labels)))
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(labels, rotation='vertical')
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(labels)
        # Keep the axes legible: hide every second tick label.
        for axis in (ax.xaxis, ax.yaxis):
            for label in axis.get_ticklabels()[1::2]:
                label.set_visible(False)
        fig.savefig(filepath + filename)
    finally:
        # Close rather than merely clear the figure so that repeated calls
        # do not accumulate open figures.
        plt.close(fig)


# Plot the correlation matrix of the features pulled from the training set.
all_features = pull_features(load_training())
create_correlation_matrix(all_features, 'correlation_matrix_all_eras.png')
示例#6
0

def scatter_matrix_variation(dataset,
                             select_features,
                             filename_suffix='',
                             frac=0.1):
    """Create a scatter matrix from a random sample of selected columns.

    Args:
        dataset: Object indexable by a list of column names whose result
            offers ``sample(frac=...)`` (presumably a pandas DataFrame --
            confirm against the loader).
        select_features: Column names to include in the matrix.
        filename_suffix: Text inserted between ``'scatter_matrix'`` and
            ``'.png'`` in the output file name.
        frac: Fraction of rows passed to ``sample``.
    """
    # Use the caller's fraction as given; a hard-coded value here would make
    # the ``frac`` argument meaningless.
    sampled = dataset[select_features].sample(frac=frac)
    create_scatter_matrix(sampled,
                          'scatter_matrix' + filename_suffix + '.png')


training_data = load_training()

# Scatter matrix over feature1..feature49 of era 1, passing frac=0.1.
scatter_matrix_variation(
    dataset=pull_features(training_data, for_era=1),
    select_features=['feature' + str(number) for number in range(1, 50)],
    filename_suffix='_era1_1_50',
    frac=0.1)

# Scatter matrix over the hand-picked feature4..feature16 for era 1 with
# target_bernie_value=0, passing frac=1.0.
scatter_matrix_variation(
    dataset=pull_features(training_data, target_bernie_value=0, for_era=1),
    select_features=['feature' + str(number) for number in range(4, 17)],
    filename_suffix='_era1_4_17',
    frac=1.0)

# create a scatter plot matrix of only hand-selected features
scatter_matrix_variation(pull_features(training_data,
                                       target_bernie_value=0,
                                       for_era=1),