Example #1
def pcaPlot(filename):
    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(
        subdir)
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()

    traintime, testtime, validtime = timestamps

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    start_train, end_train = traintime
    start_test, end_test = testtime
    start_valid, end_valid = validtime

    df_train = utilities.getDataByTimeframe(df, start_train, end_train)
    train_vals = df_train.values
    #train_vals = df.values

    # standardise the features using statistics from the training window only
    sc = StandardScaler()
    train_vals = sc.fit_transform(train_vals)

    numberOfComponents = 2

    pca = decomposition.PCA(n_components=numberOfComponents)
    pca.fit(train_vals)

    # project the full dataset with the scaler and components fitted on the training data
    x = df.values
    x = sc.transform(x)
    x = pca.transform(x)

    df_pca = pd.DataFrame(data=x, index=df.index, columns=['pca1', 'pca2'])
    df_pca_train = utilities.getDataByTimeframe(df_pca, start_train, end_train)
    df_pca_test = utilities.getDataByTimeframe(df_pca, end_train, end_test)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('PCA 1', fontsize=10)
    ax.set_ylabel('PCA 2', fontsize=10)
    ax.set_title('PCA plot', fontsize=12)
    cmap = sns.cubehelix_palette(as_cmap=True)
    # color the test points by their position in time
    indexx = list(range(df_pca_test.shape[0]))
    ax.scatter(df_pca_train['pca1'], df_pca_train['pca2'], c='lightblue')
    points = ax.scatter(df_pca_test['pca1'],
                        df_pca_test['pca2'],
                        c=indexx,
                        cmap=cmap,
                        alpha=0.4)
    fig.colorbar(points)
    plt.show()

    return pca
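
The routine above fits the scaler and the PCA on the training window only and then projects the whole series through them. A minimal self-contained sketch of that pattern, using synthetic data in place of the project-local utilities and getConfig helpers:

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
index = pd.date_range('2020-01-01', periods=200, freq='D')
df = pd.DataFrame(rng.normal(size=(200, 4)), index=index, columns=['a', 'b', 'c', 'd'])

df_train = df.loc[:'2020-03-31']                      # training window only
sc = StandardScaler().fit(df_train.values)            # scaler fitted on the training data
pca = PCA(n_components=2).fit(sc.transform(df_train.values))

projected = pca.transform(sc.transform(df.values))    # project the full series
df_pca = pd.DataFrame(projected, index=df.index, columns=['pca1', 'pca2'])
print(df_pca.head())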
Example #2
def main(filename):
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()

    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(subdir)

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    analysis.pairplot(df)
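
analysis.pairplot is project-local; assuming it wraps seaborn's pairwise grid, an equivalent standalone call would be:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.DataFrame(np.random.default_rng(1).normal(size=(100, 3)), columns=['x1', 'x2', 'x3'])
sns.pairplot(df)   # scatter plots for every column pair, histograms on the diagonal
plt.show()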
Example #3
def main(filename, start, end):
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()
    df = utilities.getDataByTimeframe(df, start, end)

    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(
        subdir)

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    prints.printDataframe(df)
Example #4
def main(filename, numberOfComponents):
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()

    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(
        subdir)

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    prints.printEmptyLine()
    pca = analysis.pca(df, numberOfComponents, relevantColumns, labelNames)
    prints.printExplainedVarianceRatio(pca)
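
printExplainedVarianceRatio is not shown here, but the figures it reports presumably come from the fitted estimator's explained_variance_ratio_ attribute; a small sketch:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(2).normal(size=(500, 6))
pca = PCA(n_components=3).fit(X)
for i, ratio in enumerate(pca.explained_variance_ratio_, start=1):
    print(f'component {i}: {ratio:.2%} of the variance')
print(f'total: {pca.explained_variance_ratio_.sum():.2%}')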
Example #5
def main(filename):
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()

    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(
        subdir)

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    prints.printEmptyLine()

    corrMat = analysis.correlationMatrix(df)
    prints.printCorrelationMatrix(corrMat, df, labelNames)
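
analysis.correlationMatrix and printCorrelationMatrix are project-local; assuming the helper amounts to df.corr(), the same result can be obtained directly with pandas and rendered with seaborn:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.DataFrame(np.random.default_rng(3).normal(size=(200, 4)), columns=['a', 'b', 'c', 'd'])
corr = df.corr()                                   # pairwise Pearson correlations
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.show()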
Example #6
def main(filename):
    df = utilities.readDataFile(filename)
    df = utilities.getDataWithTimeIndex(df)
    df = df.dropna()

    subdir = filename.split('/')[-2]
    columns, relevantColumns, labelNames, columnUnits, timestamps = getConfig(
        subdir)

    traintime, testtime, validtime = timestamps

    if relevantColumns is not None:
        df = utilities.dropIrrelevantColumns(df, [relevantColumns, labelNames])

    df_train, df_test = utilities.getTestTrainSplit(df, traintime, testtime)

    analysis.valueDistribution(df_train, df_test)
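
analysis.valueDistribution is not shown; a hypothetical stand-in that compares the train and test splits column by column with overlaid histograms:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

rng = np.random.default_rng(4)
df_train = pd.DataFrame(rng.normal(0.0, 1.0, size=(300, 2)), columns=['a', 'b'])
df_test = pd.DataFrame(rng.normal(0.5, 1.2, size=(100, 2)), columns=['a', 'b'])

fig, axes = plt.subplots(1, len(df_train.columns), figsize=(10, 4))
for ax, col in zip(axes, df_train.columns):
    ax.hist(df_train[col], bins=30, alpha=0.5, density=True, label='train')
    ax.hist(df_test[col], bins=30, alpha=0.5, density=True, label='test')
    ax.set_title(col)
    ax.legend()
plt.show()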
Example #7
    assert not options.machine
    fp = open(options.session, "r")
    session = json.load(fp)
    submitter.setSession(session)
else:
    if options.machine:
        submitter.setMachine(options.machine)
    submitter.start()

# Submit the revisions for every build.
engines = []
for engine_path in options.engines:
    try:
        info = engineInfo.getInfo(engine_path)
        for config_name in options.configs:
            config = configs.getConfig(config_name, info)
            if config.omit():
                continue
            submitter.createBuild(info["engine_type"], config_name,
                                  info["revision"])
        engines.append(engine_path)
    except Exception as e:
        print('Failed to get info about ' + engine_path + '!')
        print('Exception: ' + repr(e))

# Run every benchmark for every build and config
benchmark_list = [benchmarks.getBenchmark(i) for i in options.benchmarks]
for benchmark in benchmark_list:
    for engine_path in engines:
        info = engineInfo.getInfo(engine_path)
        executor = executors.getExecutor(info)
Example #8
    fp = open(options.session, "r")
    session = json.load(fp)
    submitter.setSession(session)
else:
    if options.machine:
        submitter.setMachine(options.machine)
    submitter.start()


# Submit the revisions for every build.
engines = []
for engine_path in options.engines:
    try:
        info = engineInfo.getInfo(engine_path)
        for config_name in options.configs:
            config = configs.getConfig(config_name, info)
            if config.omit():
                continue
            submitter.createBuild(info["engine_type"], config_name, info["revision"])
        engines.append(engine_path)
    except Exception as e:
        print('Failed to get info about ' + engine_path + '!')
        print('Exception: ' + repr(e))
        traceback.print_exc(file=sys.stdout)

# Run every benchmark for every build and config
benchmark_list = [benchmarks.getBenchmark(i) for i in options.benchmarks]
for benchmark in benchmark_list:
    for engine_path in engines:
        info = engineInfo.getInfo(engine_path)
        executor = executors.getExecutor(info)