Exemplo n.º 1
0
def main(filename, numberOfComponents):
    """Run ``analysis.pca`` on a data file and print its explained variance.

    The file is loaded with ``utilities.readDataFile``, passed through
    ``utilities.getDataWithTimeIndex`` and ``dropna()``. The configuration is
    looked up via ``getConfig`` using the second-to-last ``/``-separated
    component of ``filename``. When the configuration provides relevant
    columns, all other columns are dropped before the analysis.

    Args:
        filename: Path of the data file; must contain at least one ``/`` so
            that a parent directory component exists.
        numberOfComponents: Forwarded unchanged to ``analysis.pca``.
    """
    # Load the data and discard rows via dropna() before any analysis.
    frame = utilities.getDataWithTimeIndex(utilities.readDataFile(filename))
    frame = frame.dropna()

    # The directory directly containing the file selects the configuration.
    pathParts = filename.split('/')
    config = getConfig(pathParts[-2])
    # Only the relevant columns and label names are used here; the other
    # configuration entries are unpacked solely to keep the tuple shape.
    _columns, relevantColumns, labelNames, _columnUnits, _timestamps = config

    if relevantColumns is not None:
        selection = [relevantColumns, labelNames]
        frame = utilities.dropIrrelevantColumns(frame, selection)

    prints.printEmptyLine()
    prints.printExplainedVarianceRatio(
        analysis.pca(frame, numberOfComponents, relevantColumns, labelNames)
    )
Exemplo n.º 2
0
def main(filename):
    """Print the correlation matrix of the data stored in ``filename``.

    The file is loaded with ``utilities.readDataFile``, passed through
    ``utilities.getDataWithTimeIndex`` and ``dropna()``. The configuration is
    looked up via ``getConfig`` using the second-to-last ``/``-separated
    component of ``filename``. When the configuration provides relevant
    columns, all other columns are dropped before the matrix is computed.

    Args:
        filename: Path of the data file; must contain at least one ``/`` so
            that a parent directory component exists.
    """
    # Load the data and discard rows via dropna() before any analysis.
    frame = utilities.getDataWithTimeIndex(utilities.readDataFile(filename))
    frame = frame.dropna()

    # The directory directly containing the file selects the configuration.
    pathParts = filename.split('/')
    config = getConfig(pathParts[-2])
    # Only the relevant columns and label names are used here; the other
    # configuration entries are unpacked solely to keep the tuple shape.
    _columns, relevantColumns, labelNames, _columnUnits, _timestamps = config

    if relevantColumns is not None:
        selection = [relevantColumns, labelNames]
        frame = utilities.dropIrrelevantColumns(frame, selection)

    prints.printEmptyLine()

    matrix = analysis.correlationMatrix(frame)
    prints.printCorrelationMatrix(matrix, frame, labelNames)
Exemplo n.º 3
0
def dropIrrelevantColumns(df, args):
    """Return ``df`` restricted to the columns listed as relevant.

    The column listing is printed before and after the removal through
    ``prints.printColumns``.

    Args:
        df: Data frame whose columns are filtered.
        args: Two-element sequence ``[relevantColumns, columnDescriptions]``.
            ``relevantColumns`` is a container of column labels to keep;
            ``columnDescriptions`` is forwarded to ``prints.printColumns``.

    Returns:
        The data frame without the irrelevant columns. When there is nothing
        to drop, the input frame itself is returned.
    """
    relevantColumns, columnDescriptions = args

    print("Columns before removal: ")
    prints.printColumns(df, columnDescriptions)

    # Collect every irrelevant label first and drop them in one call.
    # Dropping column by column rebuilds the frame on each iteration and
    # raises KeyError when an irrelevant label occurs more than once (the
    # first drop already removes all of its occurrences).
    irrelevantColumns = [
        column for column in df.columns if column not in relevantColumns
    ]
    if irrelevantColumns:
        df = df.drop(irrelevantColumns, axis=1)

    prints.printEmptyLine()
    print("Columns after removal: ")
    prints.printColumns(df, columnDescriptions)
    prints.printEmptyLine()

    return df
Exemplo n.º 4
0
    df_iris = pd.read_csv(filename).drop(column, axis=1)
    print("Writing file {}".format(target_file))
    df_iris.to_csv(target_file, index=False)


# Name of this script, used in the console messages below.
pyName = "dropColumn.py"
# Descriptions of the expected command-line arguments; printed one per line
# when the script is called with too few of them.
arguments = [
    "- filename (string)",
    "- target filename (string)",
    "- name of column (string)",
]

# usage: python dropColumn.py file targetfile column
if __name__ == "__main__":
    # Timestamp taken at the start of the run.
    start_time = time.time()
    prints.printEmptyLine()

    print("Running", pyName)
    # NOTE(review): the banner says "Prints dataframe" although the usage
    # line above describes dropping a column -- looks copy-pasted; confirm.
    print("Prints dataframe")
    prints.printEmptyLine()

    # Read the three positional arguments; a missing one raises IndexError,
    # which is turned into a usage message below.
    try:
        filename = sys.argv[1]
        target_file = sys.argv[2]
        column = sys.argv[3]
    except IndexError:
        print(pyName, "was called with inappropriate arguments")
        print("Please provide the following arguments:")
        for argument in arguments:
            print(argument)
        # NOTE(review): sys.exit() without an argument exits with status 0,
        # so a calling shell cannot detect this usage error from the exit
        # code -- consider a non-zero status.
        sys.exit()
Exemplo n.º 5
0
def getDataByTimeframe(df, start, end):
    """Return the rows of ``df`` selected by the label slice ``start:end``.

    The requested bounds and the number of rows found are printed to the
    console, followed by an empty line.

    Args:
        df: Data frame to slice with ``df.loc[start:end]``.
        start: Lower bound of the label slice.
        end: Upper bound of the label slice.

    Returns:
        The sliced data frame.
    """
    print("Finding data between", start, "and", end)
    selection = df.loc[start:end]
    rowCount = selection.shape[0]
    print("Found {} rows".format(rowCount))
    prints.printEmptyLine()
    return selection