Пример #1
0
def main():
    """Exercise the Data accessors and the analysis helpers on 'cars.csv'.

    Each result is printed under a short label, followed by a spacer line.

    Every ``print`` call below takes exactly one argument. Under Python 2
    that is the print statement applied to a parenthesized expression, and
    under Python 3 it is the print function, so the output is the same on
    both while the function no longer fails to compile on Python 3.
    """
    d = Data('cars.csv')

    # --- "raw" accessors -------------------------------------------------
    print("Raw Headers")
    print(d.get_raw_headers())
    print("\n\n")
    print("Raw number of columns")
    print(d.get_raw_num_columns())
    print("\n\n")
    print("Raw number of rows")
    print(d.get_raw_num_rows())
    print("\n\n")
    print("13th row")
    print(d.get_raw_row(13))
    print("\n\n")
    print("Value at row 6, header 'Car'")
    print(d.get_raw_value(6, 'Car'))
    print("\n\n")

    # --- matrix attribute and the non-raw accessors ----------------------
    print("Matrix data")
    print(d.matrix_data)
    print("\n\n")
    print("Headers")
    print(d.get_headers())
    print("\n\n")
    print("Number of cols")
    print(d.get_num_columns())
    print("\n\n")
    print("5th row")
    print(d.get_row(5))
    print("\n\n")
    print("Get value")
    print(d.get_value(5, 'Horsepower'))
    print("\n\n")
    print("get_data function")
    print(d.get_data(['Origin', 'Horsepower']))
    print("\n\n")

    # --- analysis module helpers -----------------------------------------
    print("data range")
    print(analysis.data_range(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("mean of horsepower and origin")
    print(analysis.mean(d, ['Horsepower', 'Origin']))
    print("\n\n")
    print("standard deviation for horsepower and origin")
    print(analysis.stdev(d, ['Horsepower', 'Origin']))
    # This spacer is a single newline in the original; kept as-is so the
    # output does not change.
    print("\n")
    print("normalized columns origin and horsepower")
    print(analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("normalized together origin and horsepower")
    print(analysis.normalize_columns_together(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("median of columns origin, horspower and weight")
    print(analysis.median(d, ['Origin', 'Horsepower', 'Weight']))
    # Finally, the ``shape`` attribute of the object returned by get_data.
    print(d.get_data(['Origin', 'Horsepower']).shape)
Пример #2
0
def main(argv):
    """Run the analysis functions on the CSV file named on the command line.

    Args:
        argv: Command-line argument list. ``argv[0]`` is used as the program
            name in the usage message and ``argv[1]`` is the filename passed
            to ``Data``.

    Raises:
        SystemExit: With status 0, after printing a usage message, when no
            filename argument is supplied.
    """
    # Local import so this function does not rely on the file's import block.
    import sys

    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        # sys.exit rather than the site-provided ``exit`` helper: that helper
        # is meant for the interactive shell, is missing under ``python -S``,
        # and closes stdin before raising. The exit status stays 0 as before.
        sys.exit(0)

    # create a data object, which reads in the data
    dobj = Data(argv[1])
    headers = dobj.get_headers()

    # Every analysis call below works on the same two columns (the first and
    # third headers), so build that list once.
    cols = [headers[0], headers[2]]

    # test the five analysis functions
    print(cols)
    print("Data range by column:", analysis.data_range(cols, dobj))
    print("Mean:", analysis.mean(cols, dobj))
    print("Standard deviation:", analysis.stdev(cols, dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(cols, dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(cols, dobj))

    # Extension 1
    print("Median:", analysis.median(cols, dobj))

    # Extension 2
    print("Median Separately:", analysis.median_separately(cols, dobj))

    # Extension 3
    print("just  few rows:", dobj.limit_rows())

    # Extension 4
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        dobj.limit_columns())

    # Extension 5: same call as the per-column range above, with an extra
    # positional ``True`` argument.
    print("Data range overall:", analysis.data_range(cols, dobj, True))

    # Extension 6: print the last row, append a point, print the last row again.
    print(
        "The next two print statements get the last row of data. I add a row of data in between,"
        "so they are different.")
    print(dobj.get_row(-1))
    dobj.add_point([1, 2, 3])
    print(dobj.get_row(-1))
Пример #3
0
    # Disabled: multiple linear regression runs on three Data objects
    # (dataClean, dataGood, dataNoisy) that are not defined in this fragment.
    # analysis.testRegression(dataClean)
    # analysis.testRegression(dataGood)
    # analysis.testRegression(dataNoisy)

    # Build a Data object from the CSV file named below
    # (presumably Tesla stock data, judging by the label printed next).
    data = Data(filename='GOOG-NASDAQ_TSLA.csv')

    # Print one line per analysis function, each computed over the
    # 'Open', 'Close' and 'Volume' columns.
    print("\n\nDescriptive statistics of Tesla's stock data (daily open and close prices and trading volume:")
    print("Mean: ", analysis.mean(['Open', 'Close', 'Volume'], data))
    print("Standard deviation: ", analysis.stdev(['Open', 'Close', 'Volume'], data))
    print("Ranges: ", analysis.dataRange(['Open', 'Close', 'Volume'], data))
    print("Normalized columns: ", analysis.normalizeColumnsSeparately(['Open', 'Close', 'Volume'], data))
    print("Normalized globally: ", analysis.normalizeColumnsTogether(['Open', 'Close', 'Volume'], data))
    print("Variance: ", analysis.variance(['Open', 'Close', 'Volume'], data))
    print("Median: ", analysis.median(['Open', 'Close', 'Volume'], data))
    print("Mode value: ", analysis.modeValue(['Open', 'Close', 'Volume'], data))
    print("Mode frequency: ", analysis.modeFreq(['Open', 'Close', 'Volume'], data))
    print("Range value: ", analysis.rangeDiff(['Open', 'Close', 'Volume'], data), "\n")

    # Print the data before any modification.
    # NOTE(review): the argument 20 presumably limits how much is printed
    # (e.g. a row count) -- confirm against Data.printData.
    data.printData(20)

    # Exercise the mutating Data methods: set one value, overwrite the
    # 'Close' column with the 'Open' column, add a 'Volume2' column copied
    # from 'Volume', then append one raw row and one row.
    # NOTE(review): set_value arguments look like (value, row, header) and the
    # two added rows have different lengths (7 vs 6 items) -- confirm both
    # against the Data class.
    data.set_value(0.0001, 5, 'Open')
    data.set_column(data.get_column('Open'), 'Close')
    data.add_column(data.get_column('Volume'), 'Volume2', 'numeric')
    data.add_raw_row(['6/28/10', 2.0, 3.0, 4.0, 5.0, 1000.0, 3])
    data.add_row([1.0, 2.0, 3.0, 4.0, 5.0, 6])

    # Print the data again (same printData(20) call as above) so the effect
    # of the modifications can be compared with the earlier output.
    data.printData(20)