Exemplo n.º 1
0
def covariance(points):
    """Return the sample covariance of the x and y coordinates of ``points``.

    The value is ``sum((x - x_bar) * (y - y_bar)) / (n - 1)`` where ``x`` is
    ``point[0]``, ``y`` is ``point[1]``, the bars are the column means and
    ``n`` is ``len(points)``.

    Args:
        points: Sized sequence of indexable pairs of numbers; index 0 is the
            x coordinate and index 1 is the y coordinate.

    Returns:
        The covariance as a Python float. It is positive when x and y tend
        to rise together and negative when one falls as the other rises.

    Raises:
        ZeroDivisionError: If ``points`` holds exactly one point, because the
            ``n - 1`` divisor is then zero.
    """
    # Coordinates are read by direct indexing, the same way each point is
    # accessed in the per-point product below.
    x_bar = numpy.mean([point[0] for point in points])
    y_bar = numpy.mean([point[1] for point in points])

    # Covariance uses the signed product of the two deviations. Squaring each
    # deviation first would discard the sign and could never yield a negative
    # covariance.
    cross_products = [
        (point[0] - x_bar) * (point[1] - y_bar) for point in points
    ]
    return float(sum(cross_products)) / (len(points) - 1)
Exemplo n.º 2
0
def linear_regression(points):
    """Build a straight-line predictor ``y = m * x + b`` for ``points``.

    The gradient ``m`` is obtained from ``slope(points, x_mean, y_mean)`` and
    the intercept ``b`` is chosen so the line passes through the point of
    means ``(x_mean, y_mean)``.

    Args:
        points: Table of points; column 0 is read as x and column 1 as y
            via ``util.getCol``.

    Returns:
        A one-argument callable mapping an x value to the line's y value.
    """
    mean_x = numpy.mean(util.getCol(points, 0))
    mean_y = numpy.mean(util.getCol(points, 1))

    gradient = slope(points, mean_x, mean_y)
    # Anchoring the line at the point of means fixes the intercept.
    intercept = mean_y - gradient * mean_x

    return lambda x: gradient * x + intercept
Exemplo n.º 3
0
def graphablePoints(points):
    """Return the two endpoints of the regression line fitted to ``points``.

    The endpoints sit at the smallest and largest x values found in column 0
    of ``points``; their y values come from the callable returned by
    ``linear_regression(points)``.

    Args:
        points: Table of points; column 0 is read as x via ``util.getCol``.

    Returns:
        ``[[min_x, y_at_min_x], [max_x, y_at_max_x]]``.
    """
    x_values = util.getCol(points, 0)
    right = max(x_values)
    left = min(x_values)
    predict = linear_regression(points)

    # Two points are enough to draw a straight line across the data range.
    return [[x, predict(x)] for x in (left, right)]
Exemplo n.º 4
0
def scatter(points, name, step):
    """Save a scatter plot of ``points`` against MPG as a PDF.

    Column 0 of ``points`` is plotted on the x axis (labelled ``name``) and
    column 1 on the y axis (labelled ``'MPG'``). Both axes start at 0 and
    extend to ``int(max * 1.1)`` of the respective column. The file is
    written to ``figureFolder + '<step>-scatter-<name>.pdf'``.

    Args:
        points: Table of points; columns are read via ``util.getCol``.
        name: Attribute name, used for the title, x label and file name.
        step: Value converted with ``str`` and used as the file name prefix.

    Returns:
        None.
    """
    # Keep a handle on the figure so exactly this one is released afterwards.
    figure = pyplot.figure()
    try:
        # Create values
        xs = util.getCol(points, 0)
        ys = util.getCol(points, 1)

        # Create plot
        pyplot.suptitle(name + ' vs. ' + 'MPG')
        pyplot.plot(xs, ys, 'b.')
        pyplot.xlim(0, int(max(xs) * 1.1))
        pyplot.ylim(0, int(max(ys) * 1.1))
        pyplot.grid(True)
        pyplot.ylabel('MPG')
        pyplot.xlabel(name)

        # Save plot
        filename = str(step) + '-scatter-' + name + '.pdf'
        pyplot.savefig(figureFolder + filename)
    finally:
        # Opening a fresh figure and calling close() with no argument would
        # close only the fresh one and leave the plotted figure open, leaking
        # one figure per call. Closing the handle avoids that, even when
        # plotting or saving raises.
        pyplot.close(figure)
Exemplo n.º 5
0
def scatterWithLine(points, name, step, ylabel='MPG', xlabel=None, usesMPG=True):
    """Save a scatter plot of ``points`` with its regression line as a PDF.

    The title shows ``name`` (followed by ``' vs. MPG;'`` when ``usesMPG`` is
    true) plus the correlation coefficient and covariance of ``points``, each
    rounded to two decimals, as computed by ``math_utils``. The regression
    line is drawn in red between the two points returned by
    ``math_utils.graphablePoints``. The file is written to
    ``figureFolder + '<step>-scatter-regression-<name>.pdf'``.

    Args:
        points: Table of points; columns are read via ``util.getCol``.
        name: Attribute name, used in the title and file name, and as the
            x label when ``xlabel`` is not given.
        step: Value converted with ``str`` and used as the file name prefix.
        ylabel: Label for the y axis.
        xlabel: Label for the x axis; falls back to ``name`` when falsy.
        usesMPG: Whether to include ``'vs. MPG;'`` in the title.

    Returns:
        None.
    """
    # Keep a handle on the figure so exactly this one is released afterwards.
    figure = pyplot.figure()
    try:
        xs = util.getCol(points, 0)
        ys = util.getCol(points, 1)

        corr_text = str(round(math_utils.correlationCoeff(points), 2))
        cov_text = str(round(math_utils.covariance(points), 2))

        # The separator carries its own leading space so the title reads
        # "<name> vs. MPG; Correlation: ..." rather than running the name
        # and "vs." together.
        vs = ' vs. MPG;' if usesMPG else ''
        title = (name + vs + ' Correlation: ' + corr_text
                 + ' Covariance: ' + cov_text)

        # Create plot
        pyplot.suptitle(title)
        pyplot.plot(xs, ys, 'b.')
        pyplot.xlim(0, int(max(xs) * 1.1))
        pyplot.ylim(0, int(max(ys) * 1.1))
        pyplot.grid(True)
        pyplot.ylabel(ylabel)
        pyplot.xlabel(xlabel if xlabel else name)

        # Overlay the regression line using its two endpoints.
        linePoints = math_utils.graphablePoints(points)
        lineXs = util.getCol(linePoints, 0)
        lineYs = util.getCol(linePoints, 1)
        pyplot.plot(lineXs, lineYs, 'r')

        # Save plot
        filename = str(step) + '-scatter-regression-' + name + '.pdf'
        pyplot.savefig(figureFolder + filename)
    finally:
        # Opening a fresh figure and calling close() with no argument would
        # close only the fresh one and leave the plotted figure open, leaking
        # one figure per call. Closing the handle avoids that, even when
        # plotting or saving raises.
        pyplot.close(figure)
Exemplo n.º 6
0
def correlationCoeff(points):
    """Return the Pearson correlation coefficient of ``points``.

    The value is::

        sum(dx * dy) / sqrt(sum(dx ** 2) * sum(dy ** 2))

    where ``dx = point[0] - x_bar`` and ``dy = point[1] - y_bar`` are the
    deviations of each coordinate from its column mean.

    Args:
        points: Sequence of indexable pairs of numbers; index 0 is the
            x coordinate and index 1 is the y coordinate.

    Returns:
        The correlation coefficient, between -1 and 1 for non-degenerate
        input.

    Raises:
        ZeroDivisionError: If ``points`` is empty.
    """
    # Coordinates are read by direct indexing, the same way each point is
    # accessed when computing the deviations below.
    x_bar = numpy.mean([point[0] for point in points])
    y_bar = numpy.mean([point[1] for point in points])

    deltas = [(point[0] - x_bar, point[1] - y_bar) for point in points]

    # The numerator must sum the per-point products of deviations. Summing
    # each deviation column first and multiplying the totals always gives
    # roughly zero, because deviations from a mean cancel out.
    top = sum(dx * dy for dx, dy in deltas)
    bot = math.sqrt(
        sum(dx * dx for dx, _ in deltas) * sum(dy * dy for _, dy in deltas)
    )
    return top / bot
Exemplo n.º 7
0
def data_vis():
    table = file_system.loadTable('../datasets/incomeNoNA.csv')

    col = util.getCol(table, INDICES['degree'])
    freqDict = analysis.frequency(col)
    diagram.pie(freqDict, 'Degree', 'Pie-Degree')

    col = util.getCol(table, INDICES['ethnicity'])
    freqDict = analysis.frequency(col)
    diagram.pie(freqDict, 'Ethnicity', 'Pie-Ethnicity')

    col = util.getCol(table, INDICES['marital-status'])
    freqDict = analysis.frequency(col)
    diagram.pie(freqDict, 'Marital Status', 'Marital-Status')

    col = util.getCol(table, INDICES['gender'])
    freqDict = analysis.frequency(col)
    diagram.pie(freqDict, 'Gender', 'Gender')

    col = util.getCol(table, INDICES['age'])
    freqDict = analysis.frequency(col)
    diagram.dot(freqDict, 'Age', 'Dot-Age')

    table = table_utils.mapCol(table, constants.INDICES['degree'],
                               homework.get_degree)
    table = table_utils.mapCol(table, constants.INDICES['marital-status'],
                               homework.get_marital_status)
    table = table_utils.mapCol(table, constants.INDICES['ethnicity'],
                               homework.get_ethnicity)
    table = table_utils.mapCol(table, constants.INDICES['salary'],
                               homework.get_salary_continuous)
    table = table_utils.mapCol(table, constants.INDICES['gender'],
                               homework.get_gender)

    col = util.getCol(table, INDICES['degree'])
    freqDict = analysis.frequency(col)
    diagram.frequency(freqDict, 'Degree', 'Frequency-Degree')

    col = util.getCol(table, INDICES['marital-status'])
    freqDict = analysis.frequency(col)
    diagram.frequency(freqDict, 'Marital Status', 'Frequency-Marital-Status')