def covariance(points):
    """Return the sample covariance of the first two columns of ``points``.

    Computes ``sum((x - x_bar) * (y - y_bar)) / (n - 1)`` where ``x`` is
    ``point[0]``, ``y`` is ``point[1]`` and ``n`` is ``len(points)``.

    Args:
        points: Sequence of rows; index 0 of each row is the x value and
            index 1 is the y value. Both must be numeric.

    Returns:
        The sample covariance as a float.

    Raises:
        ZeroDivisionError: If ``points`` contains exactly one row, because
            the ``n - 1`` denominator is zero.
    """
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    # Convert the means to plain floats so the arithmetic below (and the
    # zero-denominator error) follows ordinary Python float semantics.
    x_bar = float(numpy.mean(xs))
    y_bar = float(numpy.mean(ys))

    # Covariance uses the product of the deviations themselves; squaring each
    # deviation first would discard the sign and always give a result >= 0.
    cross_deviation = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
    return float(cross_deviation) / (len(points) - 1)
def linear_regression(points):
    """Build a straight-line function for ``points``.

    The gradient is obtained from ``slope(points, x_bar, y_bar)`` and the
    intercept is chosen so that the line passes through the point of column
    means ``(x_bar, y_bar)``.

    Args:
        points: Rows whose columns 0 and 1 (read with ``util.getCol``) hold
            the x and y values.

    Returns:
        A one-argument function mapping ``x`` to ``gradient * x + intercept``.
    """
    mean_x = numpy.mean(util.getCol(points, 0))
    mean_y = numpy.mean(util.getCol(points, 1))

    gradient = slope(points, mean_x, mean_y)
    intercept = mean_y - gradient * mean_x

    def line(x):
        return gradient * x + intercept

    return line
def graphablePoints(points):
    """Return the two endpoints of the regression line for ``points``.

    The line from ``linear_regression(points)`` is evaluated at the smallest
    and largest values found in column 0.

    Returns:
        ``[[min_x, y_at_min_x], [max_x, y_at_max_x]]``.
    """
    x_values = util.getCol(points, 0)
    largest_x = max(x_values)
    smallest_x = min(x_values)
    predict = linear_regression(points)
    return [[x, predict(x)] for x in (smallest_x, largest_x)]
def scatter(points, name, step):
    """Save a scatter plot of ``points`` as a PDF.

    Column 0 of ``points`` is drawn on the x axis (labelled ``name``) and
    column 1 on the y axis (labelled 'MPG'). The output path is
    ``figureFolder + '<step>-scatter-<name>.pdf'``.

    Args:
        points: Rows whose columns 0 and 1 (read with ``util.getCol``) hold
            the x and y values.
        name: x-axis label; also used in the title and the file name.
        step: File-name prefix; converted with ``str``.
    """
    figure = pyplot.figure()
    try:
        # Create values
        xs = util.getCol(points, 0)
        ys = util.getCol(points, 1)

        # Create plot
        pyplot.suptitle(name + ' vs. ' + 'MPG')
        pyplot.plot(xs, ys, 'b.')
        # Both axes start at 0 and extend to 110% of the largest value,
        # truncated to an integer.
        pyplot.xlim(0, int(max(xs) * 1.1))
        pyplot.ylim(0, int(max(ys) * 1.1))
        pyplot.grid(True)
        pyplot.ylabel('MPG')
        pyplot.xlabel(name)

        # Save plot
        filename = str(step) + '-scatter-' + name + '.pdf'
        pyplot.savefig(figureFolder + filename)
    finally:
        # Close the figure that was actually drawn on. Opening a fresh figure
        # and closing that one instead would leave this figure open after
        # every call.
        pyplot.close(figure)
def scatterWithLine(points, name, step, ylabel='MPG', xlabel=None,
                    usesMPG=True):
    """Save a scatter plot of ``points`` with its regression line as a PDF.

    The title shows ``name``, optionally 'vs. MPG;', and the correlation
    coefficient and covariance of ``points`` rounded to two decimals. The
    output path is ``figureFolder + '<step>-scatter-regression-<name>.pdf'``.

    Args:
        points: Rows whose columns 0 and 1 (read with ``util.getCol``) hold
            the x and y values.
        name: Used in the title, the file name, and as the x-axis label when
            ``xlabel`` is not given.
        step: File-name prefix; converted with ``str``.
        ylabel: y-axis label.
        xlabel: x-axis label; falls back to ``name`` when falsy.
        usesMPG: When true, 'vs. MPG;' is added to the title after ``name``.
    """
    figure = pyplot.figure()
    try:
        xs = util.getCol(points, 0)
        ys = util.getCol(points, 1)

        correlation_text = str(round(math_utils.correlationCoeff(points), 2))
        covariance_text = str(round(math_utils.covariance(points), 2))

        # Build the title with exactly one space between the parts, e.g.
        # "Weight vs. MPG; Correlation: -0.83 Covariance: -5.2".
        title = name
        if usesMPG:
            title += ' vs. MPG;'
        title += ' Correlation: ' + correlation_text
        title += ' Covariance: ' + covariance_text

        # Create plot
        pyplot.suptitle(title)
        pyplot.plot(xs, ys, 'b.')
        # Both axes start at 0 and extend to 110% of the largest value,
        # truncated to an integer.
        pyplot.xlim(0, int(max(xs) * 1.1))
        pyplot.ylim(0, int(max(ys) * 1.1))
        pyplot.grid(True)
        pyplot.ylabel(ylabel)
        pyplot.xlabel(xlabel if xlabel else name)

        # Overlay the regression line, drawn between its two endpoints.
        linePoints = math_utils.graphablePoints(points)
        lineXs = util.getCol(linePoints, 0)
        lineYs = util.getCol(linePoints, 1)
        pyplot.plot(lineXs, lineYs, 'r')

        # Save plot
        filename = str(step) + '-scatter-regression-' + name + '.pdf'
        pyplot.savefig(figureFolder + filename)
    finally:
        # Close the figure that was actually drawn on. Opening a fresh figure
        # and closing that one instead would leave this figure open after
        # every call.
        pyplot.close(figure)
def correlationCoeff(points):
    """Return the Pearson correlation coefficient of the first two columns.

    Computes ``sum(dx * dy) / sqrt(sum(dx * dx) * sum(dy * dy))`` where
    ``dx`` and ``dy`` are each row's deviations from the column means.

    Args:
        points: Sequence of rows; index 0 of each row is the x value and
            index 1 is the y value. Both must be numeric.

    Returns:
        The coefficient as a float, or NaN when the denominator is zero
        (for example when either column is constant), since the correlation
        is undefined in that case.
    """
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    x_bar = float(numpy.mean(xs))
    y_bar = float(numpy.mean(ys))

    x_deltas = [x - x_bar for x in xs]
    y_deltas = [y - y_bar for y in ys]

    # The numerator is the sum of the per-row products of deviations.
    # Multiplying the two sums of deviations would always give ~0, because
    # deviations from a mean sum to zero.
    top = sum(dx * dy for dx, dy in zip(x_deltas, y_deltas))
    bot = math.sqrt(
        sum(dx * dx for dx in x_deltas) * sum(dy * dy for dy in y_deltas)
    )
    if bot == 0:
        return float('nan')
    return float(top / bot)
def data_vis():
    """Draw the charts for the income dataset.

    Loads '../datasets/incomeNoNA.csv', then in order:

    1. draws a pie chart of the value frequencies of the degree, ethnicity,
       marital-status and gender columns;
    2. draws a dot chart of the age column's value frequencies;
    3. maps the degree, marital-status, ethnicity, salary and gender columns
       through the matching ``homework.get_*`` function;
    4. draws frequency diagrams of the mapped degree and marital-status
       columns.
    """
    table = file_system.loadTable('../datasets/incomeNoNA.csv')

    # (column key, chart title, third argument passed to the diagram call)
    pie_specs = (
        ('degree', 'Degree', 'Pie-Degree'),
        ('ethnicity', 'Ethnicity', 'Pie-Ethnicity'),
        ('marital-status', 'Marital Status', 'Marital-Status'),
        ('gender', 'Gender', 'Gender'),
    )
    for key, title, tag in pie_specs:
        counts = analysis.frequency(util.getCol(table, INDICES[key]))
        diagram.pie(counts, title, tag)

    age_counts = analysis.frequency(util.getCol(table, INDICES['age']))
    diagram.dot(age_counts, 'Age', 'Dot-Age')

    # Each listed column is rewritten in turn by its mapping function.
    column_mappers = (
        ('degree', homework.get_degree),
        ('marital-status', homework.get_marital_status),
        ('ethnicity', homework.get_ethnicity),
        ('salary', homework.get_salary_continuous),
        ('gender', homework.get_gender),
    )
    for key, mapper in column_mappers:
        table = table_utils.mapCol(table, constants.INDICES[key], mapper)

    # These diagrams read the table after the mappings above were applied.
    frequency_specs = (
        ('degree', 'Degree', 'Frequency-Degree'),
        ('marital-status', 'Marital Status', 'Frequency-Marital-Status'),
    )
    for key, title, tag in frequency_specs:
        counts = analysis.frequency(util.getCol(table, INDICES[key]))
        diagram.frequency(counts, title, tag)