示例#1
0
	def handleLinearRegression(self):
		"""Ask for regression variables and optional color/size columns, then plot.

		Opens a ``LinRegressDialog`` and then a ``ColorDialog``; both are shown
		before either result is inspected.  Returns without touching any state
		when no data is loaded or when the regression dialog's result is an
		empty list.  Otherwise stores the color and size selections (``None``
		when nothing was chosen), resets the data and the view, copies the
		canvas width/height into the view's screen and calls
		``buildLinearRegression``.
		"""
		# Identity test: "== None" would be routed through the data object's
		# own __eq__, which need not return a plain boolean.
		if self.data is None:
			# Single-argument print(...) is valid on Python 2 and Python 3.
			print("you don't have data")
			return

		variables = Dialogs.LinRegressDialog(self.root, self.data.get_headers())
		colorbox = Dialogs.ColorDialog(self.root, self.data.get_headers())
		if variables.result == []:
			return

		# Color column: normalized values plus the chosen header, or nothing.
		if colorbox.resultc != []:
			self.colorMatrix = analysis.normalize_columns_separately(self.data, (colorbox.resultc,))
			self.colorResult = colorbox.resultc
		else:
			self.colorResult = None
			self.colorMatrix = None

		# Size column: same treatment as the color column.
		if colorbox.results != []:
			self.sizeMatrix = analysis.normalize_columns_separately(self.data, (colorbox.results,))
			self.sizeResult = colorbox.results
		else:
			self.sizeResult = None
			self.sizeMatrix = None

		self.dataheaders = variables.result
		self.resetData()
		self.view.reset()
		# Make the freshly reset view match the current canvas size.
		self.view.screen[0, 0] = float(self.canvas.winfo_width())
		self.view.screen[0, 1] = float(self.canvas.winfo_height())
		self.buildLinearRegression()
示例#2
0
	def buildPCA(self, headers, data):
		"""Plot the first three columns named in ``headers`` as black dots.

		headers -- column names; the first three are normalized for plotting.
			The fourth is normalized into ``self.size`` and the fifth into
			``self.color`` when ``data`` reports more than three / more than
			four data headers respectively.
		data -- data object handed to the ``analysis`` normalizers.

		The id of every oval created is appended to ``self.objects``.
		"""
		self.totalReset()

		points = analysis.normalize_columns_separately(headers[0:3], data)

		# Query the header count once; it is used for three separate decisions.
		numHeaders = len(data.get_data_headers())

		if numHeaders > 3:
			self.size = analysis.normalize_columns_separately([headers[3]], data)

		if numHeaders > 4:
			self.color = analysis.normalize_columns_together([headers[4]], data)

		vtm = self.view.build()
		# Start from all ones so the last (homogeneous) column is already 1.
		self.data2matrix = np.ones((points.shape[0], 4))

		if numHeaders < 3:
			# Only x and y are available: zero out the z column.
			self.data2matrix[:, :-2] = points
			self.data2matrix[:, -2] = np.zeros((points.shape[0]))
		else:
			self.data2matrix[:, :-1] = points

		# NOTE(review): presumably vtm is an np.matrix, making "*" a matrix
		# product rather than an element-wise one -- confirm against view.build().
		tend = self.data2matrix * vtm.T

		# One size option drives both the horizontal and the vertical radius.
		radius = int(self.sizeOption.get())

		for i in range(tend.shape[0]):
			tx = tend[i, 0]
			ty = tend[i, 1]
			pt = self.canvas.create_oval(tx - radius, ty - radius, tx + radius, ty + radius,
										 fill='black', outline='')
			self.objects.append(pt)
示例#3
0
文件: display.py 项目: TQ24/CS251
    def buildPoints(self, plot_cols):
        """Plot the columns in ``plot_cols`` as points, or as a histogram for one column.

        plot_cols -- list of one, two or three column names.  One column sets
            ``self.hasHistogram`` and delegates to ``self.buildHistogram``;
            two columns get a zero z column; three are used as-is.  A
            homogeneous column of ones is always appended.

        Point radius is 1 when ``self.selected_size`` is 1, otherwise
        ``2 * normalized size + 1``.  Point color is "blue" when
        ``self.selected_color`` is "blue", otherwise a green (value 0) to blue
        (value 1) gradient of the normalized color column.  Every oval id is
        appended to ``self.objects``.
        """
        # Delete any existing canvas objects used for plotting data.
        self.clearData()

        self.datamatrix = analysis.normalize_columns_separately(plot_cols, self.data_obj)
        if self.selected_size == 1:
            self.size_list = 1
        else:
            self.size_list = analysis.normalize_columns_separately([self.selected_size], self.data_obj)

        if self.selected_color == "blue":
            self.color_list = "blue"
        else:
            self.color_list = analysis.normalize_columns_separately([self.selected_color], self.data_obj)

        num_rows = self.datamatrix.shape[0]
        ones = np.ones((num_rows, 1))
        zeros = np.zeros((num_rows, 1))

        num_cols = len(plot_cols)
        if num_cols == 1:
            self.hasHistogram = True
            self.datamatrix = np.hstack((self.datamatrix, zeros, zeros, ones))
            self.buildHistogram()
            return
        if num_cols == 2:
            self.hasHistogram = False
            self.datamatrix = np.hstack((self.datamatrix, zeros, ones))
        elif num_cols == 3:
            self.hasHistogram = False
            self.datamatrix = np.hstack((self.datamatrix, ones))

        self.build_miniwin()
        vtm = self.vobj.build()
        pts = (vtm * self.datamatrix.T).T

        # None of these change while drawing, so evaluate them once rather
        # than once (or twice) per point.
        fixed_size = isinstance(self.size_list, int)
        fixed_color = isinstance(self.color_list, str)
        # "Solid" and any unrecognised texture are drawn the same way (filled).
        outline_only = self.texture_selection.get() == "Outline"

        for i in range(pts.shape[0]):
            x = pts[i, 0]
            y = pts[i, 1]

            if fixed_size:
                dx = 1
            else:
                dx = float(self.size_list[i]) * 2 + 1

            if fixed_color:
                color = "blue"
            else:
                level = float(self.color_list[i])
                rgb = (0, int((1 - level) * 255), int(level * 255))
                color = f'#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}'

            if outline_only:
                pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill="", outline=color)
            else:
                pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill=color, outline="")
            self.objects.append(pt)
示例#4
0
def main():
    """Exercise the Data accessors and the analysis helpers on ``cars.csv``.

    Each section prints a title, then the value, then (for most sections) a
    separator string.  Every ``print(...)`` call takes exactly one argument,
    so the statement is valid on Python 3 and prints the same text on
    Python 2.
    """
    d = Data('cars.csv')

    # (title, zero-argument producer, separator printed afterwards or None).
    # Producers are lambdas so that each value is computed only after its
    # title has been printed, as in a straight-line script.
    sections = [
        ("Raw Headers", lambda: d.get_raw_headers(), "\n\n"),
        ("Raw number of columns", lambda: d.get_raw_num_columns(), "\n\n"),
        ("Raw number of rows", lambda: d.get_raw_num_rows(), "\n\n"),
        ("13th row", lambda: d.get_raw_row(13), "\n\n"),
        ("Value at row 6, header 'Car'", lambda: d.get_raw_value(6, 'Car'), "\n\n"),
        ("Matrix data", lambda: d.matrix_data, "\n\n"),
        ("Headers", lambda: d.get_headers(), "\n\n"),
        ("Number of cols", lambda: d.get_num_columns(), "\n\n"),
        ("5th row", lambda: d.get_row(5), "\n\n"),
        ("Get value", lambda: d.get_value(5, 'Horsepower'), "\n\n"),
        ("get_data function", lambda: d.get_data(['Origin', 'Horsepower']), "\n\n"),
        ("data range", lambda: analysis.data_range(d, ['Origin', 'Horsepower']), "\n\n"),
        ("mean of horsepower and origin",
         lambda: analysis.mean(d, ['Horsepower', 'Origin']), "\n\n"),
        ("standard deviation for horsepower and origin",
         lambda: analysis.stdev(d, ['Horsepower', 'Origin']), "\n"),
        ("normalized columns origin and horsepower",
         lambda: analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']), "\n\n"),
        ("normalized together origin and horsepower",
         lambda: analysis.normalize_columns_together(d, ['Origin', 'Horsepower']), "\n\n"),
        ("median of columns origin, horspower and weight",
         lambda: analysis.median(d, ['Origin', 'Horsepower', 'Weight']), None),
    ]

    for title, produce, separator in sections:
        print(title)
        print(produce())
        if separator is not None:
            print(separator)

    print(d.get_data(['Origin', 'Horsepower']).shape)
示例#5
0
    def buildLinearRegression(self):
        """Draw a single-variable fit line, or report a two-variable regression.

        When the dialog reports no second independent variable, runs
        ``analysis.single_linear_regression`` on the chosen independent and
        dependent columns, draws the fitted line in orange between normalized
        x = 0 and x = 1, records the line in ``self.reg_objects`` and calls
        ``buildRegressionLegend``.  Otherwise runs
        ``analysis.linear_regression`` on both independent columns and calls
        ``buildMultipleRegressionLegend``.

        Earlier revisions also normalized the two columns and transformed
        ``self.data2plot`` in the single-variable path; neither result was
        ever used, so both computations have been dropped.
        """
        # Identity test instead of "== None" (PEP 8).
        if self.dialogWindow.getIV2() is not None:
            indHeaders = [self.dialogWindow.getIV(), self.dialogWindow.getIV2()]
            (b, sse, r2, t, p) = analysis.linear_regression(
                self.dataObj, indHeaders, self.dialogWindow.getDV())
            self.buildMultipleRegressionLegend(b, sse, r2, t, p)
            return

        vtm = self.view.build()

        (m, b, rVal, pVal, stdErr, indRange, depRange) = analysis.single_linear_regression(
            self.dataObj, [self.dialogWindow.getIV()], [self.dialogWindow.getDV()])

        # Evaluate the fit at both ends of the independent range, then rescale
        # the predictions by the dependent column's (min, max) range.
        depMin = depRange[0][0]
        depSpan = depRange[0][1] - depMin
        self.endpoint1 = ((indRange[0][0] * m + b) - depMin) / depSpan
        self.endpoint2 = ((indRange[0][1] * m + b) - depMin) / depSpan

        # One homogeneous point per row; after the transform below each
        # COLUMN of self.endpoints holds one transformed point.
        self.endpoints = np.matrix([[0., self.endpoint1, 0., 1.],
                                    [1., self.endpoint2, 0., 1.]])
        self.endpoints = (vtm * self.endpoints.T)
        self.line = self.canvas.create_line(self.endpoints[0, 0], self.endpoints[1, 0],
                                            self.endpoints[0, 1], self.endpoints[1, 1],
                                            fill="orange")
        self.reg_objects.append(self.line)

        self.buildRegressionLegend(m, b, rVal)
示例#6
0
    def buildPoints(self, cols):
        """Normalize the columns in ``cols`` and draw one canvas shape per row.

        cols -- column names to plot; when exactly two are given a zero
            column is inserted before the homogeneous column of ones.

        The shape comes from ``self.dataShape``: "circle" draws an outlined
        oval, "square" a filled rectangle, anything else a filled oval.
        Radii come from ``self.sizes`` and colors from ``self.colors``; every
        canvas id is appended to ``self.objects``.
        """
        self.clear()

        def column_of(make):
            # Column matrix with one entry per raw data row, built by np.zeros/np.ones.
            return np.matrix(make(self.data.get_raw_num_rows())).T

        # Data point matrix: normalized columns [+ zeros] + ones.
        self.dataMatrix = analysis.normalize_columns_separately(self.data, cols)
        if len(cols) == 2:
            self.dataMatrix = np.hstack((self.dataMatrix, column_of(np.zeros)))
        self.dataMatrix = np.hstack((self.dataMatrix, column_of(np.ones)))

        # Transform the points and draw them.
        vtm = self.view.build()
        pts = (vtm * self.dataMatrix.T).T
        shape = self.dataShape.get()
        for i in range(pts.shape[0]):
            cx = pts[i, 0]
            cy = pts[i, 1]
            radius = self.sizes[i]
            box = (cx - radius, cy - radius, cx + radius, cy + radius)
            if shape == "circle":
                pt = self.canvas.create_oval(*box, outline=self.colors[i])
            elif shape == "square":
                pt = self.canvas.create_rectangle(*box, fill=self.colors[i], outline='')
            else:
                pt = self.canvas.create_oval(*box, fill=self.colors[i], outline='')
            self.objects.append(pt)
    def buildHistogram(self):
        """Redraw the ten-bar histogram of the first normalized column.

        Deletes the rectangles in ``self.bars``, refreshes the labels, counts
        the values of column 0 of the normalized ``self.headernames`` data
        into ten bins split at 0.1, 0.2, ..., 0.9, and draws one "dark orange"
        rectangle per bin.  Values below 0.1 (including negatives) land in the
        first bin; values above 1 and NaN are not counted.
        """
        for old_bar in self.bars:
            self.canvas.delete(old_bar)
        self.bars = []
        self.updateLabels()

        vtm = self.vobj.build()
        selected = analysis.normalize_columns_separately(
            self.headernames, self.dobj)

        axes = (vtm * self.axes.T).T
        left = axes[0, 0]
        base = axes[2, 1]
        # Bar width is a tenth of the span between two axis end points,
        # truncated to an int.
        binw = int((axes[1, 0] - left) / 10)
        # Height contributed by a single row.
        one_h = (base - axes[3, 1]) / selected.shape[0]

        # A value's bin index is the number of edges it has reached.
        edges = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        counts = [0] * 10
        for row in range(selected.shape[0]):
            value = selected[row, 0]
            if value <= 1:
                counts[sum(1 for edge in edges if value >= edge)] += 1

        for slot, count in enumerate(counts):
            rec = self.canvas.create_rectangle(left + slot * binw,
                                               base - count * one_h,
                                               left + (slot + 1) * binw,
                                               base,
                                               fill="dark orange")
            self.bars.append(rec)
示例#8
0
    def buildPoints(self, headers):
        """Plot the first two columns named in ``headers`` as red dots.

        headers -- list of column names from the dialog box selections; the
            first two entries are used as the x and y dimensions.

        Clears the existing data objects, normalizes the two columns, appends
        a zero z column and a homogeneous column of ones, transforms the
        points with the view matrix and draws a radius-5 red oval for each
        row.  Every oval id is appended to ``self.objects``.
        """
        self.clearData()

        norm = an.normalize_columns_separately(self.data, headers[0:2])
        zeromatrix = np.zeros(norm.shape[0])
        onesmatrix = np.ones(norm.shape[0])

        # x and y are automatically the first two dimensions.
        xdata = headers[0]
        ydata = headers[1]

        # Identity tests instead of "!= None" (PEP 8).  When either header is
        # None the previously stored self.dataPointMatrix is reused as-is.
        if xdata is not None and ydata is not None:
            dmatrix = np.matrix(norm)
            nmatrix = np.matrix((zeromatrix, onesmatrix)).T
            self.dataPointMatrix = np.hstack((dmatrix, nmatrix))

        vtm = self.v.build()
        pts = (vtm * self.dataPointMatrix.T).T

        dx = 5  # fixed point radius; loop-invariant
        for i in range(pts.shape[0]):
            x = pts[i, 0]
            y = pts[i, 1]
            pt = self.canvas.create_oval(x - dx,
                                         y - dx,
                                         x + dx,
                                         y + dx,
                                         fill='red',
                                         outline='')
            self.objects.append(pt)
示例#9
0
def main(argv):
    """Print a summary of the CSV file named by ``argv[1]``.

    argv -- command-line argument list.  With fewer than two entries a usage
        line is printed and the interpreter exits with status 0.

    Prints the row and column counts, the headers, the types, row index 2,
    every value of every row, and then the column selection and analysis
    results for the columns 'thing1' and 'thing3'.
    """
    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(argv[1])

    # print out information about the data
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of columns: ', dobj.get_num_dimensions())

    # print out the headers
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the types (joined directly, so the builtin ``type`` is no
    # longer shadowed by a loop variable)
    print("\nTypes")
    print(", ".join(dobj.get_types()))

    # print out a single row
    print("\nPrinting row index 2")
    print(dobj.get_row(2))

    # print out all of the data: first value as-is, the rest right-aligned
    # in 10 characters and truncated to 3
    print("\nData")
    print("headers:", headers)
    for i in range(dobj.get_num_points()):
        cells = [str(dobj.get_value(headers[0], i))]
        # The one-element tuple keeps "%" formatting safe for any value type.
        cells.extend("%10.3s" % (dobj.get_value(header, i),)
                     for header in headers[1:])
        print("".join(cells))

    print("\n\n\n\nselect_columns")

    columns = ('thing1', 'thing3')
    # Each call receives its own list, as it would with a list literal.
    print("Selected columns:", dobj.select_columns(list(columns)))

    print("Data range:", analysis.data_range(list(columns), dobj))
    print("Mean:", analysis.mean(list(columns), dobj))
    print("Standard deviation:", analysis.stdev(list(columns), dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(list(columns), dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(list(columns), dobj))
示例#10
0
	def buildLinearRegression(self,headers):
		"""Plot the two columns in ``headers`` and draw their best-fit line.

		headers -- the independent and dependent column names, in that order.

		Draws each normalized data point as a "Dot" (oval) or "Square"
		(rectangle) according to ``self.shapeOption``; any other option draws
		no points.  Then fits ``scipy.stats.linregress`` to the unnormalized
		columns, draws the fitted line in gold, appends it to
		``self.regressionLines`` and writes the equation and R^2 value
		(rounded to 3 places) into ``self.label``.
		"""
		normalized = analysis.normalize_columns_separately( headers, self.data )

		# Append z = 0 and the homogeneous 1 to every normalized (x, y) row.
		rows = [row + [0, 1] for row in normalized.tolist()]
		self.points = np.matrix(rows)

		vtm = self.view.build()
		pts = (vtm * self.points.T).T

		# Read the Tk options once and convert the matrix once, instead of
		# calling pts.tolist() for every single point.
		shape = self.shapeOption.get()
		if shape == "Dot":
			create = self.canvas.create_oval
		elif shape == "Square":
			create = self.canvas.create_rectangle
		else:
			create = None
		if create is not None:
			fill = self.colorOption.get()
			dx = 3
			for row in pts.tolist():
				pt = create( row[0]-dx, row[1]-dx, row[0]+dx, row[1]+dx,
							 fill=fill, outline='', tags="data" )
				self.dataObjects.append(pt)
				self.objects.append(pt)

		unnormalized = self.data.get_data(headers).T.tolist()
		regress_output = scipy.stats.linregress(unnormalized[0],unnormalized[1])
		m = round(regress_output[0],3)
		b = round(regress_output[1], 3)
		r = round(regress_output[2]*regress_output[2], 3)

		# Evaluate the fit at the x extremes and rescale into the normalized
		# y range so the line can share the data points' view transform.
		ranges = analysis.data_range(headers,self.data)
		xmin = ranges[0][0]
		xmax = ranges[0][1]
		ymin = ranges[1][0]
		ymax = ranges[1][1]
		pt1 = [0.0, ((xmin * m + b) - ymin)/(ymax - ymin),0,1 ]
		pt2 = [1.0, ((xmax * m + b) - ymin)/(ymax - ymin),0,1 ]
		# Single-argument print(...) is valid on Python 2 and Python 3.
		print("point1")
		print(pt1)
		print("point2")
		print(pt2)
		self.regressionMatrix = np.matrix([pt1,pt2])
		pts = (vtm * self.regressionMatrix.T).T
		print(pts)
		best_fit = self.canvas.create_line(pts[0,0],pts[0,1],pts[1,0],pts[1,1], width=3, fill='gold',tags="data")
		self.regressionLines.append(best_fit)
		self.label['text'] = "The best fit line equation:\n y = " + str(m) + "x + " + str(b)+"\n\nR^2 value: " + str(r)
示例#11
0
	def handlePlotData(self, event = None):
		"""Ask which columns to plot (plus optional color/size) and rebuild the axes.

		event -- optional Tk event; not used.

		Returns without changing state when no data is loaded or when the axes
		dialog's result is an empty list.  Otherwise stores the chosen
		headers, builds ``self.dataMatrix`` from the normalized columns (a
		zero column is added when only two were chosen, then a homogeneous
		column of ones), stores the color and size selections (``None`` when
		nothing was chosen) and calls ``buildAxes``.
		"""
		if self.data is None:
			# Single-argument print(...) is valid on Python 2 and Python 3.
			print("you don't have any data")
			return

		headerbox = Dialogs.AxesDialog(self.root, self.data.get_headers())
		colorbox = Dialogs.ColorDialog(self.root, self.data.get_headers())
		if headerbox.result == []:
			# Nothing chosen: keep whatever is currently plotted.
			return

		self.dataheaders = headerbox.result
		plotMatrix = analysis.normalize_columns_separately(self.data, headerbox.result)

		# Work on locals and publish to self only once everything has been
		# computed, so each selection is tested exactly once.
		colorResult = None
		colorMatrix = None
		if colorbox.resultc != []:
			colorResult = colorbox.resultc
			if self.colorVar.get() == 1:
				# colorVar == 1 selects the raw (unnormalized) color column.
				colorMatrix = self.data.get_data((colorResult,))
			else:
				colorMatrix = analysis.normalize_columns_separately(self.data, (colorResult,))

		sizeResult = None
		sizeMatrix = None
		if colorbox.results != []:
			sizeResult = colorbox.results
			sizeMatrix = analysis.normalize_columns_separately(self.data, (sizeResult,))

		self.rows = len(plotMatrix)
		if len(headerbox.result) == 2:
			# Two axes only: pad with a zero z column.
			plotMatrix = np.hstack((plotMatrix, np.zeros(shape=(self.rows,1))))
		homogenous_coordinates = np.ones(shape =(self.rows, 1))
		self.dataMatrix = np.hstack((plotMatrix , homogenous_coordinates))

		self.colorMatrix = colorMatrix
		self.colorResult = colorResult
		self.sizeMatrix = sizeMatrix
		self.sizeResult = sizeResult
		self.buildAxes()
示例#12
0
文件: data.py 项目: ejseal21/CS251
def main(argv):
    """Run the analysis functions and the Data extensions on a CSV file.

    argv -- command-line argument list; ``argv[1]`` is the CSV filename.
        With fewer than two entries a usage line is printed and the
        interpreter exits with status 0.

    All analysis calls use the first and third headers of the file.
    """
    # A CSV filename is required.
    if len(argv) < 2:
        usage = 'Usage: python %s <csv filename>' % (argv[0])
        print(usage)
        exit(0)

    # Read the data in.
    dobj = Data(argv[1])
    headers = dobj.get_headers()

    def chosen():
        # Fresh [first, third] header list for every call.
        return [headers[0], headers[2]]

    def report(label, function_name, *extra):
        # Print label followed by analysis.<function_name>(chosen(), dobj, *extra).
        print(label, getattr(analysis, function_name)(chosen(), dobj, *extra))

    # The five analysis functions.
    print(chosen())
    report("Data range by column:", "data_range")
    report("Mean:", "mean")
    report("Standard deviation:", "stdev")
    report("Normalize columns separately:", "normalize_columns_separately")
    report("Normalize columns together:", "normalize_columns_together")

    # Extension 1
    report("Median:", "median")

    # Extension 2
    report("Median Separately:", "median_separately")

    # Extension 3
    few_rows = dobj.limit_rows()
    print("just  few rows:", few_rows)

    # Extension 4
    few_columns = dobj.limit_columns()
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        few_columns)

    # Extension 5
    report("Data range overall:", "data_range", True)

    # Extension 6: show the last row before and after adding a point.
    print(
        "The next two print statements get the last row of data. I add a row of data in between,"
        "so they are different.")
    last_before = dobj.get_row(-1)
    print(last_before)
    dobj.add_point([1, 2, 3])
    last_after = dobj.get_row(-1)
    print(last_after)
示例#13
0
	def buildPoints(self, headers):
		"""Plot the data columns chosen in the dialog box.

		headers -- list of column names in the fixed order x, y, z, color,
			size.  x and y are required; z, color and size are used when the
			list is long enough (3, 4 and 5 entries respectively).

		Drawing rules:
		* If ``self.colorlist`` is already populated, every point is drawn
		  with its stored color at radius 5 and nothing else happens.
		* Otherwise the color column (when given) is mapped onto a gradient
		  running from yellow at the minimum to blue at the maximum, and the
		  size column (when given) onto radii from 0 to 7; missing
		  dimensions default to red and radius 5.
		* Finally, when the last entry of ``self.clusterList`` carries
		  cluster codes, the points are redrawn using
		  ``self.clustColorList[code]``.

		Every oval id is appended to ``self.objects``.
		"""
		self.clearData()

		if len(self.pcaList) > 0:
			idx = self.AnalysisWindow.index(tk.ACTIVE)
			self.data = self.pcaList[idx]

		norm = an.normalize_columns_separately(self.data, headers[0:3])
		zeromatrix = np.zeros(norm.shape[0])
		onesmatrix = np.ones(norm.shape[0])

		# x and y are automatically the first two dimensions.
		xdata = headers[0]
		ydata = headers[1]
		# Optional dimensions default to None.  Binding them unconditionally
		# avoids a NameError for header lists that are not 2 to 5 long.
		zdata = headers[2] if len(headers) > 2 else None
		colorpt = headers[3] if len(headers) > 3 else None
		sizept = headers[4] if len(headers) > 4 else None

		if xdata is not None and ydata is not None:
			dmatrix = np.matrix(norm)
			if zdata is not None:
				# norm already holds x, y, z: add only the homogeneous column.
				nmatrix = np.matrix((onesmatrix)).T
			else:
				# norm holds x, y: add a zero z column and the homogeneous column.
				nmatrix = np.matrix((zeromatrix, onesmatrix)).T
			self.dataPointMatrix = np.hstack((dmatrix, nmatrix))

		vtm = self.v.build()
		pts = (vtm * self.dataPointMatrix.T).T

		self.factorlist = []

		def drawPoint(i, radius, fill):
			# Draw point i as an oval of the given radius and remember its id.
			x = pts[i, 0]
			y = pts[i, 1]
			pt = self.canvas.create_oval(x - radius, y - radius, x + radius, y + radius,
										 fill=fill, outline='')
			self.objects.append(pt)

		def toFill(color):
			# The gradient branches store (r, g, b) tuples in colorlist; Tk
			# needs those as "#RRGGBB".  Color names pass through unchanged.
			if isinstance(color, tuple):
				return "#%02X%02X%02X" % color
			return color

		# If the color list is not empty, color each point as indicated in it.
		if len(self.colorlist) > 0:
			for i in range(pts.shape[0]):
				self.factorlist.append(5)
				self.color = self.colorlist[i]
				drawPoint(i, self.factorlist[i], toFill(self.color))
			# Return only after ALL points are drawn; returning from inside
			# the loop drew just the first point.
			return

		self.colorlist = []

		if colorpt is not None:
			self.clearData()
			# Color dimension: normalized value v maps to
			# (255 - 255v, 255 - 255v, 255v), i.e. yellow at 0 and blue at 1.
			colornorm = an.normalize_columns_separately(self.data, [colorpt])
			for i in range(colornorm.shape[0]):
				alpha = colornorm[i, 0] * 255
				self.colorlist.append((int(255 - alpha), int(255 - alpha), int(alpha)))

			if sizept is not None:
				# Size dimension: radii range from 0 to 7.
				sizenorm = an.normalize_columns_separately(self.data, [sizept])
				for i in range(sizenorm.shape[0]):
					self.factorlist.append(sizenorm[i, 0] * 7)
			else:
				# No size dimension: every point gets radius 5.
				self.factorlist.extend([5] * pts.shape[0])

			for i in range(pts.shape[0]):
				self.color = self.colorlist[i]
				drawPoint(i, self.factorlist[i], "#%02X%02X%02X" % self.color)

		elif sizept is None:
			# Neither color nor size chosen: red points of radius 5.
			self.clearData()
			for i in range(pts.shape[0]):
				self.factorlist.append(5)
				self.colorlist.append('red')
				self.color = self.colorlist[i]
				drawPoint(i, self.factorlist[i], self.color)

		# If there is a cluster object, redraw the points giving each cluster
		# its own color.  The length check keeps an empty clusterList from
		# raising IndexError.
		if len(self.clusterList) > 0 and len(self.clusterList[-1][1]) > 0:
			self.clearData()
			for i in range(pts.shape[0]):
				codeVal = self.clusterList[-1][1][i]
				self.color = self.clustColorList[codeVal]
				drawPoint(i, self.factorlist[i], self.color)
示例#14
0
    # print out the types
    print("\nTypes:")
    types = dobj.get_types()
    s = types[0]
    for type in types[1:]:
        s += ", " + type
    print(s)

    r = analysis.data_range(headers, dobj)
    print("Data Range:\n ", r)
    mean = analysis.mean(headers, dobj)
    print("Mean: \n", mean)

    std = analysis.stdev(headers, dobj)
    print("Standard Deviation: \n", std)

    #std = analysis.stdev(headers, dobj)
    #print("Standard Deviation: \n", std)

    nor_m1 = analysis.normalize_columns_separately(headers, dobj)
    print("Normalized Columns Separately: \n", nor_m1)

    nor_m2 = analysis.normalize_columns_together(headers, dobj)
    print("Normalized Columns Together: \n", nor_m2)

    #dobj.add_colummn('new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14])
    #print("\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]")
    #print("----- New Matrix: -----")
    #print(dobj.get_whole_matrix())
    print("---------------------------------")
示例#15
0
    def buildPoints(self, headers):
        """Normalize the columns in ``headers`` and draw one canvas shape per row.

        headers -- column names, normalized separately.  The first two
            columns are x and y; the following columns are consumed in the
            order z, size, color, each only when the matching entry of
            ``self.menuFlags`` (indices 2, 3 and 4) is set.

        The shape comes from ``self.vshape``: 'Circle', 'Rectangle',
        'Triangle', 'Pentagon', anything else draws an arc.  Triangles and
        pentagons are created without fill/outline options.  Every canvas id
        is stored in ``self.points``.
        """
        print("\n\nheaders for buildPoints:", headers)

        # Remove whatever was plotted before.
        for old_point in self.points:
            self.canvas.delete(old_point)
        self.points = []

        # Source is the selected PCA result when plotting PCA data.
        if self.pcaplot:
            source = self.PCAs[self.PCAlistbox.curselection()[0]]
        else:
            source = self.data
        self.plot_data = analysis.normalize_columns_separately(headers, source)

        # x, y and either the real z column or zeros.
        self.plot = self.plot_data[:, :2]
        use_z = bool(self.menuFlags[2])
        if use_z:
            third = self.plot_data[:, 2]
        else:
            third = np.zeros((len(self.plot), 1))
        self.plot = np.hstack((self.plot, third))

        # Size column sits right after z (or right after y when z is off).
        use_size = bool(self.menuFlags[3])
        if use_size:
            self.size = self.plot_data[:, 3 if use_z else 2]
        else:
            self.size = np.ones((len(self.plot), 1))
        self.size = 3 * self.size + 1

        # Color column comes after whichever of z/size are in use.
        if self.menuFlags[4]:
            color = self.plot_data[:, 2 + int(use_z) + int(use_size)]
            self.green = -255 * color + 255
            self.red = 255 * color
        else:
            self.green = np.ones((len(self.plot), 1))
            self.red = np.ones((len(self.plot), 1))

        # homogeneous coordinate
        self.plot = np.hstack((self.plot, np.ones((self.plot.shape[0], 1))))

        # Push the points through the view transformation.
        vtm = self.view.build()
        pts = (vtm * self.plot.T).T

        shape = self.vshape.get()
        for i in range(len(pts)):
            x = pts[i, 0]
            y = pts[i, 1]
            radius = self.size[i]
            left = int(x - radius)
            right = int(x + radius)
            top = int(y - radius)
            bottom = int(y + radius)

            if shape == 'Triangle':
                pt = self.canvas.create_polygon(left, bottom,
                                                right, bottom,
                                                int(x), top)
            elif shape == 'Pentagon':
                pt = self.canvas.create_polygon(left, int(y),
                                                int(x), bottom,
                                                right, int(y),
                                                right, top,
                                                left, top)
            else:
                # Circle, Rectangle and the fallback arc share a bounding box
                # and a red/green fill.
                if shape == 'Circle':
                    create = self.canvas.create_oval
                elif shape == 'Rectangle':
                    create = self.canvas.create_rectangle
                else:
                    create = self.canvas.create_arc
                fill = "#%02x%02x%02x" % (int(self.red[i]), int(self.green[i]), 0)
                pt = create(left, top, right, bottom, fill=fill, outline='')

            # Remember the canvas object.
            self.points.append(pt)
示例#16
0
    def build_linear_regression(self, independent_variable,
                                dependent_variable):
        """Scatter-plot two columns and overlay their single-variable fit line.

        The two columns are normalized, padded with a zero z column and a
        homogeneous 1, and drawn as one canvas marker per row using the shape
        currently selected in ``self.vshape``.  The regression line and a
        label with slope, intercept and R-value are then created, followed by
        calls to updateAxes(), updateFits() and updatePoints().

        Args:
            independent_variable: column identifier used for x (passed to
                analysis.normalize_columns_separately).
            dependent_variable: column identifier used for y.
        """
        # Normalized (x, y) columns, then z = 0 and the homogeneous coordinate.
        self.plot = analysis.normalize_columns_separately(
            [independent_variable, dependent_variable], self.data)
        row_count = self.plot.shape[0]
        self.plot = np.hstack((self.plot, np.zeros((row_count, 1))))
        self.plot = np.hstack((self.plot, np.ones((row_count, 1))))

        # Build the view matrix and transform the points.
        # NOTE(review): this multiplies rows by vtm directly; the original
        # author left "(vtm * self.plot.T).T" as a commented-out alternative.
        # Confirm which form updatePoints() expects.
        vtm = self.view.build()
        pts = self.plot * vtm

        # The selected shape cannot change while this loop runs, so read the
        # Tk variable once instead of up to four times per point.
        shape = self.vshape.get()
        radius = 3

        # self.size must hold one entry per point so that the movement
        # functions don't break.
        self.size = [radius] * len(pts)
        for i in range(len(pts)):
            x = pts[i, 0]
            y = pts[i, 1]
            left = int(x - radius)
            right = int(x + radius)
            top = int(y - radius)
            bottom = int(y + radius)

            if shape == 'Triangle':
                pt = self.canvas.create_polygon(left, bottom,
                                                right, bottom,
                                                int(x), top,
                                                fill='black',
                                                outline='')
            elif shape == 'Pentagon':
                # No fill/outline options are passed here, exactly as before.
                pt = self.canvas.create_polygon(
                    (left, int(y), int(x), bottom, right, int(y),
                     right, top, left, top))
            elif shape == 'Circle':
                pt = self.canvas.create_oval(left, top, right, bottom,
                                             fill='black',
                                             outline='')
            elif shape == 'Rectangle':
                pt = self.canvas.create_rectangle(left, top, right, bottom,
                                                  fill='black',
                                                  outline='')
            else:
                pt = self.canvas.create_arc(left, top, right, bottom,
                                            fill='black',
                                            outline='')
            self.points.append(pt)

        linres = analysis.single_linear_regression(self.data,
                                                   independent_variable,
                                                   dependent_variable)
        # Indices 3 and 4 of linres (p-value / std error in the original
        # unpacking) are not used here.
        slope = linres[0]
        intercept = linres[1]
        rvalue = linres[2]
        xmin = linres[5]
        xmax = linres[6]
        ymin = linres[7]
        ymax = linres[8]

        # Fitted y at the x extremes, rescaled by the y range.
        # NOTE(review): ymax == ymin (constant dependent column) makes this
        # denominator zero.
        y_span = ymax - ymin
        yends = [((xmin * slope + intercept) - ymin) / y_span,
                 ((xmax * slope + intercept) - ymin) / y_span]

        # One endpoint per column: rows are x, y, z, homogeneous coordinate.
        self.regression_endpoints = np.matrix([[0.0, 1.0],
                                               [yends[0], yends[1]], [0, 0],
                                               [1, 1]])

        self.line_of_fit = (self.canvas.create_line(
            self.regression_endpoints[0, 0],
            self.regression_endpoints[1, 0],
            self.regression_endpoints[0, 1],
            self.regression_endpoints[1, 1],
            fill="red"))

        self.regression_lines.append(self.line_of_fit)
        self.fit_label = tk.Label(self.canvas,
                                  text="slope: " + str(slope) +
                                  "\nIntercept: " + str(intercept) +
                                  "\nR-value: " + str(rvalue))
        self.fit_label.place(x=self.regression_endpoints[0, 1],
                             y=self.regression_endpoints[1, 1])
        self.updateAxes()
        self.updateFits()
        self.updatePoints()
示例#17
0
    def build_3d_linear_regression(self, independent_variables,
                                   dependent_variable):
        """Scatter-plot three columns and overlay a two-predictor regression line.

        Normalizes the first two independent columns and the dependent
        column, draws a small black dot per row, creates the fit line and a
        label with both slopes, the intercept and the R-value, and finally
        calls updateAxes(), updateFits() and updatePoints().

        Args:
            independent_variables: sequence whose first two entries are the
                column identifiers used for the x and y axes.
            dependent_variable: column identifier used for the z axis.
        """
        self.plot = analysis.normalize_columns_separately([
            independent_variables[0], independent_variables[1],
            dependent_variable
        ], self.data)

        # Append the homogeneous coordinate.
        self.plot = np.hstack((self.plot, np.ones((self.plot.shape[0], 1))))

        # build the view matrix and transform the points
        vtm = self.view.build()
        pts = self.plot * vtm  # (vtm * self.plot.T).T

        # self.size must hold one entry per point so that the movement
        # functions don't break.
        self.size = [1] * len(pts)
        for i in range(len(pts)):
            x = pts[i, 0]
            y = pts[i, 1]
            pt = self.canvas.create_oval(int(x - 1),
                                         int(y - 1),
                                         int(x + 1),
                                         int(y + 1),
                                         fill="black",
                                         outline='')
            self.points.append(pt)

        linres = analysis.linear_regression(self.data, independent_variables,
                                            dependent_variable)
        slope0 = linres[0]
        slope1 = linres[1]
        intercept = linres[2]
        rvalue = linres[4]

        # Query each column's [min, max] once instead of once per bound.
        x_range = analysis.data_range([independent_variables[0]],
                                      self.data)[0]
        y_range = analysis.data_range([independent_variables[1]],
                                      self.data)[0]
        z_range = analysis.data_range([dependent_variable], self.data)[0]
        xmin, xmax = x_range[0], x_range[1]
        ymin, ymax = y_range[0], y_range[1]
        zmin, zmax = z_range[0], z_range[1]

        b0 = slope0[0, 0]
        b1 = slope1[0, 0]
        c = intercept[0, 0]

        # Line endpoints at the x extremes, rescaled by the y and z ranges.
        yends = [
            ((xmin * b0 + c) - ymin) / (ymax - ymin),
            ((xmax * b0 + c) - ymin) / (ymax - ymin)
        ]
        zends = [
            ((xmin * b1 + c) - zmin) / (zmax - zmin),
            ((xmax * b1 + c) - zmin) / (zmax - zmin)
        ]

        # One endpoint per column: rows are x, y, z, homogeneous coordinate.
        self.regression_endpoints = np.matrix([[0.0, 1.0],
                                               [yends[0], yends[1]],
                                               [zends[0], zends[1]], [1, 1]])

        print("self.regression_endpoints", self.regression_endpoints)
        self.line_of_fit = (self.canvas.create_line(
            self.regression_endpoints[0, 0],
            self.regression_endpoints[1, 0],
            self.regression_endpoints[0, 1],
            self.regression_endpoints[1, 1],
            fill="red"))

        self.regression_lines.append(self.line_of_fit)
        self.fit_label = tk.Label(self.canvas,
                                  text="slope0: " + str(b0) +
                                  "\nslope1: " + str(b1) +
                                  "\nIntercept: " + str(c) +
                                  "\nR-value: " + str(rvalue))
        self.fit_label.place(x=self.regression_endpoints[0, 1],
                             y=self.regression_endpoints[1, 1])
        self.updateAxes()
        self.updateFits()
        self.updatePoints()
示例#18
0
    def buildPoints(self, inputHeaders):
        '''Delete the existing data graphics and draw a new set of points.

        inputHeaders is indexed as [x, y, z, color, size].  The z, color and
        size entries may be None: z then defaults to 0, the color to black
        and the radius to 3.0.
        '''
        # NOTE(review): both branches read the selection from self.pcaBoxA;
        # confirm that the cluster list is meant to share that listbox.
        if len(self.PCAObjects) > 0:
            active = self.pcaBoxA.index(tk.ACTIVE)
            self.dataObj = self.PCAObjects[active]

        if len(self.ClusterObjects) > 0:
            active = self.pcaBoxA.index(tk.ACTIVE)
            self.dataObj = self.ClusterObjects[active]

        # clear the canvas
        for obj in self.objects:
            self.canvas.delete(obj)
        self.objects = []
        # The color/size lists are indexed in parallel with self.objects
        # below, so they have to be restarted together with it; otherwise a
        # second call would keep reading the previous plot's entries.
        self.clist = []
        self.slist = []

        has_z = inputHeaders[2] is not None

        # first two variables are given; add z only when it was selected
        spatial_headers = inputHeaders[0:3] if has_z else inputHeaders[0:2]
        self.data2plot = analysis.normalize_columns_separately(
            spatial_headers, self.dataObj)
        row_count = self.data2plot.shape[0]

        # color: black by default, otherwise blue (0) -> red (1)
        if inputHeaders[3] is None:
            self.clist.extend([(0, 0, 0)] * row_count)
        else:
            cmatrix = analysis.normalize_columns_separately(
                [inputHeaders[3]], self.dataObj)
            for i in range(cmatrix.shape[0]):
                level = float(cmatrix[i, 0])
                self.clist.append(
                    (int(level * 255), 0, int(255 * (1 - level))))

        # size: 3.0 by default, otherwise the normalized value scaled by 5
        if inputHeaders[4] is None:
            self.slist.extend([3.0] * row_count)
        else:
            smatrix = analysis.normalize_columns_separately(
                [inputHeaders[4]], self.dataObj)
            for i in range(smatrix.shape[0]):
                self.slist.append(smatrix[i, 0] * 5)

        zeros = np.zeros((row_count, 1))
        ones = np.ones((row_count, 1))

        # pad to homogeneous (x, y, z, 1) rows
        if has_z:
            self.data2plot = np.hstack((self.data2plot, ones))
        else:
            self.data2plot = np.hstack((self.data2plot, zeros, ones))

        # np.float was only an alias of the builtin float and no longer
        # exists in NumPy >= 1.24.
        self.data2plot = self.data2plot.astype(float)

        # build vtm and use it to transform the data
        vtm = self.view.build()
        pts = (vtm * self.data2plot.T).T

        # create the canvas graphics objects
        for i in range(pts.shape[0]):
            radius = self.slist[i]
            self.objects.append(self.canvas.create_oval(
                pts[i, 0] - radius,
                pts[i, 1] - radius,
                pts[i, 0] + radius,
                pts[i, 1] + radius,
                fill='#%02x%02x%02x' % self.clist[i]))

        self.updatePoints()
    def buildPoints(self):
        """Redraw every data point from the currently selected headers.

        Deletes the existing canvas objects, replaces ``self.vobj`` with a
        fresh view, reloads and normalizes the columns named in
        ``self.headernames`` and draws one oval per row.  The optional
        ``self.color_axis`` and ``self.size_axis`` columns drive each point's
        fill color and radius.
        """
        # clear all data
        for pt in self.objects:
            self.canvas.delete(pt)
        self.objects = []

        # reset orientation
        self.vobj = view.View().clone()
        self.updateAxes()
        self.updateLabels()

        self.raw = self.dobj.getNumCol(self.rawheaders)
        self.data = analysis.normalize_columns_separately(
            self.headernames, self.dobj)
        row_count = self.data.shape[0]

        # NOTE(review): self.coords is only assigned for 2 or 3 headers; any
        # other count leaves a previous value (or none at all) in place.
        if len(self.headernames) == 2:
            # add a column of 0's and homogeneous coordinate
            self.coords = np.hstack((self.data, row_count * [[0]],
                                     row_count * [[1]]))

            self.y_axis_label.set(self.headernames[1])
            self.z_axis_label.set(None)

        elif len(self.headernames) == 3:
            # add only homogeneous coordinate
            self.coords = np.hstack((self.data, row_count * [[1]]))
            self.y_axis_label.set(self.headernames[1])
            self.z_axis_label.set(self.headernames[2])

        if self.color_axis is not None:
            # normalize color axis
            self.colors = analysis.normalize_columns_separately(
                [self.color_axis], self.dobj)
        else:
            # if not specified, use 1
            self.colors = np.matrix([[1]] * row_count)

        if self.size_axis is not None:
            # normalize size axis
            self.sizes = analysis.normalize_columns_separately(
                [self.size_axis], self.dobj)
        else:
            # if not specified, use 3
            self.sizes = np.matrix([[3]] * row_count)

        # draw points
        vtm = self.vobj.build()
        ptcoords = vtm * self.coords.T
        for i in range(ptcoords.shape[1]):
            radius = float(self.sizes[i, 0])
            cx = ptcoords[0, i]
            cy = ptcoords[1, i]
            # alpha = 1 gives pure red, alpha = 0 gives pure green
            alpha = float(self.colors[i, 0])
            rgb = (int(alpha * 255), int((1 - alpha) * 255), 0)
            point = self.canvas.create_oval(cx - radius,
                                            cy - radius,
                                            cx + radius,
                                            cy + radius,
                                            fill='#%02x%02x%02x' % rgb,
                                            outline='')
            self.objects.append(point)
示例#20
0
	def alignPCA(self):
		"""Let the user pick PCA / raw-data columns to plot, then build the axes.

		The dialog offers the PCA headers followed by the raw data headers, so
		an index below len(PCA headers) refers to self.PCA and any other index
		to self.data.  The chosen spatial columns are normalized and stored in
		self.dataMatrix (padded to x, y, z, 1); optional color and size
		columns are stored in self.colorMatrix / self.sizeMatrix.
		"""
		if self.PCAanalysis != [] and self.PCAListbox.curselection() != ():
			self.PCA = self.PCAanalysis[self.PCAListbox.curselection()[0]]

		if self.PCA is None:
			# Parenthesized single-argument print behaves the same on
			# Python 2 and Python 3.
			print("you don't have any data")
			return

		pca_headers = self.PCA.get_headers()
		pca_count = len(pca_headers)
		headers = pca_headers + self.data.get_headers()

		variables = Dialogs.selectPCAData(self.root, headers)
		if variables.result == []:
			print("you didn't pick anything")
			return

		def resolve(index):
			# Map an index into the combined header list to (data object, header).
			if index < pca_count:
				return self.PCA, pca_headers[index]
			return self.data, self.data.get_headers()[index - pca_count]

		self.dataheaders = []

		# First selected column starts the matrix, the rest are stacked on.
		source, header = resolve(variables.result[0])
		self.dataMatrix = analysis.normalize_columns_separately(source, (header,))
		self.dataheaders.append(header)

		for index in variables.result[1:]:
			source, header = resolve(index)
			column = analysis.normalize_columns_separately(source, (header,))
			self.dataMatrix = np.hstack((self.dataMatrix, column))
			print(header)
			self.dataheaders.append(header)

		# Only x and y chosen: add a zero z column.
		if len(variables.result) == 2:
			self.dataMatrix = np.hstack((self.dataMatrix, np.zeros(shape=(len(self.dataMatrix), 1))))

		homogenous_coordinates = np.ones(shape=(len(self.dataMatrix), 1))
		self.dataMatrix = np.hstack((self.dataMatrix, homogenous_coordinates))

		if variables.resultc is not None:
			source, header = resolve(variables.resultc)
			if variables.resultc < pca_count and header[:7] == 'cluster':
				# PCA columns whose name starts with 'cluster' are used raw,
				# not normalized.
				self.colorMatrix = self.PCA.get_data((header,))
			else:
				self.colorMatrix = analysis.normalize_columns_separately(source, (header,))
			self.colorResult = header
		else:
			self.colorResult = None
			self.colorMatrix = None

		if variables.results is not None:
			# The size column is resolved from its own index.  The previous
			# code tested variables.resultc here, which picked the wrong data
			# object (or raised IndexError) whenever the color and size
			# selections fell on different sides of the PCA/raw boundary.
			source, header = resolve(variables.results)
			self.sizeMatrix = analysis.normalize_columns_separately(source, (header,))
			self.sizeResult = header
		else:
			self.sizeResult = None
			self.sizeMatrix = None
		self.rows = len(self.dataMatrix)
		self.buildAxes()
示例#21
0
def main():
    """Print summary statistics for a CSV file and write an augmented copy.

    Reads the file named by ``sys.argv[1]`` into a ``data.Data`` object,
    prints its headers, types and the results of several ``analysis``
    functions, adds a numeric column named 'new col' and writes headers,
    types and the whole matrix to ``foo.csv`` in the working directory.
    """
    numpy.set_printoptions(suppress=True)
    print("\n----- Database Info -----")
    if len(sys.argv) < 2:
        print('Usage: python %s <csv filename>' % (sys.argv[0]))
        exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(sys.argv[1])
    print("\nName: ", dobj.get_filename())
    # print out information about the data
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())

    # print out the headers (join copes with an empty list, where indexing
    # element 0 raised IndexError)
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the numeric headers
    print("\nNumeric Headers:")
    nheaders = dobj.get_numericheaders()
    print(", ".join(nheaders))

    # print out the types
    print("\nTypes:")
    print(", ".join(dobj.get_types()))

    r = analysis.data_range(headers, dobj)
    print("Data Range:\n ", r)
    mean = analysis.mean(headers, dobj)
    print("Mean: \n", mean)

    std = analysis.stdev(headers, dobj)
    print("Standard Deviation: \n", std)

    # Normalization is only run when every header is numeric.
    if headers == nheaders:
        nor_m1 = analysis.normalize_columns_separately(headers, dobj)
        print("Normalized Columns Separately: \n", nor_m1)
        nor_m2 = analysis.normalize_columns_together(headers, dobj)
        print("Normalized Columns Together: \n", nor_m2)

    s = analysis.sumup(headers, dobj)
    print("Sum:\n", s)

    print("Variance:\n", analysis.variance(headers, dobj))

    # EXTENSION5 ADD COLUMN
    dobj.add_colummn('new col', 'numeric',
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    print(
        "\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]"
    )
    print("----- New Matrix: -----")
    m = dobj.get_whole_matrix()
    print(m)
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())
    print("---------------------------------")

    # EXTENSION6 WRITE TO A CSV file
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('foo.csv', 'w', newline='') as outputfile:
        wr = csv.writer(outputfile, delimiter=',')
        wr.writerow(dobj.get_headers())
        wr.writerow(dobj.get_types())
        wr.writerows(numpy.asarray(m))
示例#22
0
 def drawClustering(self):
     """Plot the cluster-analysis data using columns chosen in a DataDialog.

     Requires self.clusterData; the dialog supplies x, y and optional z,
     size and color columns.  One oval per row is created on the canvas and
     appended to self.objects.
     """
     if self.clusterData == None:
         print "Run a cluster analysis"
         return
     
     dialog = DataDialog(self.root, self.clusterData.get_headers(), "Choose columns")
     # The dialog reports an unselected x/y axis as the string "NaN".
     if dialog.x == "NaN" or dialog.y=="NaN":
         print "You must choose columns for the x and y axes"
         return
     
     self.clear()
     self.reset()
         
     # An empty string means no z column was chosen; z is then padded with 0.
     # NOTE(review): the row counts below come from self.data while the
     # columns come from self.clusterData -- presumably both always have the
     # same number of rows; confirm.
     if (dialog.z != ""):
         matrix = analysis.normalize_columns_separately(self.clusterData, 
                                 [dialog.x, dialog.y, dialog.z])
     else:
         matrix = analysis.normalize_columns_separately(self.clusterData, 
                                 [dialog.x, dialog.y])
         zeros = np.zeros(self.data.get_raw_num_rows())
         matrix = np.hstack( (matrix, np.matrix(zeros).T) )
         
     # Append the homogeneous coordinate.
     ones = np.ones(self.data.get_raw_num_rows())
     self.dataMatrix = np.hstack( (matrix, np.matrix(ones).T) )
     # calculate view coordinates
     vtm = self.view.build()
     pts = (vtm * self.dataMatrix.T).T
     
     # Point radius: sqrt of the normalized size value scaled by 3, plus 1
     # (i.e. 1..4 if the normalized value lies in [0, 1]); default radius 2.
     if (dialog.size != ''):
         size = analysis.normalize_columns_separately(self.clusterData, [dialog.size])
         self.sizes = size.T.tolist()[0]
         for i in range(len(self.sizes)):
                 self.sizes[i] = int(math.sqrt(self.sizes[i])*3+1)
     else:
         self.sizes = [2]*self.clusterData.get_raw_num_rows()
     
     # handle color data
     if dialog.color != "":
         self.colorMatrix = self.clusterData.get_data([dialog.color])
         # dialog.pre == 1 delegates color choice to self.preselectColors.
         if dialog.pre.get() == 1:
             self.colors = self.preselectColors(self.colorMatrix)
         else:
             # normalize column with mean and standard deviation
             mean = np.mean(self.colorMatrix, axis=0)
             std = np.std(self.colorMatrix, axis=0)
             tmp = self.colorMatrix - mean
             # A constant column has zero deviation; use all zeros instead
             # of dividing by it.
             if std == 0:
                 color = np.matrix( np.zeros((self.colorMatrix.shape[0],
                                         self.colorMatrix.shape[1]) ))
             else:
                 color = tmp / np.matrix(std, dtype=float)
             # Shift/scale so that z-scores in [-2.5, 2.5] land in [0, 1].
             color = (color+2.5)/5
             # create color list
             # (the list only provides one slot per row; every entry is
             # overwritten with a hex string in the loop below)
             self.colors = color.T.tolist()[0]
             for i in range(self.data.get_raw_num_rows()):
                 # Clamp outliers to [0, 1] before converting to a color.
                 if color[i, 0] < 0:
                     color[i, 0] = 0
                 if color[i, 0] > 1:
                     color[i, 0] = 1
                 # 0 maps to blue, 1 maps to green.
                 rgb = ( 0, color[i, 0]*255, (1-color[i, 0])*255 )
                 self.colors[i] = ('#%02x%02x%02x' % rgb)
     else:
         self.colors = ['#000000']*self.data.get_raw_num_rows()
     
     # Draw one oval per transformed point.
     for i in range(len(pts)):
         pt = self.canvas.create_oval(pts[i, 0]-self.sizes[i], 
                     pts[i, 1]-self.sizes[i], pts[i, 0]+self.sizes[i], 
                     pts[i, 1]+self.sizes[i], fill=self.colors[i], outline='')
         self.objects.append(pt)
	def main(argv):
		"""Exercise the Data class and the analysis normalization helpers.

		argv is the command-line argument list; argv[1] must name the CSV
		file to load.  Everything is reported with print().
		"""
		# test command line arguments
		if len(argv) < 2:
			print( 'Usage: python %s <csv filename>' % (argv[0]))
			exit(0)

		# create a data object, which reads in the data
		dobj = Data(argv[1])

		# print out information about the data
		print('Number of rows:    ', dobj.get_num_points() )
		print('Number of columns: ', dobj.get_num_dimensions() )

		# print out the headers
		print("\nHeaders:")
		headers = dobj.get_raw_headers()
		print(",".join(headers))

		# print out the types
		print("\nTypes:")
		print(", ".join(dobj.get_raw_types()))

		# print out a single row
		print("\nPrinting row index 2:")
		print( dobj.get_row( 2 ) )

		# print out cols
		c = dobj.getCol([headers[0], headers[1]])
		# "\S" was an invalid escape sequence that printed a literal
		# backslash; every other heading starts with a newline.
		print("\nSelect the 1st and 2nd col:")
		print( c )

		# print out all of the data, one row per line, values concatenated
		# without a separator (as before)
		print("\n All Data:")
		headers = dobj.get_raw_headers()
		for i in range(dobj.get_num_points()):
			print("".join(str(dobj.get_value(header, i)) for header in headers))

		# EXTENSION
		print("\nAdd a Column")
		dobj.addColumn("new col", "numeric", [[0],[1],[2]])
		print('Number of columns: ', dobj.get_num_dimensions())

		print("--- testing manipulations on the 1st and the 3rd NUMERIC columns:---")
		num_headers = dobj.get_num_headers()
		picked = [num_headers[0], num_headers[2]]
		d = dobj.getCol(picked)
		print(d)

		# test normalized columns
		print("\n Normalize by columns")
		sep_norm = analysis.normalize_columns_separately(picked, dobj)
		print(sep_norm)

		# test normalized matrix
		print("\n Normalize the whole matrix")
		tog_norm = analysis.normalize_columns_together(picked, dobj)
		print(tog_norm)
示例#24
0
 def buildLinearRegression(self, indx, indz, dep, export, filename):
     """Plot the data, overlay a regression line and optionally export the fit.

     indx     -- column used for the x axis (first independent variable)
     indz     -- column used for the z axis; '' selects a single-variable fit
     dep      -- dependent column, plotted on the y axis
     export   -- when equal to 1 the fit summary is written to a text file
     filename -- export path without the ".txt" extension
     """
     num_rows = self.data.get_raw_num_rows()

     if (indz != ''):
         matrix = analysis.normalize_columns_separately(self.data, [indx, dep, indz])
     else:
         # no z column: pad with zeros
         matrix = analysis.normalize_columns_separately(self.data, [indx, dep])
         matrix = np.hstack( (matrix, np.matrix(np.zeros(num_rows)).T) )

     # append the homogeneous coordinate
     self.dataMatrix = np.hstack( (matrix, np.matrix(np.ones(num_rows)).T) )
     # calculate view coordinates
     vtm = self.view.build()
     pts = (vtm * self.dataMatrix.T).T

     # use points with default size and color
     self.sizes = [2]*num_rows
     self.colors = ['#000000']*num_rows
     for i in range(len(pts)):
         radius = self.sizes[i]
         pt = self.canvas.create_oval(pts[i, 0]-radius, pts[i, 1]-radius,
                     pts[i, 0]+radius, pts[i, 1]+radius,
                     fill=self.colors[i], outline='')
         self.objects.append(pt)

     # calculate single variable linear regression
     if (indz == ''):
         slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
                                     self.data.get_data([indx, dep]))
         ranges = analysis.data_range(self.data, [indx, dep])
         # fitted y at the x extremes, rescaled by the y range
         y_min = ranges[1][0]
         y_span = ranges[1][1] - ranges[1][0]
         end1y = ((ranges[0][0]*slope+intercept)-y_min)/y_span
         end2y = ((ranges[0][1]*slope+intercept)-y_min)/y_span
         self.regressionMatrix = np.matrix([ [0.0, end1y, 0.0, 1.0],
                                             [1.0, end2y, 0.0, 1.0] ])

         eqn = "y = %.3fx + %.3f \nR = %.3f" % (slope, intercept, r_value)
         data = "p = %.3f \nStandard error = %.3f" % (p_value, std_err)
         out = eqn + "\n" + data

     # calculate multiple variable linear regression
     else:
         b, sse, r2, t, p = analysis.linear_regression(self.data, [indx, indz], dep)
         ranges = analysis.data_range(self.data, [indx, indz, dep])
         y_min = ranges[2][0]
         y_span = ranges[2][1] - ranges[2][0]
         # fitted value at the (min, min) and (max, max) corners, rescaled
         # by the dependent range
         end1y = ranges[0][0]*b[0] + ranges[1][0]*b[1] + b[2]
         end1y = (end1y - y_min)/y_span
         end2y = ranges[0][1]*b[0] + ranges[1][1]*b[1] + b[2]
         end2y = (end2y - y_min)/y_span
         self.regressionMatrix = np.matrix([ [0.0, end1y, 0.0, 1.0],
                                             [1.0, end2y, 1.0, 1.0] ])

         eqn =  "y = %.3fx + %.3fz + %.3f \nR^2 = %.3f" % (b[0], b[1], b[2], r2)
         sse_data = "Sum-squared error = %.3f" % (sse)
         p_data = "p = [%.3f, %.3f, %.3f]" % (p[0, 0], p[0, 1], p[0, 2])
         t_data = "t-statistic = [%.3f, %.3f, %.3f]" % (t[0, 0], t[0, 1], t[0, 2])
         out = eqn + "\n" + sse_data + "\n" + p_data + "\n" + t_data

     # display regression onscreen
     self.canvas.itemconfig(self.labels[0], text="x")
     self.canvas.itemconfig(self.labels[1], text="y")
     self.canvas.itemconfig(self.labels[2], text="z")
     endpts = (vtm * self.regressionMatrix.T).T
     l = self.canvas.create_line(endpts[0, 0], endpts[0, 1], endpts[1, 0],
                                     endpts[1, 1], fill="red")
     self.regressionObjects.append(l)
     regLabel = self.canvas.create_text(endpts[1, 0]+120, endpts[1, 1]+20, text=eqn)
     self.labels.append(regLabel)
     title = "Linear regression for " + str(self.fn)

     # write linear regression function to file
     if (export == 1):
         # The with-block closes the file even if the write fails, and the
         # handle no longer shadows the Python 2 builtin `file`.
         with open(filename + ".txt", 'w') as report:
             report.write(title + "\n" + out)
示例#25
0
	def buildLinearRegression(self,headers):
		"""Plot headers[0] against headers[1] and overlay the fitted line.

		The normalized columns are padded to (x, y, 0, 1) rows, drawn as blue
		dots, and a red line through the fitted values at the x extremes is
		added to self.linRegLines.  Slope, intercept and r^2 are written to
		self.statslabel.
		"""
		norm = an.normalize_columns_separately(self.data, headers)

		# x and y are automatically first two dimensions
		xdatahead = headers[0]
		ydatahead = headers[1]

		# NOTE(review): when either header is None the previous
		# self.dataPointMatrix is reused below.
		if xdatahead is not None and ydatahead is not None:
			# zero z column and homogeneous ones column
			padding = np.matrix((np.zeros(norm.shape[0]), np.ones(norm.shape[0]))).T
			self.dataPointMatrix = np.hstack((np.matrix(norm), padding))

		vtm = self.v.build()
		pts = (vtm * self.dataPointMatrix.T).T

		dx = 5
		for i in range(pts.shape[0]):
			x = pts[i, 0]
			y = pts[i, 1]
			pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx,
										 fill='blue', outline='')
			self.objects.append(pt)

		xdata = np.array(self.data.get_data([xdatahead]).T)[0]
		ydata = np.array(self.data.get_data([ydatahead]).T)[0]

		# Only slope, intercept and r are used; the residual statistics that
		# used to be computed here were never read.
		slope, intercept, r_value, p_value, slope_std_error = st.linregress(xdata, ydata)
		r2_value = r_value * r_value

		rangex = an.data_range(self.data, [xdatahead])
		rangey = an.data_range(self.data, [ydatahead])

		# fitted y at the x extremes, rescaled by the y range
		y_min = rangey[0,0]
		y_span = rangey[0,1] - rangey[0,0]
		yend0 = ((rangex[0,0]*slope + intercept) - y_min) / y_span
		yend1 = ((rangex[0,1]*slope + intercept) - y_min) / y_span

		# Debug output; the %-formatted form prints the same text on
		# Python 2 and Python 3.
		print("minx %s" % (rangex[0,0],))
		print("maxx %s" % (rangex[0,1],))
		print("miny %s" % (rangey[0,0],))
		print("maxy %s" % (rangey[0,1],))

		# One endpoint per column: rows are x, y, z, homogeneous coordinate.
		linemtrxcol1 = np.matrix([[0.0],[yend0],[0.0],[1.0]])
		linemtrxcol2 = np.matrix([[1.0],[yend1],[0.0],[1.0]])
		self.linRegEndpoints = np.hstack((linemtrxcol1, linemtrxcol2))
		print("vtm %s" % (vtm,))
		print("linRegEndpoints %s" % (self.linRegEndpoints,))
		le = vtm * self.linRegEndpoints
		print("le %s" % (le,))

		self.linRegLines.append(self.canvas.create_line(le[0, 0], le[1, 0], le[0, 1], le[1, 1], fill="red", tags="X"))

		self.statslabel.delete('1.0', tk.END)
		self.statslabel.insert(tk.END, "Slope: "+str(slope) + " " + "Intercept: " + str(intercept)+ " " + "r^2 value: "+ str(r2_value))
示例#26
0
	def buildLinearRegression(self, independent, dependent):
		"""Draw the (independent, dependent) scatter plot and its fitted line.

		Each row becomes a black dot of radius 5; the regression line is drawn
		in red (width 3) and labelled with its slope.
		"""
		radius_x = 5
		radius_y = 5

		# Normalized x and y columns, a zero z column and a homogeneous
		# ones column, stacked into (x, y, 0, 1) rows.
		norm_x = analysis.normalize_columns_separately([independent], self.data)
		norm_y = analysis.normalize_columns_separately([dependent], self.data)
		zero_col = np.zeros((self.data.get_num_rows(), 1))
		ones_col = np.ones((self.data.get_num_rows(), 1))
		self.data2matrix = np.hstack((norm_x, norm_y, zero_col, ones_col))

		# Transform the rows with the view matrix.
		vtm = self.view.build()
		screen_pts = (vtm*self.data2matrix.T).T

		# One oval per transformed row.
		for row in range(screen_pts.shape[0]):
			cx = screen_pts[row, 0]
			cy = screen_pts[row, 1]
			self.objects.append(
				self.canvas.create_oval(cx - radius_x, cy - radius_y,
										cx + radius_x, cy + radius_y,
										fill="black", outline=''))

		# Regression on the raw two-column data.  The fifth value returned by
		# linregress is the standard error of the estimate.
		fit = sc.linregress(self.data.get_data([independent, dependent]))
		slope, intercept, r_value, p_value, std_err = fit
		print("%s %s %s" % (slope, intercept, std_err))

		# Column ranges used to rescale the fitted endpoints.
		x_bounds = analysis.data_range([independent], self.data)
		y_bounds = analysis.data_range([dependent], self.data)

		# Fitted y at the x extremes, rescaled by the y range.
		y_low = y_bounds[0][0]
		y_span = y_bounds[0][1] - y_bounds[0][0]
		value1 = ((x_bounds[0][0] * slope + intercept) - y_low) / y_span
		value2 = ((x_bounds[0][1] * slope + intercept) - y_low) / y_span
		print("hi")
		self.LRendpoints = np.matrix([ [0, value1, 0, 1],
										[1, value2, 0, 1] ])

		# Transform the endpoints and create the canvas line.
		points = (vtm * self.LRendpoints.T).T
		self.regLine = self.canvas.create_line(points[0,0], points[0,1],
											   points[1,0], points[1,1],
											   fill= "Red", width = 3)
		self.LRobjects.append(self.regLine)

		# Label placed at the line's second endpoint.
		self.lineLabel = tk.Label(self.canvas, text = "Linear Regression:" + str(slope))
		self.lineLabel.place(x=points[1,0], y=points[1,1])
示例#27
0
    # Print one labelled summary per analysis function for the numeric
    # columns of `data`; each call receives the data object and its headers.
    print("Range of Numeric Data")
    print(analysis.data_range(data, data.get_headers()), "\n")

    print("IQR of the Numeric Columns")
    print(analysis.data_iqr(data, data.get_headers()), "\n")

    print("Mean of the Numeric Columns")
    print(analysis.data_mean(data, data.get_headers()), "\n")

    print("Median of the Numeric Columns")
    print(analysis.data_median(data, data.get_headers()), "\n")

    print("StDev of the Numeric Columns")
    print(analysis.data_stdev(data, data.get_headers()), "\n")

    print("Variance of the Numeric Columns")
    print(analysis.data_variance(data, data.get_headers()), "\n")

    print("Normalized Numeric Columns")
    print(analysis.normalize_columns_separately(data, data.get_headers()),
          "\n")

    print("Normalized Numeric Array")
    print(analysis.normalize_columns_together(data, data.get_headers()), "\n")

    # NOTE(review): the heading says "first row" but the call passes
    # cols=[0] -- presumably the first column is what gets appended; confirm.
    print("Data with first row added to end")
    data.add_col("added data", "numeric", data.subset(cols=[0]))
    # NOTE(review): the value returned by __str__() is discarded here;
    # print(data) may have been intended unless __str__ prints by itself.
    data.__str__()
    print()
示例#28
0
	def buildPoints(self, headers, color, size, shapes, event=None):
		"""Plot the selected columns with the chosen color, size and shape.

		headers -- [x, y, z] column headers; all three must be selected
		color   -- fill color passed to the canvas items
		size    -- marker half-width; converted with int()
		shapes  -- "oval", "rectangle" or "arc"; any other value draws nothing
		event   -- unused; accepted so the method can serve as a Tk callback
		"""
		selected_headers = headers

		# Refuse to plot until every option has been chosen.  Parenthesized
		# single-argument print behaves the same on Python 2 and Python 3.
		if selected_headers[0] is None:
			print("Please Select an X Plot")
			return
		if selected_headers[1] is None:
			print("Please Select a Y Plot")
			return
		if selected_headers[2] is None:
			print("Please Select a Z Plot")
			return
		if color[0] is None:
			print("Please Select a Color")
			return
		if size[0] is None:
			print("Please Select a Size")
			return
		if shapes[0] is None:
			print("Please Select a Shape")
			return

		self.totalReset()

		dx = int(size)
		dy = int(size)
		a = analysis.normalize_columns_separately(selected_headers, self.data)

		# The legends do not depend on the row, so they are set once here
		# instead of once per data point.
		two_dimensional = len(selected_headers) == 2
		self.xaxisLegend.set("X-axis:" + selected_headers[0])
		self.yaxisLegend.set("Y-axis:" + selected_headers[1])
		if not two_dimensional:
			# This branch used to be guarded by `elif len:`, which tests the
			# builtin function and is therefore always true.
			self.zaxisLegend.set("Z-axis:" + selected_headers[2])
			self.colorLegend.set("Color:" + color)
			self.sizeLegend.set("Size:" + size)
			self.shapeLegend.set("Shape:" + shapes)

		# Homogeneous (x, y, z, 1) rows; z is 0 when only two headers are given.
		temp = []
		for i in range(a.shape[0]):
			z = 0 if two_dimensional else a[i, 2]
			temp.append([a[i, 0], a[i, 1], z, 1])

		#Make VTM
		vtm = self.view.build()
		#Convert Data to Matrix
		self.data2matrix = np.matrix(temp)

		print(shapes)

		tp = (vtm*self.data2matrix.T).T

		#Extension 1 Continued...
		# Pick the canvas factory once; an unknown shape draws nothing.
		if shapes == "oval":
			create_item = self.canvas.create_oval
		elif shapes == "rectangle":
			create_item = self.canvas.create_rectangle
		elif shapes == "arc":
			create_item = self.canvas.create_arc
		else:
			create_item = None

		if create_item is not None:
			for i in range(tp.shape[0]):
				tx = tp[i, 0]
				ty = tp[i, 1]
				pt = create_item(tx - dx, ty - dy, tx + dx, ty + dy,
								 fill=color, outline='')
				self.objects.append(pt)
		return
示例#29
0
	def buildLinearRegression(self):
		"""Fit a linear regression to self.dataheaders and draw it on the canvas.

		With two headers, scipy.stats.linregress is run on the two columns and
		a single red line plus a text label are drawn.  Otherwise
		analysis.linear_regression is run with the first two headers as the
		independent variables and the third as the dependent one, and four
		colored lines plus a text label are drawn.

		Attributes written: rows, dataMatrix, intercept, r_squared, p_value,
		std_err, endpoints, gRegressLine, gRegressLines, glinText; the fitted
		slope(s) are appended to self.slope.  Returns None.
		"""
		# Remove everything a previous fit left on the canvas.  The
		# three-variable branch keeps its lines in self.gRegressLines and
		# leaves self.gRegressLine as None, so each kind of graphic has to be
		# checked on its own or the old outline and label would never be
		# deleted.
		if self.gRegressLine is not None:
			self.canvas.delete(self.gRegressLine)
		for stale_line in getattr(self, "gRegressLines", None) or ():
			self.canvas.delete(stale_line)
		if getattr(self, "glinText", None) is not None:
			self.canvas.delete(self.glinText)
		self.gRegressLine = None
		self.gRegressLines = []
		self.glinText = None

		def rescale(value, bounds):
			# Map value into normalized coordinates using one data_range entry.
			return (value - bounds[1]) / (bounds[0] - bounds[1])

		temp_matrix = analysis.normalize_columns_separately(self.data, self.dataheaders)
		self.rows = len(temp_matrix)
		# Pad a two-column fit with a zero Z column, then add the homogeneous
		# coordinate column of ones.
		if len(self.dataheaders) == 2:
			temp_matrix = np.hstack((temp_matrix, np.zeros(shape=(self.rows,1))))
		self.dataMatrix = np.hstack((temp_matrix, np.ones(shape=(self.rows,1))))
		self.buildAxes()
		if len(self.dataheaders) == 2:
			slope, self.intercept, r_value, self.p_value, self.std_err = scipy.stats.linregress(self.data.get_data(self.dataheaders))
			# NOTE(review): self.slope is only appended to here; presumably it
			# is emptied elsewhere before each fit -- confirm.  The freshly
			# fitted value is used below so the drawing never depends on that.
			self.slope.append(slope)
			self.r_squared = r_value**2
			data_range = analysis.data_range(self.data, self.dataheaders)
			high = rescale(data_range[0][0]*slope + self.intercept, data_range[1])
			low = rescale(data_range[0][1]*slope + self.intercept, data_range[1])
			self.endpoints = np.matrix([[0, low, 0, 1],
											[1, high, 0, 1]])
			vtm = self.view.build()
			pts = (vtm * self.endpoints.T).T
			self.gRegressLine = self.canvas.create_line(pts[0,0], pts[0,1], pts[1,0], pts[1,1], fill = "red")
			linText = ("Slope: %.3f, Intercept: %.3f, R Squared: %.3f"%(slope, self.intercept, r_value**2))
			self.glinText = self.canvas.create_text(pts[1,0], pts[1,1], text = linText)
		else:
			regressstuffs = analysis.linear_regression(self.data, self.dataheaders[:2], [self.dataheaders[2],])
			self.intercept = regressstuffs[0][0]
			slope_x0 = regressstuffs[0][1]
			slope_x1 = regressstuffs[0][2]
			self.slope.append(slope_x0)
			self.slope.append(slope_x1)
			self.std_err = regressstuffs[1]
			self.r_squared = regressstuffs[2]
			self.p_value = regressstuffs[4]

			data_range = analysis.data_range(self.data, self.dataheaders)
			# Predicted dependent values at both ends of each independent
			# variable, rescaled with the dependent variable's range entry.
			highx0 = rescale(data_range[0][0]*slope_x0 + self.intercept, data_range[2])
			lowx0 = rescale(data_range[0][1]*slope_x0 + self.intercept, data_range[2])

			highx1 = rescale(data_range[1][0]*slope_x1 + self.intercept, data_range[2])
			lowx1 = rescale(data_range[1][1]*slope_x1 + self.intercept, data_range[2])

			#x1 goes in the x direction, x2 in y, dep goes in Z
			self.endpoints = np.matrix([[0, 0, lowx0, 1],
										[1, 0, highx0, 1],
										[0, 0, lowx1, 1],
										[0, 1, highx1, 1]])
			vtm = self.view.build()
			pts = (vtm * self.endpoints.T).T
			# Each line gets its own color so the four segments can be told
			# apart while checking the 3D visualization of the fit.
			self.gRegressLines.append(self.canvas.create_line(pts[0,0], pts[0,1], pts[1,0], pts[1,1], fill = "red"))
			self.gRegressLines.append(self.canvas.create_line(pts[2,0], pts[2,1], pts[3,0], pts[3,1], fill = "green"))
			self.gRegressLines.append(self.canvas.create_line(pts[0,0], pts[0,1], pts[2,0], pts[2,1], fill = "blue"))
			self.gRegressLines.append(self.canvas.create_line(pts[1,0], pts[1,1], pts[3,0], pts[3,1], fill = "black"))
			linText = ("X0 Slope: %.3f, X1 Slope: %.3f, Intercept: %.3f, R Squared: %.3f"%(slope_x0, slope_x1, self.intercept, self.r_squared))
			self.glinText = self.canvas.create_text(pts[1,0], pts[1,1], text = linText)