def plot_and_pop_data_points(self, xkey=0, ykey=1, ekey=None, ckeys=None, **kwargs):
    """
    Plot the requested columns, then wait for clicks from the user and pop
    the data point nearest each click.

    xkey, ykey, ekey    column keys handed to self.plot()
    ckeys               columns to pop from, handed to pop_data_point().
                        None or [] is passed on as [] (documented by the
                        original author as "pop from all columns");
                        ckeys="these" pops only from the plotted columns
                        [xkey, ykey, ekey]; anything else is passed on
                        unchanged as the list of ckeys from which to pop.
    **kwargs            forwarded to self.plot()

    The loop ends (and the method returns) when _pt.ginput() reports no
    clicks -- the on-screen prompt tells the user to right-click for this --
    or when there are no data points left to pop.

    Returns the list of whatever pop_data_point() returned, one entry per
    accepted click.
    """
    # A None default avoids sharing one mutable list between calls.
    if ckeys is None:
        ckeys = []
    elif ckeys == "these":
        ckeys = [xkey, ykey, ekey]

    # plot the data. This should generate self.xdata and self.ydata
    self.plot(xkey, ykey, ekey, **kwargs)
    axes = _pylab.gca()

    # give the user a chance to zoom before the click loop starts
    raw_input("Zoom in on the region of interest. <enter>")
    print("Now click near the data points you want to pop. Right-click to finish.")

    poppies = []
    while True:
        # get a click; an empty result means the user is done
        clicks = _pt.ginput()
        if len(clicks) == 0:
            return poppies
        cx, cy = clicks[0]

        # squared distance (in data coordinates) from the click to every point
        diff = (self.xdata - cx)**2 + (self.ydata - cy)**2

        # nothing left to pop: min() of an empty sequence would raise
        if len(diff) == 0:
            return poppies

        # index of the closest point
        i = _fun.index(min(diff), diff)

        # now pop!
        poppies.append(self.pop_data_point(i, ckeys))

        # remember the current zoom so the replot does not reset the view
        xlim = axes.get_xlim()
        ylim = axes.get_ylim()

        # replot and rezoom
        _pylab.hold(True)
        self.plot(xkey, ykey, ekey, **kwargs)
        axes.set_xlim(xlim)
        axes.set_ylim(ylim)
        _pylab.hold(False)
        _pylab.draw()
def plot_and_pop_data_points(self, xkey=0, ykey=1, ekey=None, ckeys=None, **kwargs):
    """
    Plot the requested columns, then wait for clicks from the user and pop
    the data point nearest each click.

    xkey, ykey, ekey    column keys handed to self.plot()
    ckeys               columns to pop from, handed to pop_data_point().
                        None or [] is passed on as [] (documented by the
                        original author as "pop from all columns");
                        ckeys="these" pops only from the plotted columns
                        [xkey, ykey, ekey]; anything else is passed on
                        unchanged as the list of ckeys from which to pop.
    **kwargs            forwarded to self.plot()

    The loop ends (and the method returns) when _pt.ginput() reports no
    clicks -- the on-screen prompt tells the user to right-click for this --
    or when there are no data points left to pop.

    Returns the list of whatever pop_data_point() returned, one entry per
    accepted click.
    """
    # A None default avoids sharing one mutable list between calls.
    if ckeys is None:
        ckeys = []
    elif ckeys == "these":
        ckeys = [xkey, ykey, ekey]

    # plot the data. This should generate self.xdata and self.ydata
    self.plot(xkey, ykey, ekey, **kwargs)
    axes = _pylab.gca()

    # give the user a chance to zoom before the click loop starts
    raw_input("Zoom in on the region of interest. <enter>")
    print("Now click near the data points you want to pop. Right-click to finish.")

    poppies = []
    while True:
        # get a click; an empty result means the user is done
        clicks = _pt.ginput()
        if len(clicks) == 0:
            return poppies
        cx, cy = clicks[0]

        # squared distance (in data coordinates) from the click to every point
        diff = (self.xdata - cx)**2 + (self.ydata - cy)**2

        # nothing left to pop: min() of an empty sequence would raise
        if len(diff) == 0:
            return poppies

        # index of the closest point
        i = _fun.index(min(diff), diff)

        # now pop!
        poppies.append(self.pop_data_point(i, ckeys))

        # remember the current zoom so the replot does not reset the view
        xlim = axes.get_xlim()
        ylim = axes.get_ylim()

        # replot and rezoom
        _pylab.hold(True)
        self.plot(xkey, ykey, ekey, **kwargs)
        axes.set_xlim(xlim)
        axes.set_ylim(ylim)
        _pylab.hold(False)
        _pylab.draw()
def load_file(self, path="ask", first_data_line="auto", filters="*.*", text="Select a file, FACEPANTS.", default_directory=None, header_only=False):
    """
    Load a delimited text file, storing the header info in self.headers
    (ordered keys in self.hkeys) and the data in self.columns (ordered keys
    in self.ckeys).

    path                file to load; "ask" opens _dialogs.SingleFile()
    first_data_line     "auto": the first data line is the first line where
                        all the elements are numbers.
                        index (starting at 0): the columns need not be
                        numbers. Everything above is treated as header
                        information and everything from that line on as data.
    filters             accepted but not used by this method; the dialog is
                        given self.file_extension instead
    text                prompt text for the file dialog
    default_directory   starting directory for the file dialog
                        (None means self.directory)
    header_only         if True, stop after the headers and column keys have
                        been worked out, without reading the data

    In both cases the column labels are taken from the line immediately
    above the first data line, provided it has at least as many elements as
    the first data line; otherwise "column_0", "column_1", ... are generated.

    Returns False if the file dialog was cancelled, and None otherwise
    (including when no data line could be found).
    """
    if default_directory is None:
        default_directory = self.directory

    # reset everything that belongs to the previously loaded file
    if self.debug:
        print("resetting all the file-specific stuff, path = %s" % (path,))

    self.clear_columns()
    self.clear_headers()

    self.xdata = None
    self.ydata = None
    self.eydata = None

    if path == "ask":
        path = _dialogs.SingleFile(filters=self.file_extension,
                                   default_directory=default_directory,
                                   text=text)
    self.path = path

    if path is None:
        print("Aborted.")
        return False

    # read in all the lines of the file
    t0 = time.time()
    if self.debug:
        print("%s seconds: starting read_lines()" % (time.time() - t0))
    self.lines = _fun.read_lines(path)
    if self.debug:
        print("%s seconds: done." % (time.time() - t0))

    # use the last path component as a first stab at the legend string
    self.legend_string = path.split(_os.path.sep)[-1]
    if self.legend_string.startswith('_'):
        self.legend_string = '|' + self.legend_string

    # read in the header information
    if self.debug:
        print("%s seconds: start reading headers" % (time.time() - t0))

    auto_detect = (first_data_line == "auto")
    ckeys_line = -2
    for n in range(len(self.lines)):

        # with an explicit first_data_line, only the lines above it are header
        if not auto_detect and n >= first_data_line:
            break

        # split the line by the delimiter
        s = self.lines[n].strip().split(self.delimiter)

        # remove a trailing whitespace entry.
        if len(s) and s[-1].strip() == '':
            s.pop(-1)

        # in auto mode, the first all-numeric line ends the header
        if auto_detect and _fun.elements_are_numbers(s):
            first_data_line = n
            if self.debug:
                print("first data line = %s" % (n,))
            break

        # first thing to try is simply evaluating the remaining string
        try:
            remainder = list(s)
            hkey = remainder.pop(0)
            remainder = _fun.join(remainder).strip()
            # NOTE(security): eval() executes whatever expression the file
            # contains. Only load files from trusted sources.
            self.insert_header(hkey, eval(remainder))

        # if that didn't work, try all the other complicated/flexible stuff
        except Exception:

            # empty lines are simply skipped
            if len(s) > 0:

                # Assume this is the column-key line for now; this keeps
                # being overwritten until the first data line is reached.
                # list(s) makes a copy so later edits to s do not affect it.
                self.ckeys = list(s)
                ckeys_line = n

                # a lone word gets a dummy (empty string) value
                if len(s) == 1:
                    s.append('')

                if len(s) == 2:
                    # exactly two elements: number, complex number or string
                    try:
                        value = float(s[1])
                    except ValueError:
                        try:
                            value = complex(s[1].replace('(', '').replace(')', ''))
                        except ValueError:
                            value = s[1]
                    key = s[0]
                    self.headers[key] = value

                else:
                    # more than two elements: an array or a phrase. If all
                    # elements after the first are numbers, it is an array row.
                    if _fun.elements_are_numbers(s, 1):
                        s[1:] = [float(v) for v in s[1:]]

                    # the first element is the key; the rest is the value
                    key = s.pop(0)
                    self.headers[key] = s

                # add the key to the ordered list, but only if it is new
                if _fun.index(key, self.hkeys) < 0:
                    self.hkeys.append(key)
                else:
                    print("Duplicate header: %s" % (key,))

                if self.debug:
                    print("header '" + key + "' = " + str(self.headers[key])[0:20] + " ...")

    # still "auto" means no all-numeric line was found
    if first_data_line == "auto":
        print("Could not find a line of pure data!")
        return

    # at this point we've found the first_data_line, and ckeys_line is correct or -2

    # count the number of data columns
    column_count = len(self.lines[first_data_line].strip().split(self.delimiter))

    # The ckeys line is only valid if it sits directly above the first data
    # line and has at least as many elements as there are data columns.
    if ckeys_line == first_data_line - 1 and len(self.ckeys) >= column_count:
        if len(self.ckeys) > column_count:
            print("Note: more ckeys than columns (stripping extras)")

        # the label line was also stored as a header above; remove it
        try:
            self.pop_header(self.ckeys[0])
        except Exception:
            print("Couldn't pop column labels from header. Weird.")

    else:
        # it is an invalid ckeys line. Generate our own!
        self.ckeys = ["column_" + str(m) for m in range(column_count)]

    # trim the ckeys down to the number of data columns
    del self.ckeys[column_count:]

    # now we have a valid set of column ckeys one way or another, and we know first_data_line.
    if header_only:
        return

    # start every column as a plain list; converted to arrays below
    for label in self.ckeys:
        self.columns[label] = []

    # start grabbing the data
    if self.debug:
        print("%s seconds: starting to read data" % (time.time() - t0))

    TimerStart()
    for n in range(first_data_line, len(self.lines)):
        entries = self.lines[n].strip().split(self.delimiter)

        # zip() drops any entries beyond the known columns
        for ckey, entry in zip(self.ckeys, entries):
            try:
                value = float(entry)
            except ValueError:
                try:
                    # complex values: first and last characters are stripped
                    value = complex(entry[1:-1])
                except ValueError:
                    # empty or bad entry: skipped, nothing is appended
                    continue
            self.columns[ckey].append(value)

    if self.debug:
        print("%s seconds: yeah." % (time.time() - t0))

    # now loop over the columns and convert them with array()
    TimerStart()
    for k in self.ckeys:
        self.columns[k] = array(self.columns[k])

    if self.debug:
        print("%s seconds: totally." % (time.time() - t0))

    # also store columns under their self.obnoxious_ckeys alias;
    # the column stays available under its original key too
    for k in self.obnoxious_ckeys:
        if k in self.columns:
            if self.debug:
                print("renaming column %s %s" % (k, self.obnoxious_ckeys[k]))
            self.columns[self.obnoxious_ckeys[k]] = self.columns[k]
def load_file(self, path="ask", first_data_line="auto", filters="*.*", text="Select a file, FACEPANTS.", default_directory=None, header_only=False):
    """
    Load a delimited text file, storing the header info in self.headers
    (ordered keys in self.hkeys) and the data in self.columns (ordered keys
    in self.ckeys).

    path                file to load; "ask" opens _dialogs.SingleFile()
    first_data_line     "auto": the first data line is the first line where
                        all the elements are numbers.
                        index (starting at 0): the columns need not be
                        numbers. Everything above is treated as header
                        information and everything from that line on as data.
    filters             accepted but not used by this method; the dialog is
                        given self.file_extension instead
    text                prompt text for the file dialog
    default_directory   starting directory for the file dialog
                        (None means self.directory)
    header_only         if True, stop after the headers and column keys have
                        been worked out, without reading the data

    In both cases the column labels are taken from the line immediately
    above the first data line, provided it has at least as many elements as
    the first data line; otherwise "column_0", "column_1", ... are generated.

    Returns False if the file dialog was cancelled, and None otherwise
    (including when no data line could be found).
    """
    if default_directory is None:
        default_directory = self.directory

    # reset everything that belongs to the previously loaded file
    if self.debug:
        print("resetting all the file-specific stuff, path = %s" % (path,))

    self.clear_columns()
    self.clear_headers()

    self.xdata = None
    self.ydata = None
    self.eydata = None

    if path == "ask":
        path = _dialogs.SingleFile(filters=self.file_extension,
                                   default_directory=default_directory,
                                   text=text)
    self.path = path

    if path is None:
        print("Aborted.")
        return False

    # read in all the lines of the file
    t0 = time.time()
    if self.debug:
        print("%s seconds: starting read_lines()" % (time.time() - t0))
    self.lines = _fun.read_lines(path)
    if self.debug:
        print("%s seconds: done." % (time.time() - t0))

    # use the last path component as a first stab at the legend string
    self.legend_string = path.split(_os.path.sep)[-1]
    if self.legend_string.startswith('_'):
        self.legend_string = '|' + self.legend_string

    # read in the header information
    if self.debug:
        print("%s seconds: start reading headers" % (time.time() - t0))

    auto_detect = (first_data_line == "auto")
    ckeys_line = -2
    for n in range(len(self.lines)):

        # with an explicit first_data_line, only the lines above it are header
        if not auto_detect and n >= first_data_line:
            break

        # split the line by the delimiter
        s = self.lines[n].strip().split(self.delimiter)

        # remove a trailing whitespace entry.
        if len(s) and s[-1].strip() == '':
            s.pop(-1)

        # in auto mode, the first all-numeric line ends the header
        if auto_detect and _fun.elements_are_numbers(s):
            first_data_line = n
            if self.debug:
                print("first data line = %s" % (n,))
            break

        # first thing to try is simply evaluating the remaining string
        try:
            remainder = list(s)
            hkey = remainder.pop(0)
            remainder = _fun.join(remainder).strip()
            # NOTE(security): eval() executes whatever expression the file
            # contains. Only load files from trusted sources.
            self.insert_header(hkey, eval(remainder))

        # if that didn't work, try all the other complicated/flexible stuff
        except Exception:

            # empty lines are simply skipped
            if len(s) > 0:

                # Assume this is the column-key line for now; this keeps
                # being overwritten until the first data line is reached.
                # list(s) makes a copy so later edits to s do not affect it.
                self.ckeys = list(s)
                ckeys_line = n

                # a lone word gets a dummy (empty string) value
                if len(s) == 1:
                    s.append('')

                if len(s) == 2:
                    # exactly two elements: number, complex number or string
                    try:
                        value = float(s[1])
                    except ValueError:
                        try:
                            value = complex(s[1].replace('(', '').replace(')', ''))
                        except ValueError:
                            value = s[1]
                    key = s[0]
                    self.headers[key] = value

                else:
                    # more than two elements: an array or a phrase. If all
                    # elements after the first are numbers, it is an array row.
                    if _fun.elements_are_numbers(s, 1):
                        s[1:] = [float(v) for v in s[1:]]

                    # the first element is the key; the rest is the value
                    key = s.pop(0)
                    self.headers[key] = s

                # add the key to the ordered list, but only if it is new
                if _fun.index(key, self.hkeys) < 0:
                    self.hkeys.append(key)
                else:
                    print("Duplicate header: %s" % (key,))

                if self.debug:
                    print("header '" + key + "' = " + str(self.headers[key])[0:20] + " ...")

    # still "auto" means no all-numeric line was found
    if first_data_line == "auto":
        print("Could not find a line of pure data!")
        return

    # at this point we've found the first_data_line, and ckeys_line is correct or -2

    # count the number of data columns
    column_count = len(self.lines[first_data_line].strip().split(self.delimiter))

    # The ckeys line is only valid if it sits directly above the first data
    # line and has at least as many elements as there are data columns.
    if ckeys_line == first_data_line - 1 and len(self.ckeys) >= column_count:
        if len(self.ckeys) > column_count:
            print("Note: more ckeys than columns (stripping extras)")

        # the label line was also stored as a header above; remove it
        try:
            self.pop_header(self.ckeys[0])
        except Exception:
            print("Couldn't pop column labels from header. Weird.")

    else:
        # it is an invalid ckeys line. Generate our own!
        self.ckeys = ["column_" + str(m) for m in range(column_count)]

    # trim the ckeys down to the number of data columns
    del self.ckeys[column_count:]

    # now we have a valid set of column ckeys one way or another, and we know first_data_line.
    if header_only:
        return

    # start every column as a plain list; converted to arrays below
    for label in self.ckeys:
        self.columns[label] = []

    # start grabbing the data
    if self.debug:
        print("%s seconds: starting to read data" % (time.time() - t0))

    TimerStart()
    for n in range(first_data_line, len(self.lines)):
        entries = self.lines[n].strip().split(self.delimiter)

        # zip() drops any entries beyond the known columns
        for ckey, entry in zip(self.ckeys, entries):
            try:
                value = float(entry)
            except ValueError:
                try:
                    # complex values: first and last characters are stripped
                    value = complex(entry[1:-1])
                except ValueError:
                    # empty or bad entry: skipped, nothing is appended
                    continue
            self.columns[ckey].append(value)

    if self.debug:
        print("%s seconds: yeah." % (time.time() - t0))

    # now loop over the columns and convert them with array()
    TimerStart()
    for k in self.ckeys:
        self.columns[k] = array(self.columns[k])

    if self.debug:
        print("%s seconds: totally." % (time.time() - t0))

    # also store columns under their self.obnoxious_ckeys alias;
    # the column stays available under its original key too
    for k in self.obnoxious_ckeys:
        if k in self.columns:
            if self.debug:
                print("renaming column %s %s" % (k, self.obnoxious_ckeys[k]))
            self.columns[self.obnoxious_ckeys[k]] = self.columns[k]