def feed_list(self, table):

    """ Feed a table (sequence of rows) which is converted to CSV and
        appended to self.text.

        If self.columns is set, every row is cut to len(self.columns)
        items; otherwise the length of the longest row in table is
        used. Rows with fewer items are filled up with "" entries so
        that all lines carry the same number of fields.

        Quoting of the individual items is delegated to self._quote().
        Each generated line is terminated with self.lineend. An empty
        table leaves self.text unchanged.

    """
    columns = self.columns
    if columns:
        rowlen = len(columns)
    elif table:
        # Calculate the max. number of columns in the table
        rowlen = max(map(len, table))
    else:
        # max() would raise a ValueError for an empty table
        rowlen = 0

    quote = self._quote
    join = self.separator.join
    lines = []
    for row in table:
        fields = quote(row[:rowlen])
        missing = rowlen - len(fields)
        if missing > 0:
            fields.extend(['""'] * missing)
        lines.append(join(fields))

    # The trailing empty entry makes join() terminate the last line
    lines.append('')
    self.text = self.text + self.lineend.join(lines)
class _modinit:

    # Initialization helper: the statements below run once, when this
    # class statement is executed, and fill the _weights table in place.
    # NOTE(review): every name bound here (math, l, i, factor) ends up
    # as a class attribute of _modinit.
    import math
    # Sample points used as exponents for the weights
    # (presumably MAXLOCALITY values between 0 and 1 -- confirm against
    # the Tools.frange documentation)
    l = Tools.frange(0, 1, MAXLOCALITY)
    for i, factor in Tools.irange(l):
        # Integer weight growing exponentially with the position i,
        # scaled by 8192
        _weights[i] = int((math.exp(factor) - 1.0) * 8192)
        if _debug:
            print i, '. weight =', _weights[i]
def feed_list(self, table):

    """ Feed a table (sequence of rows) which is converted to CSV and
        appended to self.text.

        No more than len(self.columns) items are written for each row.
        If no columns are set, the length of the longest row is used
        instead. All rows are filled up with "" entries to have an
        equal number of items.

        The items are quoted by self._quote(); every line, including
        the last one, ends with self.lineend. Feeding an empty table
        is a no-op.

    """
    columns = self.columns
    if columns:
        rowlen = len(columns)
    elif not table:
        # Nothing to measure; max() fails on an empty sequence
        rowlen = 0
    else:
        # Calculate the max. number of columns in the table
        rowlen = max(map(len, table))

    _quote = self._quote
    separator = self.separator
    csvlines = []
    for row in table:
        quoted = _quote(row[:rowlen])
        if len(quoted) < rowlen:
            # Pad short rows with empty quoted fields
            quoted[len(quoted):] = ['""'] * (rowlen - len(quoted))
        csvlines.append(separator.join(quoted))

    # Add final line end and add as CSV text
    csvlines.append('')
    self.text = self.text + self.lineend.join(csvlines)
def feed_objects(self, objects, getattr=getattr):

    """ Feed a sequence of objects which is converted to CSV and
        appended to self.text.

        For each object the names in self.columns are looked up as
        attributes and form one CSV line. The raw attribute values
        are handed to self._quote(), which is responsible for turning
        them into quoted strings. Values are no longer passed through
        str() first, since that turned None into the text 'None'
        instead of an empty field.

        Raises Error if no output columns are set.

    """
    columns = self.columns
    if not columns:
        raise Error('no output columns set')

    quote = self._quote
    join = self.separator.join
    lines = []
    for obj in objects:
        values = [getattr(obj, name) for name in columns]
        lines.append(join(quote(values)))

    # The trailing empty entry makes join() terminate the last line
    lines.append('')
    self.text = self.text + self.lineend.join(lines)
def cut(self, NOM=NOM, DENOM=DENOM): """ Force a cut of the cache's contents. This will make room for at least one new entry. """ if _debug: print ' Cutting down cache size...' cachesize = self.cachesize # Cut the cache down to the entries in recent get history newdata = {} known_key = newdata.has_key data = self.data for id in self.get_history[-self.locality:]: if known_key(id): continue try: newdata[id] = data[id] except KeyError: pass cachesize = len(newdata) if _debug: print ' Size after cut to recent history:', cachesize # Check if cachesize * NOM >= self.max_cachesize * DENOM: # Calculate weights d = {} weights = _weights d_get = d.get for i, id in Tools.irange(self.get_history[-self.locality:]): if not known_key(id): continue d[id] = d_get(id, 0) + weights[i] # Delete all entries left from median ranking = Tools.sortedby(d.items(), 1) if _debug: print ' Ranking:', ranking for id, weight in ranking[:len(d) / 2]: if _debug: print ' Deleting', id, 'with weight =', weight del newdata[id] # Check cachesize = len(newdata) if cachesize * NOM >= self.max_cachesize * DENOM: # Ok, so the smart way didn't work... if _debug: print ' Did not work, going the hard way...' newdata.clear() cachesize = 0 self.data = newdata self.cachesize = cachesize self.cuts = self.cuts + 1
def _unquote(self, line):

    """ Remove CSV style quoting from the fields of line.

        line is modified in place and also returned. A field that
        starts and ends with a double quote loses that outer pair;
        afterwards every doubled quote is collapsed to a single one.

        Internal method. Do not use directly.

    """
    for index in range(len(line)):
        field = line[index]
        quoted = field[:1] == '"' and field[-1:] == '"'
        if quoted:
            field = field[1:-1]
        line[index] = field.replace('""', '"')
    return line
def _quote(self, line, str=str):

    """ Return a new list with the CSV style quoted items of line.

        None items become empty fields, all other items are
        converted using str(). Embedded double quotes are doubled
        and each field is wrapped in double quotes.

    """
    quoted = []
    for item in line:
        if item is None:
            text = ''
        else:
            text = str(item)
        quoted.append('"%s"' % text.replace('"', '""'))
    return quoted
def rpcdecode(url, prefix='', decode=1,

              splitat=TextTools.splitat, charsplit=TextTools.charsplit,
              len=len, tuple=tuple, urldecode=urldecode):

    """ Decode a RPC encoded function/method call of the form
        method[(arg0,arg1,...,kw0=val0,kw1=val1,...)].

        Returns a tuple (name, args, kws). args is a tuple of the
        positional arguments, kws a dictionary of the keyword
        parameters or None if the call does not contain any '='.
        All values are returned as strings; converting them is up
        to the caller.

        If prefix is given and name starts with it, the prefix is
        removed from the returned name. Setting decode to false
        skips the urldecode() step.

        'method' is accepted as short form of a call without
        argument list.

    """
    if decode:
        url = urldecode(url)

    name, rawargs = splitat(url, '(')
    args = ()
    kws = None
    if rawargs:
        # Cut out the pure argument part, ignoring any character after
        # the final ')'
        rawargs, ignored = splitat(rawargs, ')', -1)
        arglist = charsplit(rawargs, ',')
        if '=' in rawargs:
            kws = {}
            # Walk backwards so that deleting an entry does not shift
            # the entries that still have to be looked at
            index = len(arglist)
            while index > 0:
                index = index - 1
                arg = arglist[index]
                if '=' in arg:
                    key, value = splitat(arg, '=')
                    kws[key] = value
                    del arglist[index]
        args = tuple(arglist)

    if prefix and name[:len(prefix)] == prefix:
        name = name[len(prefix):]
    return name, args, kws
def _unquote(self, line):

    """ Unquote the CSV style quoted fields stored in line.

        Works in place: each entry of line is replaced by its
        unquoted text and the same list object is returned. Fields
        enclosed in double quotes lose the enclosing pair; doubled
        quotes are reduced to single quotes in every field.

        Internal method. Do not use directly.

    """
    for position, text in enumerate(line):
        if text[:1] == '"':
            if text[-1:] == '"':
                # Strip the enclosing quotes
                text = text[1:-1]
        line[position] = text.replace('""', '"')
    return line
def dictionary(self):

    """ Return the current data as dictionary mapping each column
        name to a list with one entry per line.

        Entries for which a line has no value at the column's
        position are set to None.

        Raises Error if self.columns is None, i.e. the columns have
        to be set before calling this method.

    """
    records = self.lines
    columns = self.columns
    if columns is None:
        raise Error('no columns set')

    count = len(records)
    result = {}
    for name in columns:
        result[name] = [None] * count

    for position, name in enumerate(columns):
        values = result[name]
        for rownum, record in enumerate(records):
            if position < len(record):
                values[rownum] = record[position]
    return result
def filter_header(self, header, lower=TextTools.lower):

    """ Filter the given header line and return the result as new
        list of column names.

        The base class passes each column name through lower() and
        removes any whitespace it contains.

        According to the original documentation this method is only
        called if the header was read from the data provided to the
        object.

    """
    return [''.join(lower(name).split()) for name in header]
def dictionary(self):

    """ Build a column oriented view of the current data.

        Returns a dictionary with one key per entry in self.columns;
        each value is a list holding that column's entry of every
        line, or None where the line is too short.

        Raises Error if self.columns is None.

    """
    table = {}
    lines = self.lines
    names = self.columns
    if names is None:
        raise Error('no columns set')

    # Start out with all-None columns of the right length
    nrows = len(lines)
    for name in names:
        table[name] = [None] * nrows

    # Copy the available values column by column
    for col in range(len(names)):
        target = table[names[col]]
        for rownum in range(nrows):
            row = lines[rownum]
            if len(row) > col:
                target[rownum] = row[col]
    return table
def _quote(self, line, str=str):

    """ CSV style quote the items of line and return them as new
        list of strings.

        Each item is stringified with str() (None becomes the empty
        string), double quotes inside the text are doubled and the
        result is enclosed in double quotes.

    """
    result = []
    add = result.append
    for item in line:
        text = ''
        if item is not None:
            text = str(item)
        escaped = text.replace('"', '""')
        add('"%s"' % escaped)
    return result
def rpcdecode(url, prefix='', decode=1,

              splitat=TextTools.splitat, charsplit=TextTools.charsplit,
              len=len, tuple=tuple, urldecode=urldecode):

    """ Decode a RPC encoded function/method call.

        The expected syntax is
        method[(arg0,arg1,...,kw0=val0,kw1=val1,...)]; a plain
        'method' is treated like a call without arguments.

        Returns (name, args, kws): args is a tuple with the string
        arguments, kws is a dictionary with the keyword parameters,
        or None if the argument part contains no '='. Nothing is
        converted; all parameters are returned as strings.

        A given prefix is stripped from name if name starts with
        it. If decode is false, url is processed without being
        urldecoded first.

    """
    if decode:
        url = urldecode(url)

    # Separate the method name from the raw argument text
    name, rawargs = splitat(url, '(')

    if not rawargs:
        args = ()
        kws = None
    else:
        # Drop the final ')' and anything that follows it
        rawargs, trailer = splitat(rawargs, ')', -1)
        items = charsplit(rawargs, ',')
        if '=' not in rawargs:
            kws = None
        else:
            kws = {}
            # Move the kw=val items into kws; going from the end
            # keeps the indices of the remaining items valid
            for pos in range(len(items) - 1, -1, -1):
                item = items[pos]
                if '=' in item:
                    key, value = splitat(item, '=')
                    kws[key] = value
                    del items[pos]
        args = tuple(items)

    if prefix:
        plen = len(prefix)
        if name[:plen] == prefix:
            name = name[plen:]
    return name, args, kws
def list(self):

    """ Return the current data as list of lists.

        Every row is a copy of the corresponding entry in
        self.lines. Rows with fewer than self.width entries are
        filled up with None; longer rows are returned unchanged.

    """
    width = self.width
    padded = []
    for record in self.lines:
        copy = record[:]
        shortfall = width - len(copy)
        if shortfall > 0:
            copy[len(copy):] = [None] * shortfall
        padded.append(copy)
    return padded
def _quote(self, line, str=str):

    """ CSV style quote the items of line and return them as new
        list.

        None is written as empty field, unicode objects are encoded
        using self.encoding and everything else is converted with
        str(). Double quotes in the resulting text are doubled and
        the field is enclosed in double quotes.

    """
    quoted = []
    for item in line:
        if item is None:
            text = ''
        else:
            # The None check has to come first; only real values
            # reach the unicode test
            if isinstance(item, unicode):
                text = item.encode(self.encoding)
            else:
                text = str(item)
        quoted.append('"%s"' % text.replace('"', '""'))
    return quoted
def list(self):

    """ Return a copy of the current data as list of row lists.

        Rows shorter than self.width get None entries appended
        until they have self.width entries. The rows stored in
        self.lines are left untouched.

    """
    width = self.width
    source = self.lines
    table = [None] * len(source)
    for index in range(len(source)):
        row = source[index][:]
        have = len(row)
        if have < width:
            row[have:] = [None] * (width - have)
        table[index] = row
    return table
def filter_header(self, header, lower=TextTools.lower):

    """ Return the filtered column names of the given header line.

        Each name is passed through lower() and stripped of all
        whitespace, including whitespace inside the name. The
        result is a new list of the same length as header.

        The original documentation states that this method is only
        called when the header was read from the data provided to
        the object.

    """
    names = []
    for column in header:
        words = lower(column).split()
        names.append(''.join(words))
    return names
def __str__(self):

    """ Render the data as plain text table.

        The first line holds the column names, the second a ruler
        made of '=' and '+', followed by one line per data row.
        Cells are left aligned, padded to the widths reported by
        self.description() and terminated with '|'.

    """
    rows = self.list()
    desc = self.description()

    pieces = ['%-*s|' % (col[1], col[0]) for col in desc]
    pieces.append('\n')
    pieces.extend(['=' * col[1] + '+' for col in desc])
    pieces.append('\n')
    for row in rows:
        for index in range(len(row)):
            pieces.append('%-*s|' % (desc[index][1], row[index]))
        pieces.append('\n')
    return ''.join(pieces)
def objects(self, constructor):

    """ Build a list of objects, one per input line.

        For every line the constructor is called with keyword
        arguments only; the keywords are formed by combining
        self.columns with the line's values via Tools.tuples()
        (presumably pairing each column name with the value at the
        same position -- confirm against the Tools documentation).

        Raises Error if self.columns is None.

    """
    lines = self.lines
    columns = self.columns
    if columns is None:
        raise Error('no columns set')

    result = []
    for line in lines:
        keywords = dict(Tools.tuples(columns, line))
        result.append(constructor(**keywords))
    return result
def __str__(self):

    """ Return a simple text table showing the current data.

        Layout: a header line with the column names, a ruler line
        built from '=' and '+', then one line for each row returned
        by self.list(). Every cell is left aligned to the width
        given by self.description() and followed by '|'.

    """
    rows = self.list()
    desc = self.description()

    header = ''.join(['%-*s|' % (col[1], col[0]) for col in desc])
    ruler = ''.join(['=' * col[1] + '+' for col in desc])

    body = []
    for row in rows:
        cells = []
        for index, item in enumerate(row):
            cells.append('%-*s|' % (desc[index][1], item))
        body.append(''.join(cells) + '\n')

    return header + '\n' + ruler + '\n' + ''.join(body)
def objects(self, constructor):

    """ Return a list with one constructed object per input line.

        The constructor receives keyword arguments only. They are
        built as dict(Tools.tuples(self.columns, line)), which is
        expected to map column names to the values of the line
        (TODO confirm the exact pairing rules of Tools.tuples).

        Raises Error if self.columns is None.

    """
    lines = self.lines
    keys = self.columns
    if keys is None:
        raise Error('no columns set')

    objs = [None] * len(lines)
    for index in range(len(lines)):
        kws = dict(Tools.tuples(keys, lines[index]))
        objs[index] = constructor(**kws)
    return objs
def feed_dict(self, table, rows=None):

    """ Feed a table (dictionary of lists) which is converted to
        CSV and appended to self.text.

        Only the keys listed in self.columns are used. If rows is
        not given (or false), the length of the longest of these
        lists is used as number of rows. Every used list must
        provide at least that many entries; shorter lists result
        in an IndexError.

        The values of each row are quoted by self._quote() and
        every line is terminated with self.lineend.

        Raises Error if no output columns are set.

    """
    columns = self.columns
    if not columns:
        raise Error('no output columns set')
    ncols = len(columns)

    if not rows:
        rows = 0
        for name in columns:
            rows = max(rows, len(table[name]))

    # Create an empty rows x ncols grid
    grid = []
    for r in range(rows):
        grid.append([None] * ncols)

    # Fill the grid column by column
    for c, name in enumerate(columns):
        values = table[name]
        for r in range(rows):
            grid[r][c] = values[r]

    # Quote and join lines
    quote = self._quote
    join = self.separator.join
    lines = [join(quote(row)) for row in grid]

    # Add final line end and store CSV text
    lines.append('')
    self.text = self.text + self.lineend.join(lines)
def description(self, header=1):

    """ Return a list of tuples (column name, max length), one per
        entry in self.columns.

        The length is the largest len() of the column's entries in
        self.lines; entries beyond the last column are ignored. If
        header is true (default), the length of the column name
        itself is included in the calculation.

    """
    lines = self.lines
    columns = self.columns
    width = len(columns)

    if header:
        lengths = [len(name) for name in columns]
    else:
        lengths = [0] * width

    for row in lines:
        for index, value in enumerate(row[:width]):
            size = len(value)
            if size > lengths[index]:
                lengths[index] = size

    # columns and lengths always have the same number of entries
    return list(zip(columns, lengths))
def feed_objects(self, objects, getattr=getattr):

    """ Feed a sequence of objects which is converted to CSV and
        appended to self.text.

        The names set in self.columns are interpreted as object
        attributes; each object produces one CSV line made up of
        these attribute values.

        The values are passed unchanged to self._quote(), which
        does the string conversion and quoting. Calling str() on
        them beforehand, as was done previously, wrote None values
        as 'None' rather than as empty fields.

        Raises Error if no output columns are set.

    """
    columns = self.columns
    if not columns:
        raise Error('no output columns set')

    # Collect one row of raw attribute values per object
    rows = []
    for obj in objects:
        row = []
        for name in columns:
            row.append(getattr(obj, name))
        rows.append(row)

    # Quote and join lines
    separator = self.separator
    _quote = self._quote
    lines = [separator.join(_quote(row)) for row in rows]

    # Add final line end and store CSV text
    lines.append('')
    self.text = self.text + self.lineend.join(lines)
def description(self, header=1):

    """ Describe the columns of the current data.

        Returns a list of (column name, max length) tuples in the
        order of self.columns. The max length is taken over the
        entries found in self.lines, looking only at the first
        len(self.columns) entries of each line. With header true
        (default) the column names count as entries as well.

    """
    lines = self.lines
    columns = self.columns
    width = len(columns)

    lengths = [0] * width
    if header:
        for index in range(width):
            lengths[index] = len(columns[index])

    for row in lines:
        cells = row[:width]
        for index in range(len(cells)):
            lengths[index] = max(lengths[index], len(cells[index]))

    # Pair every column name with its length
    return list(zip(columns, lengths))
def cut(self, NOM=NOM,DENOM=DENOM): """ Force a cut of the cache's contents. This will make room for at least one new entry. """ if _debug: print ' Cutting down cache size...' cachesize = self.cachesize # Cut the cache down to the entries in recent get history newdata = {} known_key = newdata.has_key data = self.data for id in self.get_history[-self.locality:]: if known_key(id): continue try: newdata[id] = data[id] except KeyError: pass cachesize = len(newdata) if _debug: print ' Size after cut to recent history:',cachesize # Check if cachesize * NOM >= self.max_cachesize * DENOM: # Calculate weights d = {} weights = _weights d_get = d.get for i,id in Tools.irange(self.get_history[-self.locality:]): if not known_key(id): continue d[id] = d_get(id,0) + weights[i] # Delete all entries left from median ranking = Tools.sortedby(d.items(),1) if _debug: print ' Ranking:',ranking for id,weight in ranking[:len(d)/2]: if _debug: print ' Deleting',id,'with weight =',weight del newdata[id] # Check cachesize = len(newdata) if cachesize * NOM >= self.max_cachesize * DENOM: # Ok, so the smart way didn't work... if _debug: print ' Did not work, going the hard way...' newdata.clear() cachesize = 0 self.data = newdata self.cachesize = cachesize self.cuts = self.cuts + 1