def collect(self):
    '''collect all data.

    Data is stored in a multi-level dictionary (DataTree).

    Sets :attr:`self.data` and returns it. Returns early (with a
    warning) if the tracker yields no data paths.
    '''
    self.data = odict()

    self.debug("%s: collecting data paths." % (self.tracker))
    is_function, datapaths = self.getDataPaths(self.tracker)
    self.debug("%s: collected data paths." % (self.tracker))

    # if function, no datapaths
    if is_function:
        d = self.getData(())
        # save in data tree as leaf
        DataTree.setLeaf(self.data, ("all",), d)
        self.debug("%s: collecting data finished for function." % (self.tracker))
        return

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks found - no output" % self.tracker)
        return

    self.debug("%s: filtering data paths." % (self.tracker))
    # filter data paths
    datapaths = self.filterDataPaths(datapaths)
    self.debug("%s: filtered data paths." % (self.tracker))

    # if no tracks remain after filtering, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks remain after filtering - no output" % self.tracker)
        return

    self.debug("%s: building all_paths" % (self.tracker))
    if len(datapaths) > MAX_PATH_NESTING:
        # BUG FIX: the original formatted ``len(all_paths)`` here, but
        # ``all_paths`` is not assigned until below - that raised a
        # NameError instead of the intended ValueError. The nesting
        # depth being checked is len(datapaths).
        self.warn("%s: number of nesting in data paths too large: %i" %
                  (self.tracker, len(datapaths)))
        raise ValueError("%s: number of nesting in data paths too large: %i" %
                         (self.tracker, len(datapaths)))

    # enumerate the full cross-product of per-level labels
    all_paths = list(itertools.product(*datapaths))
    self.debug("%s: collecting data started for %i data paths" %
               (self.tracker, len(all_paths)))

    self.data = odict()
    for path in all_paths:
        d = self.getData(path)

        # ignore empty data sets
        if d is None:
            continue

        # save in data tree as leaf
        DataTree.setLeaf(self.data, path, d)

    self.debug("%s: collecting data finished for %i data paths" %
               (self.tracker, len(all_paths)))
    return self.data
def __call__(self, data, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method for
    each leaf/branch at level :attr:`nlevels`.

    Returns a :class:`ResultBlocks` container; an
    :class:`EmptyResultBlock` is returned when the data tree has
    fewer levels than the renderer requires.
    '''
    # idiom fix: identity comparison with None
    if self.nlevels is None:
        raise NotImplementedError("incomplete implementation of %s" % str(self))

    result = ResultBlocks(title=path2str(path))

    labels = DataTree.getPaths(data)
    if len(labels) < self.nlevels:
        self.warn("at %s: expected at least %i levels - got %i: %s" %
                  (str(path), self.nlevels, len(labels), str(labels)))
        result.append(EmptyResultBlock(title=path2str(path)))
        return result

    # group the tree at (depth - nlevels); each leaf is rendered separately
    paths = list(itertools.product(*labels[:-self.nlevels]))

    for p in paths:
        work = DataTree.getLeaf(data, p)
        if not work:
            continue
        try:
            result.extend(self.render(work, path + p))
        except:
            # fixed typo in the original message ("exeception")
            self.warn("exception raised in rendering for path: %s" % str(path + p))
            raise

    return result
def transform(self, data, path):
    '''build pairwise combinations of tracks.

    For every pair (n1, n2) of keys in *data*, pick the first field
    from :attr:`self.fields` present in data[n1] and pair its values
    with the same field in data[n2] under a new "n1 x n2" branch.

    Raises KeyError if no field matches, ValueError if the paired
    arrays differ in length.
    '''
    debug("%s: called" % str(self))

    # materialize so the keys can be indexed positionally
    vals = list(data.keys())
    new_data = odict()

    for x1 in range(len(vals) - 1):
        n1 = vals[x1]
        # find the first field that fits
        for field in self.fields:
            if field in data[n1]:
                d1 = data[n1][field]
                break
        else:
            raise KeyError("could not find any match from '%s' in '%s'" %
                           (str(data[n1].keys()), str(self.fields)))

        for x2 in range(x1 + 1, len(vals)):
            n2 = vals[x2]
            try:
                d2 = data[n2][field]
            # BUG FIX: original raised NameError via misspelled
            # ``KeyErrror``/``sttr`` and supplied one argument for a
            # two-placeholder format string.
            except KeyError:
                raise KeyError("no field %s in '%s'" % (field, str(data[n2])))

            # check that both are array-like and of equal length
            if len(d1) != len(d2):
                raise ValueError("length of elements not equal: %i != %i" %
                                 (len(d1), len(d2)))

            DataTree.setLeaf(new_data, (("%s x %s" % (n1, n2)), n1), d1)
            DataTree.setLeaf(new_data, (("%s x %s" % (n1, n2)), n2), d2)

    return new_data
def GET(self, tracker):
    '''serve the cached contents of *tracker* rendered as an HTML data table.'''
    # open the tracker's persistent cache read-only
    reader = Cache.Cache(tracker, mode="r")
    tree = DataTree.fromCache(reader)
    cells, rows, cols = DataTree.tree2table(tree)
    return render.data_table(cells, rows, cols)
def exclude(self):
    '''exclude data paths.

    Only those data paths not matching the exclude term are accepted.

    Literal terms are matched by membership in the path tuple;
    terms of the form ``r(PATTERN)`` are treated as regular
    expressions searched against each path component.
    '''
    if not self.exclude_paths:
        return

    data_paths = DataTree.getPaths(self.data)

    # currently enumerates - bfs more efficient
    all_paths = list(itertools.product(*data_paths))

    for path in all_paths:
        for s in self.exclude_paths:
            if s in path:
                self.debug("%s: ignoring path %s because of :exclude:=%s" %
                           (self.tracker, path, s))
                # leaf may already have been removed by a previous term
                try:
                    DataTree.removeLeaf(self.data, path)
                except KeyError:
                    pass
            elif s.startswith("r(") and s.endswith(")"):
                # collect pattern matches:
                # remove r()
                # NOTE: rebinds the loop variable ``s`` - subsequent
                # iterations of the inner loop use the next term, so
                # this is safe, but do not hoist the stripping out.
                s = s[2:-1]
                # remove flanking quotation marks
                if s[0] in ('"', "'") and s[-1] in ('"', "'"):
                    s = s[1:-1]
                rx = re.compile(s)
                if any((rx.search(p) for p in path)):
                    self.debug("%s: ignoring path %s because of :exclude:=%s" %
                               (self.tracker, path, s))
                    try:
                        DataTree.removeLeaf(self.data, path)
                    except KeyError:
                        pass
def GET(self, tracker):
    '''render all cached data for *tracker* as an HTML table.'''
    # load the complete data tree from the read-only cache, then flatten
    tree = DataTree.fromCache(Cache.Cache(tracker, mode="r"))
    body, row_labels, column_labels = DataTree.tree2table(tree)
    return render.data_table(body, row_labels, column_labels)
def render(self):
    """supply the :class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option.

    return resultblocks

    Raises ValueError when no result block was produced.
    """
    self.debug("%s: rendering data started for %i items" %
               (self, len(self.data)))

    results = ResultBlocks(title="main")

    # get number of levels required by renderer
    try:
        renderer_nlevels = self.renderer.nlevels
    except AttributeError:
        renderer_nlevels = 0

    data_paths = DataTree.getPaths(self.data)
    nlevels = len(data_paths)

    group_level = self.group_level

    self.debug(
        "%s: rendering data started. levels=%i, required levels>=%i, "
        "group_level=%i, data_paths=%s" %
        (self, nlevels, renderer_nlevels, group_level, str(data_paths)[:100]))

    if nlevels < renderer_nlevels:
        # add some dummy levels if levels is not enough:
        # wrap the tree in "all" branches until deep enough
        d = self.data
        for x in range(renderer_nlevels - nlevels):
            d = odict((("all", d),))
        results.append(self.renderer(d, path=("all",)))
    elif group_level < 0 or renderer_nlevels < 0:
        # no grouping - hand the whole tree to the renderer
        results.append(self.renderer(self.data, path=()))
    else:
        # group at level group_level: one render call per subtree
        paths = list(itertools.product(*data_paths[: group_level + 1]))
        for path in paths:
            work = DataTree.getLeaf(self.data, path)
            if not work:
                continue
            try:
                results.append(self.renderer(work, path=path))
            except:
                # a failing renderer is converted into an error block
                # so that the remaining paths still render
                results.append(ResultBlocks(Utils.buildException("rendering")))

    if len(results) == 0:
        self.warn("tracker returned no data.")
        raise ValueError("tracker returned no data.")

    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))

    return results
def __call__(self, *args, **kwargs):
    '''run the full dispatch pipeline.

    Stages: parseArguments -> collect -> transform -> prune ->
    group -> render. Each stage is wrapped so that an exception is
    converted into a ResultBlocks error block and returned
    immediately instead of propagating.
    '''
    try:
        self.parseArguments(*args, **kwargs)
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("parsing")))

    self.debug("profile: started: tracker: %s" % (self.tracker))
    try:
        self.collect()
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("collection")))
    self.debug("profile: finished: tracker: %s" % (self.tracker))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after collection: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # transform data
    try:
        self.transform()
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("transformation")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after transformation: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # remove superfluous levels
    try:
        self.prune()
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("pruning")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after pruning: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # remove group plots
    try:
        self.group()
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("grouping")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after grouping: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    self.debug("profile: started: renderer: %s" % (self.renderer))
    try:
        result = self.render()
    except:
        return ResultBlocks(ResultBlocks(Utils.buildException("rendering")))
    self.debug("profile: finished: renderer: %s" % (self.renderer))

    return result
def collect(self):
    """collect all data.

    Data is stored in a multi-level dictionary (DataTree).

    Raises ValueError when the tracker yields no tracks, either
    before or after filtering.
    """
    self.data = odict()

    is_function, datapaths = self.getDataPaths(self.tracker)

    # if function, no datapaths
    if is_function:
        d = self.getData(())
        # save in data tree as leaf
        DataTree.setLeaf(self.data, ("all",), d)
        self.debug("%s: collecting data finished for function." % (self.tracker))
        return

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks found - no output" % self.tracker)
        raise ValueError("no tracks found from %s" % self.tracker)

    # filter data paths
    datapaths = self.filterDataPaths(datapaths)

    # if no tracks remain after filtering, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks remain after filtering - no output" % self.tracker)
        raise ValueError("no tracks found from %s" % self.tracker)

    all_paths = list(itertools.product(*datapaths))
    self.debug("%s: collecting data started for %i data paths" %
               (self.tracker, len(all_paths)))

    self.data = odict()
    for path in all_paths:
        d = self.getData(path)

        # ignore empty data sets
        # (fix: identity comparison with None instead of ``==``)
        if d is None:
            continue

        # save in data tree as leaf
        DataTree.setLeaf(self.data, path, d)

    self.debug("%s: collecting data finished for %i data paths" %
               (self.tracker, len(all_paths)))
def group(self):
    '''rearrange data tree for grouping and set group level.

    Through grouping the data tree is rearranged such that
    the level at which data will be grouped will be the
    top (0th) level in the nested dictionary.

    Returns the (possibly rearranged) data tree.
    '''
    data_paths = DataTree.getPaths(self.data)
    nlevels = len(data_paths)

    # get number of levels required by renderer
    try:
        renderer_nlevels = self.renderer.nlevels
    except AttributeError:
        renderer_nlevels = 0

    if self.groupby == "none":
        self.group_level = nlevels - 1

    elif self.groupby == "track":
        # track is first level
        self.group_level = 1
        # add pseudo levels, if there are not enough levels
        # to group by track
        if nlevels == renderer_nlevels:
            d = odict()
            for x in data_paths[0]:
                d[x] = odict(((x, self.data[x]),))
            self.data = d

    elif self.groupby == "slice":
        # rearrange tracks and slices in data tree
        if nlevels <= 2:
            self.warn("grouping by slice, but only %i levels in data tree - all are grouped" % nlevels)
            self.group_level = 0
        else:
            self.data = DataTree.swop(self.data, 0, 1)
            self.group_level = 1

    elif self.groupby == "all":
        # group everything together
        self.group_level = 0

    else:
        # neither group by slice or track ("ungrouped")
        self.group_level = 0

    return self.data
def render(self, work, path):
    '''collect tracker keyword entries (e.g. ``text``, ``rst``) from
    *work* into result blocks.

    Returns a :class:`ResultBlocks` container with one block per
    keyword found at each node.
    '''
    # initiate output structure
    results = ResultBlocks(title=path2str(path))

    labels = DataTree.getPaths(work)

    # iterate over all items at leaf.
    # FIX: the loop variable used to be named ``path``, shadowing the
    # method argument; renamed for clarity (behavior unchanged - the
    # per-block title always used the node's own path).
    for node_path, branch in DataTree.getNodes(work, len(labels) - 2):
        for key in Utils.TrackerKeywords:
            if key in branch:
                # add a result block
                results.append(ResultBlock(branch[key], title=path2str(node_path)))

    return results
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method for
    each leaf/branch at level :attr:`nlevels`.

    When :attr:`split_at` is set and the first index level has at
    least that many labels, the dataframe is rendered in chunks of
    ``split_at`` labels (plus any labels matching
    :attr:`split_always`).
    '''
    if self.nlevels == None:
        raise NotImplementedError("incomplete implementation of %s" % str(self))

    try:
        # hierarchical index
        labels = dataframe.index.levels
        paths = dataframe.index.unique()
    except AttributeError:
        # flat index - fall back to a single dummy level
        labels = ['dummy1']
        paths = ['dummy1']

    result = ResultBlocks()
    #print len(labels), self.nlevels
    #print 'dataframe=', dataframe
    if self.nlevels != -1 and len(labels) != self.nlevels:
        raise ValueError("at path %s: expected %i levels - got %i: %s" %
                         (str(path), self.nlevels, len(labels), str(labels)))
        #result.append( EmptyResultBlock( title = path2str(path) ) )
        #return result

    if not self.split_at:
        # print without splitting
        result.extend(self.render(dataframe, path))
    else:
        # split dataframe at first index
        first_level_labels = dataframe.index.get_level_values(0).unique()

        if len(first_level_labels) < self.split_at:
            result.extend(self.render(dataframe, path))
        else:
            # select tracks to always add to split
            # pick always tracks
            if self.split_always:
                always = [x for x, y in itertools.product(first_level_labels,
                                                          self.split_always)
                          if re.search(y, x)]
            else:
                always = []

            for z, x in enumerate(range(0, len(first_level_labels), self.split_at)):
                select = list(DataTree.unique(
                    always + list(first_level_labels[x:x + self.split_at])))

                if len(dataframe.index.names) == 1:
                    # if only one level, use loc to obtain dataframe
                    work = pandas.concat([dataframe.loc[[s]] for s in select],
                                         keys=select)
                    # index is duplicated, so ignore second level
                    work.reset_index(range(1, len(work.index.names)),
                                     drop=True, inplace=True)
                else:
                    work = pandas.concat([dataframe.xs(s, axis=0) for s in select],
                                         keys=select)

                # reconcile index names
                work.index.names = dataframe.index.names

                result.extend(self.render(work, path + (z,)))

    return result
def getData(self, path):
    """get data for track and slice. Save data in persistent cache for further use.

    For functions, path should be an empty tuple.
    """
    if path:
        key = DataTree.path2str(path)
    else:
        key = "all"

    result, fromcache = None, False
    # NOTE(review): this condition reads the cache when nocache is off
    # OR when tracker options are set; the sibling implementation in
    # this file uses ``not self.nocache and not self.tracker_options``
    # (trackers with options are not cached). Confirm whether the
    # ``or`` here is intended or an inverted condition.
    if not self.nocache or self.tracker_options:
        try:
            result = self.cache[key]
            fromcache = True
        except KeyError:
            pass

    kwargs = {}
    if self.tracker_options:
        kwargs["options"] = self.tracker_options

    if result == None:
        try:
            result = self.tracker(*path, **kwargs)
        except Exception, msg:
            self.warn("exception for tracker '%s', path '%s': msg=%s" %
                      (str(self.tracker), DataTree.path2str(path), msg))
            if VERBOSE:
                self.warn(traceback.format_exc())
            raise
    # NOTE(review): no cache write-back and no ``return result`` are
    # visible here, unlike the sibling implementation - this block may
    # be truncated in the extraction; verify against the full file.
def prune(self):
    '''prune data tree.

    Remove all empty leaves, and remove levels that are superfluous -
    i.e. levels containing only a single label where all labels in the
    hierarchy below are the same.

    Labels with reserved key-words such as ``text``, ``rst`` and
    ``xls`` are ignored, as are the first and last level.

    The removed (level, label) pairs are kept in :attr:`pruned` for
    later use during conversion.
    '''
    # bottom-up: only prune leaves when they are superfluous
    removed = DataTree.prune(self.data,
                             ignore=Utils.TrackerKeywords,
                             method='bottom-up')

    for lvl, lbl in removed:
        self.debug("pruned level %i from data tree: label='%s'" % (lvl, lbl))

    # save for conversion
    self.pruned = removed
def transform(self, data, path):
    '''apply multiple-testing correction to p-values in the tree.

    Gathers values under :attr:`pval` from every branch, runs R's
    ``p.adjust`` (method :attr:`method`) over the pooled values, and
    writes the adjusted values back under the key ``P-adjust``.
    '''
    from rpy2.robjects import r as R

    paths, lengths, values = [], [], []
    labels = DataTree.getPaths(data)
    paths = list(itertools.product(*labels[:-1]))

    # first pass: pool p-values, remembering how many came from each
    # branch (0 marks a scalar rather than a sequence)
    for path in paths:
        work = DataTree.getLeaf(data, path)
        try:
            lengths.append(len(work[self.pval]))
            values.extend(work[self.pval])
        except TypeError:
            # scalar value - len() failed
            lengths.append(0)
            values.append(work[self.pval])

    padj = R["p.adjust"](values, method=self.method)
    # convert the R vector into a plain python list
    padj = [x for x in padj]

    # second pass: hand the adjusted values back out in the same
    # order they were collected (consumes ``padj`` front-to-back)
    for path in paths:
        num = lengths.pop(0)
        if num > 0:
            new_values = padj[0:num]
            padj = padj[num:]
        else:
            # scalar goes back as a scalar
            new_values = padj[0]
            padj = padj[1:]

        if path:
            work = odict(DataTree.getLeaf(data, path))
            work["P-adjust"] = new_values
            DataTree.setLeaf(data, path, work)
        else:
            data["P-adjust"] = new_values

    return data
def __call__(self, data):
    '''apply :meth:`transform` to every leaf at depth
    ``len(paths) - nlevels`` of the data tree.

    Branches for which the transformation yields no data are removed.
    Returns the (possibly replaced) data tree.
    '''
    # idiom fix: identity comparison with None
    if self.nlevels is None:
        raise NotImplementedError("incomplete implementation of %s" % str(self))

    labels = DataTree.getPaths(data)
    debug("transform: started with paths: %s" % labels)
    assert len(labels) >= self.nlevels, \
        "expected at least %i levels - got %i" % (self.nlevels, len(labels))

    paths = list(itertools.product(*labels[:-self.nlevels]))
    for path in paths:
        work = DataTree.getLeaf(data, path)
        if not work:
            continue
        new_data = self.transform(work, path)
        if new_data:
            if path:
                DataTree.setLeaf(data, path, new_data)
            else:
                # set new root
                data = new_data
        else:
            warn("no data at %s - removing branch" % str(path))
            DataTree.removeLeaf(data, path)

    debug("transform: finished with paths: %s" % DataTree.getPaths(data))

    return data
def group(self):
    """rearrange data tree for grouping.

    and set group level.

    A group_level of -1 signals "no grouping" to the renderer stage.
    """
    data_paths = DataTree.getPaths(self.data)
    nlevels = len(data_paths)

    # get number of levels required by renderer
    try:
        renderer_nlevels = self.renderer.nlevels
    except AttributeError:
        renderer_nlevels = 0

    if self.groupby == "none":
        self.group_level = renderer_nlevels
    elif self.groupby == "track":
        # track is first level
        self.group_level = 0
        # add pseudo levels, if there are not enough levels
        # to group by track
        if nlevels == renderer_nlevels:
            d = odict()
            for x in data_paths[0]:
                d[x] = odict(((x, self.data[x]),))
            self.data = d
    elif self.groupby == "slice":
        # rearrange tracks and slices in data tree
        if nlevels <= 2:
            warn("grouping by slice, but only %i levels in data tree - all are grouped" % nlevels)
            self.group_level = -1
        else:
            self.data = DataTree.swop(self.data, 0, 1)
            self.group_level = 0
    elif self.groupby == "all":
        # group everything together
        self.group_level = -1
    else:
        # neither group by slice or track ("ungrouped")
        self.group_level = -1
def getData(self, path):
    """get data for track and slice. Save data in persistent cache for further use.

    For functions, path should be an empty tuple.
    """
    if path:
        key = DataTree.path2str(path)
    else:
        key = "all"

    result, fromcache = None, False
    # trackers with options are not cached
    if not self.nocache and not self.tracker_options:
        try:
            result = self.cache[key]
            fromcache = True
        except KeyError:
            pass
        except RuntimeError as msg:
            raise RuntimeError(
                "error when accessing key %s from cache: %s "
                "- potential problem with unpickable object?" % (key, msg))

    kwargs = {}
    if self.tracker_options:
        kwargs['options'] = self.tracker_options

    if result is None:
        try:
            result = self.tracker(*path, **kwargs)
        except Exception as msg:
            self.warn("exception for tracker '%s', path '%s': msg=%s" %
                      (str(self.tracker), DataTree.path2str(path), msg))
            if VERBOSE:
                self.warn(traceback.format_exc())
            raise

    # store in cache
    if not self.nocache and not fromcache:
        # exception - do not store data frames
        # test with None fails for some reason
        self.cache[key] = result

    return result
def prune(self): """prune data tree. Remove all empty leaves. Remove all levels from the data tree that are superfluous, i.e. levels that contain only a single label all labels in the hierarchy below are the same. Ignore both the first and last level for this analyis. """ # remove all empty leaves DataTree.removeEmptyLeaves(self.data) # prune superfluous levels data_paths = DataTree.getPaths(self.data) nlevels = len(data_paths) # get number of levels required by renderer try: renderer_nlevels = self.renderer.nlevels except AttributeError: renderer_nlevels = 0 # do not prune for renderers that want all data if renderer_nlevels < 0: return levels_to_prune = [] for level in range(1, nlevels - 1): # check for single label in level if len(data_paths[level]) == 1: label = data_paths[level][0] prefixes = DataTree.getPrefixes(self.data, level) keep = False for prefix in prefixes: leaves = DataTree.getLeaf(self.data, prefix) if len(leaves) > 1 or label not in leaves: keep = True break if not keep: levels_to_prune.append((level, label)) levels_to_prune.reverse() # only prune to the minimum of levels required by renderer at most # levels_to_prune = levels_to_prune[:nlevels - renderer_nlevels] for level, label in levels_to_prune: self.debug("pruning level %i from data tree: label='%s'" % (level, label)) DataTree.removeLevel(self.data, level)
def buildMatrix(self, work,
                missing_value=0,
                apply_transformations=True,
                take=None,
                dtype=float):
    """build a matrix from work, a two-level nested dictionary.

    If *take* is given, then the matrix will be built from
    level 3, taking *take* from the deepest level only.

    This method will also apply conversions if apply_transformations
    is set.

    Returns (matrix, rows, columns); rows and columns are converted
    to strings.

    Note: the dtype default used to be ``numpy.float``, an alias for
    the builtin ``float`` that was removed in NumPy 1.24 - the
    builtin is the drop-in replacement.
    """
    labels = DataTree.getPaths(work)
    levels = len(labels)
    if take:
        if levels != 3:
            raise ValueError("expected three labels")
        if take not in labels[-1]:
            raise ValueError("no data on `%s`" % take)
        take_f = lambda row, column: work[row][column][take]
    else:
        if levels != 2:
            raise ValueError("expected two levels")
        take_f = lambda row, column: work[row][column]

    rows, columns = labels[:2]

    self.debug("creating matrix")
    matrix = numpy.array([missing_value] * (len(rows) * len(columns)), dtype)
    matrix.shape = (len(rows), len(columns))
    self.debug("constructing matrix")
    for x, row in enumerate(rows):
        for y, column in enumerate(columns):
            # missing values from DataTree
            try:
                v = take_f(row, column)
            except KeyError:
                continue

            # empty values from DataTree
            try:
                if len(v) == 0:
                    continue
            except TypeError:
                # scalars have no len() - accept them
                pass

            # convert
            try:
                matrix[x, y] = v
            except ValueError:
                raise ValueError("malformatted data: expected scalar, got '%s'" %
                                 str(work[row][column]))
            except TypeError:
                raise TypeError("malformatted data: expected scalar, got '%s'" %
                                str(work[row][column]))

    if self.mConverters and apply_transformations:
        for converter in self.mConverters:
            self.debug("applying converter %s" % converter)
            matrix, rows, columns = converter(matrix, rows, columns)

    # convert rows/columns to str (might be None)
    rows = [str(x) for x in rows]
    columns = [str(x) for x in columns]

    return matrix, rows, columns
def __call__(self, data):
    '''apply the transformer, adapting :attr:`nlevels` when it is 0.

    A value of 0 means: use the full depth of the incoming data tree.
    '''
    if self.nlevels == 0:
        available_levels = DataTree.getPaths(data)
        self.nlevels = len(available_levels)
    return Transformer.__call__(self, data)
def main(): parser = optparse.OptionParser( version = "%prog version: $Id$", usage = USAGE ) parser.add_option( "-v", "--verbose", dest="loglevel", type="int", help="loglevel. The higher, the more output [default=%default]" ) parser.add_option( "-i", "--view", dest="view", action="store_true", help="view keys in cache [default=%default]" ) parser.add_option( "-t", "--tracker", dest="tracker", type="string", help="tracker to use [default=%default]" ) parser.add_option( "-a", "--tracks", dest="tracks", type="string", help="tracks to include [default=%default]" ) parser.add_option( "-s", "--slices", dest="slices", type="string", help="slices to include [default=%default]" ) parser.add_option( "-g", "--groupby", dest="groupby", type="choice", choices=("track", "slice", "all"), help="groupby by track or slice [default=%default]" ) parser.add_option( "-f", "--format", dest="format", type="choice", choices=("tsv", "csv"), help="output format [default=%default]" ) parser.set_defaults( loglevel = 2, view = False, tracker = None, tracks = None, slices = None, groupby = "slice", format = "tsv", ) (options, args) = parser.parse_args() if len(args) != 1 and options.tracker == None : print USAGE raise ValueError("please supply a tracker.""") if options.tracker: tracker = options.tracker else: tracker = args[0] cache = Cache.Cache( tracker, mode = "r" ) if options.view: keys = [ x.split("/") for x in cache.keys()] sys.stdout.write( "# available tracks\n" ) sys.stdout.write( "track\n%s" % "\n".join( set([ x[0] for x in keys] ) ) ) sys.stdout.write( "\n" ) sys.stdout.write( "# available slices\n" ) sys.stdout.write( "slice\n%s" % "\n".join( set([ x[1] for x in keys] ) ) ) sys.stdout.write( "\n" ) return data = DataTree.fromCache( cache, tracks = options.tracks, slices = options.slices, groupby = options.groupby ) table, row_headers, col_headers = DataTree.tree2table( data ) if options.format in ("tsv", "csv"): if options.format == "tsv": sep = "\t" elif options.format == "csv": sep 
= "," sys.stdout.write( sep+ sep.join( col_headers) + "\n") for h, row in zip( row_headers, table ): sys.stdout.write( "%s%s%s\n" % (h, sep, sep.join( row)))
def __call__(self, data):
    '''collapse the data tree into a single ``all`` branch holding a
    dataframe built from the whole tree.'''
    frame = DataTree.asDataFrame(data)
    return odict((('all', frame),))
def __call__(self, *args, **kwargs):
    '''run the full dispatch pipeline and return the rendered result.

    Stages: parseArguments -> (empty-tracker shortcut) -> collect ->
    transform -> (special renderers) -> restrict -> exclude ->
    prune -> group -> render. Each stage is wrapped so that an
    exception is logged and converted into a ResultBlocks error
    block, which is returned immediately.
    '''
    #self.debug( "%s: heap at start\n%s" % (self, str(HP.heap()) ))

    try:
        self.parseArguments(*args, **kwargs)
    except:
        self.error("%s: exception in parsing" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("parsing")))

    # collect no data if tracker is the empty tracker
    # and go straight to rendering
    try:
        if self.tracker.getTracks() == ["empty"]:
            # is instance does not work because of module mapping
            # type(Tracker.Empty) == SphinxReport.Tracker.Empty
            # type(self.tracker) == Tracker.Empty
            # if isinstance( self.tracker, Tracker.Empty):
            result = self.renderer()
            return ResultBlocks(result)
    except AttributeError:
        # for function trackers
        pass

    self.debug("profile: started: tracker: %s" % (self.tracker))

    # collecting data
    try:
        self.collect()
    except:
        self.error("%s: exception in collection" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("collection")))
    finally:
        self.debug("profile: finished: tracker: %s" % (self.tracker))

    if len(self.data) == 0:
        self.info("%s: no data - processing complete" % self.tracker)
        return None

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after collection: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # self.debug( "%s: heap after collection\n%s" % (self, str(HP.heap()) ))

    # transform data
    try:
        self.transform()
    except:
        self.error("%s: exception in transformation" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("transformation")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after transformation: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # special Renderers - do not proceed
    # Special renderers
    # NOTE(review): ``('')`` is just the empty string, not a tuple -
    # confirm whether ``('',)`` or ``()`` was intended as the path.
    if isinstance(self.renderer, Renderer.User):
        results = ResultBlocks(title="main")
        results.append(self.renderer(self.data, ('')))
        return results
    elif isinstance(self.renderer, Renderer.Debug):
        results = ResultBlocks(title="main")
        results.append(self.renderer(self.data, ('')))
        return results

    # self.debug( "%s: heap after transformation\n%s" % (self, str(HP.heap()) ))

    # restrict
    try:
        self.restrict()
    except:
        self.error("%s: exception in restrict" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("restrict")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after restrict: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # exclude
    try:
        self.exclude()
    except:
        self.error("%s: exception in exclude" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("exclude")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after exclude: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # remove superfluous levels
    try:
        self.prune()
    except:
        self.error("%s: exception in pruning" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("pruning")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after pruning: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # remove group plots
    try:
        self.group()
    except:
        self.error("%s: exception in grouping" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("grouping")))

    data_paths = DataTree.getPaths(self.data)
    self.debug("%s: after grouping: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    self.debug("profile: started: renderer: %s" % (self.renderer))

    try:
        result = self.render()
    except:
        self.error("%s: exception in rendering" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("rendering")))
    finally:
        self.debug("profile: finished: renderer: %s" % (self.renderer))

    #self.debug( "%s: heap at end\n%s" % (self, str(HP.heap()) ))

    return results
def render(self):
    '''supply the :class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option.

    returns a ResultBlocks data structure.
    '''
    self.debug("%s: rendering data started for %i items" %
               (self, len(self.data)))

    # get number of levels required by renderer
    try:
        renderer_nlevels = self.renderer.nlevels
    except AttributeError:
        # set to -1 to avoid any grouping
        # important for user renderers that are functions
        # and have no level attribute.
        renderer_nlevels = -1

    # initiate output structure
    results = ResultBlocks(title="")

    # convert to data series
    # The data is melted, i.e,
    # BMW price 10000
    # BMW speed 100
    # Golf price 5000
    # Golf speed 50
    dataframe = DataTree.asDataFrame(self.data)
    # dataframe.write_csv( "test.csv" )

    if dataframe is None:
        self.warn("%s: no data after conversion" % self)
        raise ValueError("no data for renderer")

    # special patch: set column names to pruned levels
    # if there are no column names
    if len(dataframe.columns) == len(self.pruned):
        if list(dataframe.columns) == list(range(len(dataframe.columns))):
            dataframe.columns = [x[1] for x in self.pruned]

    index = dataframe.index

    def getIndexLevels(index):
        # number of levels in a (possibly hierarchical) index
        try:
            # hierarchical index
            nlevels = len(index.levels)
        except AttributeError:
            nlevels = 1
            index = [(x,) for x in index]
            #raise ValueError('data frame without MultiIndex' )
        return nlevels

    nlevels = getIndexLevels(index)

    self.debug("%s: rendering data started. levels=%i, required levels>=%i, group_level=%s" %
               (self, nlevels, renderer_nlevels, str(self.group_level)))

    if renderer_nlevels < 0 and self.group_level <= 0:
        # no grouping for renderers that will accept
        # a dataframe with any level of indices and no explicit
        # grouping has been asked for.
        results.append(self.renderer(dataframe, path=()))
    else:
        # user specified group level by default
        group_level = self.group_level

        # set group level to maximum allowed by renderer
        if renderer_nlevels >= 0:
            group_level = max(nlevels - renderer_nlevels, group_level)

        # add additional level if necessary
        if nlevels < group_level:
            prefix = tuple(["level%i" % x for x in range(group_level - nlevels)])
            dataframe.index = pandas.MultiIndex.from_tuples(
                [prefix + x for x in dataframe.index])

        # used to be: group_level + 1
        # hierarchical index
        # numpy.unique converts everything to a string
        # which is not consistent with selecting later
        # NOTE(review): ``paths`` is indexed below (``paths[0]``), so
        # this relies on Python 2's list-returning map().
        paths = map(tuple,
                    DataTree.unique([x[:group_level]
                                     for x in dataframe.index.unique()]))

        pathlength = len(paths[0]) - 1

        # NOTE(review): pandas.core.index.MultiIndex and
        # DataFrame.sortlevel are long-deprecated spellings
        # (pandas.MultiIndex / sort_index in modern pandas).
        is_hierarchical = isinstance(dataframe.index,
                                     pandas.core.index.MultiIndex)

        if is_hierarchical:
            # Note: can only sort hierarchical indices
            dataframe = dataframe.sortlevel()

            if dataframe.index.lexsort_depth < pathlength:
                raise ValueError('could not sort data frame: sort depth=%i < pathlength=%i, dataframe=%s'
                                 % (dataframe.index.lexsort_depth, pathlength, dataframe))

        for path in paths:
            if path:
                if len(path) == nlevels:
                    # extract with loc in order to obtain dataframe
                    work = dataframe.loc[[path]]
                else:
                    # select data frame as cross-section
                    work = dataframe.xs(path, axis=0)
            else:
                # empty tuple - use full data set
                work = dataframe

            # remove columns and rows in work that are all Na
            work = work.dropna(axis=1, how='all').dropna(axis=0, how='all')

            if is_hierarchical and renderer_nlevels >= 0:
                work_levels = getIndexLevels(work.index)
                # reduce levels of indices required to that required
                # for Renderer. This occurs if groupby=none.
                if work_levels > renderer_nlevels:
                    sep = work_levels - (renderer_nlevels - 1)
                    tuples = [(DataTree.path2str(x[:sep]),) + x[sep:]
                              for x in work.index]
                    work.index = pandas.MultiIndex.from_tuples(tuples)

            try:
                results.append(self.renderer(work, path=path))
            except:
                self.error("%s: exception in rendering" % self)
                results.append(ResultBlocks(Utils.buildException("rendering")))

    if len(results) == 0:
        self.warn("renderer returned no data.")
        raise ValueError("renderer returned no data.")

    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))

    return results
def asSpreadSheet(self, dataframe, row_headers, col_headers, title):
    '''save the table as an xls file.

    Multiple files of the same Renderer/Tracker combination are
    distinguished by the title.

    Returns a :class:`ResultBlock` whose ``xls`` attribute carries the
    openpyxl workbook.
    '''
    self.debug("%s: saving %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))

    quick = len(dataframe) > 10000
    if quick:
        # quick writing, only append method works
        wb = openpyxl.Workbook(optimized_write=True)

        def addWorksheet(wb, dataframe, title):
            ws = wb.create_sheet()

            ws.append([""] + list(col_headers))
            for x, row in enumerate(dataframe.iterrows()):
                # NOTE(review): iterrows() yields (index, Series)
                # pairs, so list(row) is [index, Series] - confirm
                # these are the intended cell values.
                ws.append([path2str(row_headers[x])] + list(row))

            # patch: maximum title length seems to be 31
            ws.title = title[:30]
    else:
        # do it cell-by-cell, this might be slow
        wb = openpyxl.Workbook(optimized_write=False)

        def addWorksheet(wb, dataframe, title):
            ws = wb.create_sheet()

            # regex to detect rst hyperlinks
            regex_link = re.compile('`(.*) <(.*)>`_')
            for column, column_name in enumerate(dataframe.columns):
                c = ws.cell(row=0, column=column)
                c.value = column_name
                dataseries = dataframe[column_name]
                if dataseries.dtype == object:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        value = str(value)
                        if value.startswith('`'):
                            c.value, c.hyperlink = regex_link.match(value).groups()
                        else:
                            c.value = value
                else:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        c.value = value
            # patch: maximum title length seems to be 31
            ws.title = title[:30]

    is_hierarchical = isinstance(dataframe.index,
                                 pandas.core.index.MultiIndex)

    split = is_hierarchical and len(dataframe.index.levels) > 1

    if split:
        # create separate worksheets for nested indices
        nlevels = len(dataframe.index.levels)
        paths = map(tuple,
                    DataTree.unique([x[:nlevels - 1]
                                     for x in dataframe.index.unique()]))

        # the first (default) sheet becomes a linked table of contents
        ws = wb.worksheets[0]
        ws.title = 'Summary'
        ws.append([dataframe.index.labels[:nlevels - 1]] + ["Worksheet", "Rows"])

        for row, path in enumerate(paths):
            # select data frame as cross-section
            work = dataframe.xs(path, axis=0)
            title = path2str(path)[:30]
            ws.append(list(path) + [title, len(work)])
            c = ws.cell(row=row + 1, column=nlevels)
            c.hyperlink = "#%s" % title
            addWorksheet(wb, work, title=title)
    else:
        # BUG FIX: the original called the undefined name
        # ``writeWorksheet`` here - the locally defined helper is
        # ``addWorksheet``, so the unsplit path raised NameError.
        addWorksheet(wb, dataframe, title=title)

    # write result block
    lines = []
    lines.append("`%i x %i table <#$xls %s$#>`__" %
                 (len(row_headers), len(col_headers), title))
    lines.append("")
    r = ResultBlock("\n".join(lines), title=title)
    r.xls = wb

    self.debug("%s: saved %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))
    return r