def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method for
    each leaf/branch at level :attr:`nlevels`.

    If :attr:`split_at` is set and the first index level contains at
    least that many labels, the dataframe is split into chunks of
    ``split_at`` labels and :meth:`render` is called once per chunk,
    with the chunk number appended to *path*.  Labels matching any
    regular expression in :attr:`split_always` are included in every
    chunk.

    :param dataframe: data to render; may have a flat or hierarchical
        (MultiIndex) row index.
    :param path: tuple describing the position of *dataframe* within
        the full data tree.
    :return: a :class:`ResultBlocks` container with the rendered output.
    :raises NotImplementedError: if :attr:`nlevels` has not been set.
    :raises ValueError: if the index depth does not match
        :attr:`nlevels`.
    '''
    # fix: use identity comparison with the None singleton (was `== None`)
    if self.nlevels is None:
        raise NotImplementedError("incomplete implementation of %s" % str(self))

    try:
        # hierarchical index: one entry in .levels per index level
        labels = dataframe.index.levels
        paths = dataframe.index.unique()
    except AttributeError:
        # flat index - treat as a single dummy level
        labels = ['dummy1']
        paths = ['dummy1']

    result = ResultBlocks()

    # nlevels == -1 means "accept any depth"
    if self.nlevels != -1 and len(labels) != self.nlevels:
        raise ValueError("at path %s: expected %i levels - got %i: %s" %
                         (str(path), self.nlevels, len(labels), str(labels)))

    if not self.split_at:
        # print without splitting
        result.extend(self.render(dataframe, path))
    else:
        # split dataframe at first index
        first_level_labels = dataframe.index.get_level_values(0).unique()

        if len(first_level_labels) < self.split_at:
            # too few labels to warrant splitting - render as one
            result.extend(self.render(dataframe, path))
        else:
            # select tracks to always add to split
            # pick always tracks
            if self.split_always:
                # labels matching any split_always regex go into every chunk
                always = [x for x, y in itertools.product(first_level_labels,
                                                          self.split_always)
                          if re.search(y, x)]
            else:
                always = []

            for z, x in enumerate(range(0, len(first_level_labels), self.split_at)):
                select = list(DataTree.unique(
                    always + list(first_level_labels[x:x + self.split_at])))

                if len(dataframe.index.names) == 1:
                    # if only one level, use loc to obtain dataframe
                    # index is duplicated by concat's keys, so drop the
                    # duplicated inner levels again
                    work = pandas.concat([dataframe.loc[[s]] for s in select],
                                         keys=select)
                    work.reset_index(range(1, len(work.index.names)),
                                     drop=True, inplace=True)
                else:
                    work = pandas.concat([dataframe.xs(s, axis=0) for s in select],
                                         keys=select)

                # reconcile index names
                work.index.names = dataframe.index.names

                # append the chunk number to the path so each chunk
                # renders under a distinct title
                result.extend(self.render(work, path + (z,)))

    return result
def render(self):
    '''supply the :class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option.

    returns a ResultBlocks data structure.
    '''
    self.debug("%s: rendering data started for %i items" %
               (self, len(self.data)))

    # get number of levels required by renderer
    try:
        renderer_nlevels = self.renderer.nlevels
    except AttributeError:
        # set to -1 to avoid any grouping
        # important for user renderers that are functions
        # and have no level attribute.
        renderer_nlevels = -1

    # initiate output structure
    results = ResultBlocks(title="")

    # convert to data series
    # The data is melted, i.e,
    # BMW price 10000
    # BMW speed 100
    # Golf price 5000
    # Golf speed 50
    dataframe = DataTree.asDataFrame(self.data)
    # dataframe.write_csv( "test.csv" )

    if dataframe is None:
        self.warn("%s: no data after conversion" % self)
        raise ValueError("no data for renderer")

    # special patch: set column names to pruned levels
    # if there are no column names
    if len(dataframe.columns) == len(self.pruned):
        # columns are the default integer range -> replace with the
        # labels recorded when the levels were pruned
        if list(dataframe.columns) == list(range(len(dataframe.columns))):
            dataframe.columns = [x[1] for x in self.pruned]

    index = dataframe.index

    def getIndexLevels(index):
        # return the number of levels in *index*: len(levels) for a
        # MultiIndex, 1 for a flat index.
        try:
            # hierarchical index
            nlevels = len(index.levels)
        except AttributeError:
            nlevels = 1
            # NOTE(review): this rebinding of the local `index` has no
            # effect outside this helper - only nlevels is returned.
            index = [(x,) for x in index]
            # raise ValueError('data frame without MultiIndex' )
        return nlevels

    nlevels = getIndexLevels(index)

    self.debug("%s: rendering data started. levels=%i, required levels>=%i, group_level=%s" %
               (self, nlevels, renderer_nlevels, str(self.group_level)))

    if renderer_nlevels < 0 and self.group_level <= 0:
        # no grouping for renderers that will accept
        # a dataframe with any level of indices and no explicit
        # grouping has been asked for.
        results.append(self.renderer(dataframe, path=()))
    else:
        # user specified group level
        group_level = self.group_level

        # set group level to maximum allowed by renderer
        if renderer_nlevels >= 0:
            group_level = max(nlevels - renderer_nlevels, group_level)

        # add additional level if necessary
        if nlevels < group_level:
            prefix = tuple(["level%i" % x
                            for x in range(group_level - nlevels)])
            dataframe.index = pandas.MultiIndex.from_tuples(
                [prefix + x for x in dataframe.index])

        # used to be: group_level + 1
        # hierarchical index
        # numpy.unique converts everything to a string
        # which is not consistent with selecting later
        # NOTE(review): paths[0] below requires map() to return a list,
        # i.e. Python 2 semantics - under Python 3 this would need
        # list(map(...)).  Confirm target interpreter.
        paths = map(tuple, DataTree.unique(
            [x[:group_level] for x in dataframe.index.unique()]))

        pathlength = len(paths[0]) - 1

        is_hierarchical = isinstance(dataframe.index,
                                     pandas.core.index.MultiIndex)

        if is_hierarchical:
            # Note: can only sort hierarchical indices
            dataframe = dataframe.sortlevel()
            if dataframe.index.lexsort_depth < pathlength:
                raise ValueError('could not sort data frame: sort depth=%i < pathlength=%i, dataframe=%s'
                                 % (dataframe.index.lexsort_depth,
                                    pathlength, dataframe))

        for path in paths:
            if path:
                if len(path) == nlevels:
                    # extract with loc in order to obtain dataframe
                    work = dataframe.loc[[path]]
                else:
                    # select data frame as cross-section
                    work = dataframe.xs(path, axis=0)
            else:
                # empty tuple - use full data set
                work = dataframe

            # remove columns and rows in work that are all Na
            work = work.dropna(axis=1, how='all').dropna(axis=0, how='all')

            if is_hierarchical and renderer_nlevels >= 0:
                work_levels = getIndexLevels(work.index)
                # reduce levels of indices required to that required
                # for Renderer. This occurs if groupby=none.
                if work_levels > renderer_nlevels:
                    # collapse the leading index levels into a single
                    # string level so the renderer sees exactly
                    # renderer_nlevels levels
                    sep = work_levels - (renderer_nlevels - 1)
                    tuples = [(DataTree.path2str(x[:sep]),) + x[sep:]
                              for x in work.index]
                    work.index = pandas.MultiIndex.from_tuples(tuples)

            # NOTE(review): bare except is very broad - it also catches
            # KeyboardInterrupt/SystemExit; `except Exception:` would be
            # safer, but kept as-is here since errors are deliberately
            # converted into result blocks.
            try:
                results.append(self.renderer(work, path=path))
            except:
                self.error("%s: exception in rendering" % self)
                results.append(ResultBlocks(Utils.buildException("rendering")))

    if len(results) == 0:
        self.warn("renderer returned no data.")
        raise ValueError("renderer returned no data.")

    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))

    return results
def asSpreadSheet(self, dataframe, row_headers, col_headers, title):
    '''save the table as an xls file.

    Multiple files of the same Renderer/Tracker combination are
    distinguished by the title.

    :param dataframe: data table to export.
    :param row_headers: labels for the rows, one per dataframe row.
    :param col_headers: labels for the columns.
    :param title: worksheet/result title; truncated to 30 characters
        because openpyxl limits sheet titles to 31 characters.
    :return: a :class:`ResultBlock` whose ``xls`` attribute holds the
        openpyxl workbook and whose text contains an rst link
        placeholder for the spreadsheet.
    '''
    self.debug("%s: saving %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))

    # for large tables use openpyxl's optimized (append-only) writer
    quick = len(dataframe) > 10000
    if quick:
        # quick writing, only append method works
        wb = openpyxl.Workbook(optimized_write=True)

        def addWorksheet(wb, dataframe, title):
            '''append *dataframe* to a new worksheet (append-only mode).'''
            ws = wb.create_sheet()

            ws.append([""] + list(col_headers))
            for x, row in enumerate(dataframe.iterrows()):
                ws.append([path2str(row_headers[x])] + list(row))

            # patch: maximum title length seems to be 31
            ws.title = title[:30]
    else:
        # do it cell-by-cell, this might be slow
        wb = openpyxl.Workbook(optimized_write=False)

        def addWorksheet(wb, dataframe, title):
            '''write *dataframe* cell-by-cell, converting rst hyperlinks
            (`text <url>`_) into real spreadsheet hyperlinks.'''
            ws = wb.create_sheet()

            # regex to detect rst hypelinks
            regex_link = re.compile('`(.*) <(.*)>`_')
            for column, column_name in enumerate(dataframe.columns):
                c = ws.cell(row=0, column=column)
                c.value = column_name
                dataseries = dataframe[column_name]
                if dataseries.dtype == object:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        value = str(value)
                        if value.startswith('`'):
                            c.value, c.hyperlink = \
                                regex_link.match(value).groups()
                        else:
                            c.value = value
                else:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        c.value = value
            # patch: maximum title length seems to be 31
            ws.title = title[:30]

    is_hierarchical = isinstance(dataframe.index,
                                 pandas.core.index.MultiIndex)

    split = is_hierarchical and len(dataframe.index.levels) > 1

    if split:
        # create separate worksheets for nested indices
        nlevels = len(dataframe.index.levels)
        paths = map(tuple, DataTree.unique(
            [x[:nlevels - 1] for x in dataframe.index.unique()]))

        # first sheet becomes a summary with links to the others
        ws = wb.worksheets[0]
        ws.title = 'Summary'
        # NOTE(review): this writes the sliced .labels arrays as a single
        # cell; presumably index level *names* were intended - confirm
        # against produced spreadsheets before changing.
        ws.append([dataframe.index.labels[:nlevels - 1]] +
                  ["Worksheet", "Rows"])

        for row, path in enumerate(paths):
            # select data frame as cross-section
            work = dataframe.xs(path, axis=0)
            title = path2str(path)[:30]
            ws.append(list(path) + [title, len(work)])
            c = ws.cell(row=row + 1, column=nlevels)
            # in-workbook link to the per-path worksheet
            c.hyperlink = "#%s" % title
            addWorksheet(wb, work, title=title)
    else:
        # bug fix: this branch called `writeWorksheet`, an undefined
        # name - the local helper defined above is `addWorksheet`.
        addWorksheet(wb, dataframe, title=title)

    # write result block
    lines = []
    lines.append("`%i x %i table <#$xls %s$#>`__" %
                 (len(row_headers), len(col_headers), title))
    lines.append("")
    r = ResultBlock("\n".join(lines), title=title)
    # attach the workbook so downstream code can serialize it
    r.xls = wb

    self.debug("%s: saved %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))
    return r