Пример #1
0
    def collect( self ):
        '''collect all data.

        Data is stored in a multi-level dictionary (DataTree).

        Returns the populated data tree, or None when the tracker is a
        function or no data paths remain after filtering.
        '''

        self.data = odict()

        self.debug( "%s: collecting data paths." % (self.tracker))
        is_function, datapaths = self.getDataPaths(self.tracker)
        self.debug( "%s: collected data paths." % (self.tracker))

        # a function tracker has no data paths - store its single
        # result under the pseudo-path ("all",)
        if is_function:
            d = self.getData( () )

            # save in data tree as leaf
            DataTree.setLeaf( self.data, ("all",), d )

            self.debug( "%s: collecting data finished for function." % (self.tracker))
            return

        # if no tracks, error
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn( "%s: no tracks found - no output" % self.tracker )
            return

        self.debug( "%s: filtering data paths." % (self.tracker))
        # filter data paths
        datapaths = self.filterDataPaths( datapaths )
        self.debug( "%s: filtered data paths." % (self.tracker))

        # if no tracks remain after filtering, error
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn( "%s: no tracks remain after filtering - no output" % self.tracker )
            return

        self.debug( "%s: building all_paths" % (self.tracker ) )
        if len(datapaths) > MAX_PATH_NESTING:
            # BUG FIX: the original referenced 'all_paths' here before it
            # was defined, raising a NameError instead of the intended
            # ValueError. Report the nesting depth actually being tested.
            self.warn( "%s: number of nesting in data paths too large: %i" % (self.tracker, len(datapaths)))
            raise ValueError( "%s: number of nesting in data paths too large: %i" % (self.tracker, len(datapaths)))

        all_paths = list(itertools.product( *datapaths ))
        self.debug( "%s: collecting data started for %i data paths" % (self.tracker, 
                                                                       len( all_paths) ) )

        self.data = odict()
        for path in all_paths:

            d = self.getData( path )

            # ignore empty data sets
            if d is None: continue

            # save in data tree as leaf
            DataTree.setLeaf( self.data, path, d )

        self.debug( "%s: collecting data finished for %i data paths" % (self.tracker, 
                                                                       len( all_paths) ) )
        return self.data
Пример #2
0
    def __call__(self, data, path ):
        '''iterate over leaves/branches in data structure.

        This method will call the :meth:`render` method for 
        each leaf/branch at level :attr:`nlevels`.
        '''
        # nlevels must be supplied by the concrete subclass
        if self.nlevels is None:
            raise NotImplementedError("incomplete implementation of %s" % str(self))

        result = ResultBlocks( title = path2str(path) )

        labels = DataTree.getPaths( data )
        if len(labels) < self.nlevels:
            # not enough levels - emit an empty placeholder block
            self.warn( "at %s: expected at least %i levels - got %i: %s" %\
                           (str(path), self.nlevels, len(labels), str(labels)) )
            result.append( EmptyResultBlock( title = path2str(path) ) )
            return result

        # enumerate all branches down to the required level
        paths = list(itertools.product( *labels[:-self.nlevels] ))

        for p in paths:
            work = DataTree.getLeaf( data, p )
            if not work: continue
            try:
                result.extend( self.render( work, path + p ) )
            except Exception:
                # BUG FIX: typo 'exeception' corrected in the log message;
                # narrowed from a bare 'except' so KeyboardInterrupt and
                # SystemExit propagate untouched. Still re-raised.
                self.warn("exception raised in rendering for path: %s" % str(path+p))
                raise

        return result
Пример #3
0
    def transform(self, data, path):
        """pairwise-combine entries of *data*.

        For every unordered pair of top-level keys (n1, n2), the first
        field from :attr:`self.fields` present in data[n1] is extracted
        from both entries and the two value arrays are stored under a
        combined "n1 x n2" branch of a new tree.

        Raises KeyError if no field matches or the second entry lacks
        the chosen field, and ValueError if paired arrays differ in
        length.
        """
        debug( "%s: called" % str(self))

        # materialize keys: dict.keys() is not indexable on python 3
        vals = list(data.keys())
        new_data = odict()

        for x1 in range(len(vals)-1):
            n1 = vals[x1]
            # find the first field that fits
            for field in self.fields:
                if field in data[n1]:
                    d1 = data[n1][field]
                    break
            else:
                raise KeyError("could not find any match from '%s' in '%s'" % (str(data[n1].keys()), str(self.fields )))

            for x2 in range(x1+1, len(vals)):
                n2 = vals[x2]
                try:
                    d2 = data[n2][field]
                except KeyError:
                    # BUG FIX: the original caught the misspelled
                    # 'KeyErrror', called the misspelled 'sttr' and fed a
                    # two-placeholder format string a single argument -
                    # all of which raised the wrong exception.
                    raise KeyError("no field %s in '%s'" % (field, str(data[n2])))

                # paired values must be equal-length arrays
                if len(d1) != len(d2):
                    raise ValueError("length of elements not equal: %i != %i" % (len(d1), len(d2)))

                DataTree.setLeaf( new_data, ( ("%s x %s" % (n1, n2) ), n1),
                                  d1 )

                DataTree.setLeaf( new_data, ( ("%s x %s" % (n1, n2) ), n2),
                                  d2 )

        return new_data
Пример #4
0
    def GET(self, tracker):
        """Serve the cached data of *tracker* as a rendered data table."""
        # open the tracker's persistent cache read-only
        reader = Cache.Cache( tracker, mode = "r" )
        tree = DataTree.fromCache( reader )
        # flatten the nested tree into tabular form
        table, row_headers, col_headers = DataTree.tree2table( tree )
        return render.data_table(table, row_headers, col_headers )
Пример #5
0
    def exclude( self ):
        '''exclude data paths.

        Only those data paths not matching the exclude term are accepted.
        '''
        if not self.exclude_paths:
            return

        # enumerate every full path in the data tree
        # (currently enumerates - bfs more efficient)
        all_paths = list(itertools.product( *DataTree.getPaths( self.data ) ))

        for current in all_paths:
            for term in self.exclude_paths:
                if term in current:
                    # literal match against a path component
                    self.debug( "%s: ignoring path %s because of :exclude:=%s" % (self.tracker, current, term))
                    try:
                        DataTree.removeLeaf( self.data, current )
                    except KeyError:
                        pass
                elif term.startswith("r(") and term.endswith(")"):
                    # term is a regular expression of the form r(...):
                    # strip the r() wrapper ...
                    pattern = term[2:-1]
                    # ... and any flanking quotation marks
                    if pattern[0] in ('"', "'") and pattern[-1] in ('"', "'"):
                        pattern = pattern[1:-1]
                    rx = re.compile( pattern )
                    if any( rx.search( component ) for component in current ):
                        self.debug( "%s: ignoring path %s because of :exclude:=%s" % (self.tracker, current, pattern))
                        try:
                            DataTree.removeLeaf( self.data, current )
                        except KeyError:
                            pass
Пример #6
0
    def GET(self, tracker):
        """Render the tracker's cached data tree as a table."""
        with_cache = Cache.Cache(tracker, mode="r")
        data = DataTree.fromCache(with_cache)
        # convert the nested dictionary into rows/columns for display
        table, rows, cols = DataTree.tree2table(data)
        return render.data_table(table, rows, cols)
Пример #7
0
    def render(self):
        """supply the :class:`Renderer.Renderer` with the data to render.

        The data supplied will depend on the ``groupby`` option.

        Returns a :class:`ResultBlocks` container.

        Raises ValueError if the tracker returned no data.
        """
        self.debug("%s: rendering data started for %i items" % (self, len(self.data)))

        results = ResultBlocks(title="main")

        # get number of levels required by renderer
        try:
            renderer_nlevels = self.renderer.nlevels
        except AttributeError:
            renderer_nlevels = 0

        data_paths = DataTree.getPaths(self.data)
        nlevels = len(data_paths)

        group_level = self.group_level

        self.debug(
            "%s: rendering data started. levels=%i, required levels>=%i, group_level=%i, data_paths=%s"
            % (self, nlevels, renderer_nlevels, group_level, str(data_paths)[:100])
        )

        if nlevels < renderer_nlevels:
            # add some dummy levels if levels is not enough
            d = self.data
            for x in range(renderer_nlevels - nlevels):
                d = odict((("all", d),))
            results.append(self.renderer(d, path=("all",)))

        elif group_level < 0 or renderer_nlevels < 0:
            # no grouping
            results.append(self.renderer(self.data, path=()))
        else:
            # group at level group_level
            paths = list(itertools.product(*data_paths[: group_level + 1]))
            for path in paths:
                work = DataTree.getLeaf(self.data, path)
                if not work:
                    continue
                try:
                    results.append(self.renderer(work, path=path))
                except Exception:
                    # BUG FIX: was a bare 'except', which also swallowed
                    # KeyboardInterrupt/SystemExit; capture the failure
                    # as a result block instead of aborting the report.
                    results.append(ResultBlocks(Utils.buildException("rendering")))

        if len(results) == 0:
            self.warn("tracker returned no data.")
            raise ValueError("tracker returned no data.")

        self.debug("%s: rendering data finished with %i blocks" % (self.tracker, len(results)))

        return results
Пример #8
0
    def __call__(self, *args, **kwargs):
        """run the full dispatch pipeline.

        Stages: argument parsing, data collection, transformation,
        pruning, grouping and rendering. A failure in any stage is
        captured and returned as an exception result block instead of
        propagating to the caller.
        """
        try:
            self.parseArguments(*args, **kwargs)
        except Exception:
            # narrowed from bare 'except' so KeyboardInterrupt/SystemExit
            # propagate; genuine errors become an exception result block
            return ResultBlocks(ResultBlocks(Utils.buildException("parsing")))

        self.debug("profile: started: tracker: %s" % (self.tracker))

        try:
            self.collect()
        except Exception:
            return ResultBlocks(ResultBlocks(Utils.buildException("collection")))

        self.debug("profile: finished: tracker: %s" % (self.tracker))

        data_paths = DataTree.getPaths(self.data)
        self.debug("%s: after collection: %i data_paths: %s" % (self, len(data_paths), str(data_paths)))

        # transform data
        try:
            self.transform()
        except Exception:
            return ResultBlocks(ResultBlocks(Utils.buildException("transformation")))

        data_paths = DataTree.getPaths(self.data)
        self.debug("%s: after transformation: %i data_paths: %s" % (self, len(data_paths), str(data_paths)))

        # remove superfluous levels
        try:
            self.prune()
        except Exception:
            return ResultBlocks(ResultBlocks(Utils.buildException("pruning")))

        data_paths = DataTree.getPaths(self.data)
        self.debug("%s: after pruning: %i data_paths: %s" % (self, len(data_paths), str(data_paths)))

        # remove group plots
        try:
            self.group()
        except Exception:
            return ResultBlocks(ResultBlocks(Utils.buildException("grouping")))

        data_paths = DataTree.getPaths(self.data)
        self.debug("%s: after grouping: %i data_paths: %s" % (self, len(data_paths), str(data_paths)))

        self.debug("profile: started: renderer: %s" % (self.renderer))

        try:
            result = self.render()
        except Exception:
            return ResultBlocks(ResultBlocks(Utils.buildException("rendering")))

        self.debug("profile: finished: renderer: %s" % (self.renderer))

        return result
Пример #9
0
    def collect(self):
        """collect all data.

        Data is stored in a multi-level dictionary (DataTree).

        Raises ValueError if no tracks are found, before or after
        filtering.
        """

        self.data = odict()

        is_function, datapaths = self.getDataPaths(self.tracker)

        # a function tracker has no data paths - store its single
        # result under the pseudo-path ("all",)
        if is_function:
            d = self.getData(())

            # save in data tree as leaf
            DataTree.setLeaf(self.data, ("all",), d)

            self.debug("%s: collecting data finished for function." % (self.tracker))
            return

        # if no tracks, error
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks found - no output" % self.tracker)
            raise ValueError("no tracks found from %s" % self.tracker)

        # filter data paths
        datapaths = self.filterDataPaths(datapaths)

        # if no tracks remain after filtering, error
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks remain after filtering - no output" % self.tracker)
            raise ValueError("no tracks found from %s" % self.tracker)

        all_paths = list(itertools.product(*datapaths))
        self.debug("%s: collecting data started for %i data paths" % (self.tracker, len(all_paths)))

        self.data = odict()
        for path in all_paths:

            d = self.getData(path)

            # ignore empty data sets
            # BUG FIX: identity comparison with None ('is'), not '=='
            if d is None:
                continue

            # save in data tree as leaf
            DataTree.setLeaf(self.data, path, d)

        self.debug("%s: collecting data finished for %i data paths" % (self.tracker, len(all_paths)))
Пример #10
0
    def group( self ):
        '''rearrange data tree for grouping
        and set group level.

        Through grouping the data tree is rearranged such
        that the level at which data will be grouped will 
        be the top (0th) level in the nested dictionary.
        '''

        data_paths = DataTree.getPaths( self.data )
        nlevels = len(data_paths)

        # number of levels the renderer requires (0 if it does not say)
        renderer_nlevels = getattr( self.renderer, "nlevels", 0 )

        if self.groupby == "none":
            self.group_level = nlevels - 1

        elif self.groupby == "track":
            # track is the first level
            self.group_level = 1
            # insert a pseudo level when the tree is exactly as deep
            # as the renderer requires
            if nlevels == renderer_nlevels:
                expanded = odict()
                for label in data_paths[0]:
                    expanded[label] = odict( ((label, self.data[label]),) )
                self.data = expanded

        elif self.groupby == "slice":
            # swap tracks and slices in the data tree
            if nlevels <= 2:
                self.warn( "grouping by slice, but only %i levels in data tree - all are grouped" % nlevels)
                self.group_level = 0
            else:
                self.data = DataTree.swop( self.data, 0, 1)
                self.group_level = 1

        elif self.groupby == "all":
            # group everything together
            self.group_level = 0

        else:
            # neither group by slice or track ("ungrouped")
            self.group_level = 0

        return self.data
Пример #11
0
    def render( self, work, path ):
        """collect text blocks stored under reserved tracker keywords."""
        # initiate output structure
        results = ResultBlocks( title = path2str(path) )

        depth = len( DataTree.getPaths( work ) )
        # visit every node two levels above the leaves; each matching
        # keyword contributes one block titled with the node's own path
        for node_path, branch in DataTree.getNodes( work, depth - 2 ):
            for keyword in Utils.TrackerKeywords:
                if keyword in branch:
                    results.append( ResultBlock( branch[keyword],
                                                 title = path2str(node_path) ) )

        return results
Пример #12
0
    def __call__(self, dataframe, path ):
        '''iterate over leaves/branches in data structure.

        This method will call the :meth:`render` method for 
        each leaf/branch at level :attr:`nlevels`.

        If :attr:`split_at` is set, the dataframe is rendered in
        chunks of at most ``split_at`` first-level index labels;
        labels matching a :attr:`split_always` pattern are included
        in every chunk.
        '''
        if self.nlevels == None: raise NotImplementedError("incomplete implementation of %s" % str(self))

        try:
            labels = dataframe.index.levels
            paths = dataframe.index.unique()
        except AttributeError:
            # plain (non-Multi) index has no .levels - treat as one dummy level
            labels = ['dummy1'] 
            paths = ['dummy1']

        result = ResultBlocks()

        # a required level count of -1 means: accept any depth
        if self.nlevels != -1 and len(labels) != self.nlevels:
            raise ValueError( "at path %s: expected %i levels - got %i: %s" %\
                                  (str(path), self.nlevels, 
                                   len(labels), str(labels)) )

        if not self.split_at:
            # render everything in a single pass
            result.extend( self.render( dataframe, path ) )
        else:
            # split dataframe at first index
            first_level_labels = dataframe.index.get_level_values(0).unique()
            if len(first_level_labels) < self.split_at:
                # few enough labels - no splitting necessary
                result.extend( self.render( dataframe, path ) )
            else:
                # select tracks to always add to split 
                # pick always tracks; split_always entries are treated
                # as regular expressions matched against the labels
                if self.split_always:
                    always = [ x for x, y in itertools.product( first_level_labels, self.split_always) \
                                   if re.search( y, x ) ]
                else:
                    always = []

                for z, x in enumerate(range( 0, len(first_level_labels), self.split_at)) :
                    # chunk of labels plus the always-included ones;
                    # presumably DataTree.unique de-duplicates - verify
                    select = list(DataTree.unique( always + list(first_level_labels[x:x+self.split_at]) ))

                    if len(dataframe.index.names) == 1:
                        # if only one level, use loc to obtain dataframe
                        # index is duplicated, so ignore second level
                        work = pandas.concat( [dataframe.loc[[s]] for s in select], keys = select )
                        work.reset_index( range( 1, len(work.index.names)), drop=True, inplace=True )
                    else:
                        work = pandas.concat( [dataframe.xs(s, axis=0) for s in select], keys = select )
                        
                    # reconcile index names
                    work.index.names = dataframe.index.names
                    # each chunk gets its own numbered sub-path
                    result.extend( self.render( work, path + (z, ) ) )
                    
        return result
Пример #13
0
    def getData(self, path):
        """get data for track and slice. Save data in persistent cache for further use.

        For functions, path should be an empty tuple.

        Returns the tracker's result, either freshly computed or taken
        from the cache; re-raises any exception the tracker throws.
        """

        if path:
            key = DataTree.path2str(path)
        else:
            key = "all"

        result, fromcache = None, False
        # BUG FIX: trackers with options must NOT be read from the
        # cache; the original condition ('or self.tracker_options')
        # consulted the cache exactly when options were set.
        if not self.nocache and not self.tracker_options:
            try:
                result = self.cache[key]
                fromcache = True
            except KeyError:
                pass

        kwargs = {}
        if self.tracker_options:
            kwargs["options"] = self.tracker_options

        if result is None:
            try:
                result = self.tracker(*path, **kwargs)
            except Exception as msg:
                # BUG FIX: 'except Exception, msg' is python-2-only syntax
                self.warn(
                    "exception for tracker '%s', path '%s': msg=%s" % (str(self.tracker), DataTree.path2str(path), msg)
                )
                if VERBOSE:
                    self.warn(traceback.format_exc())
                raise

        # store freshly computed data in the cache for reuse
        if not self.nocache and not fromcache:
            self.cache[key] = result

        # BUG FIX: the original never returned the computed result
        return result
Пример #14
0
    def prune( self ):
        '''prune data tree.

        Remove all empty leaves.

        Remove all levels from the data tree that are
        superfluous, i.e. levels that contain only a single label
        and all labels in the hierarchy below are the same.

        This method ignores some labels with reserved key-words
        such as ``text``, ``rst``, ``xls``

        Ignore both the first and last level for this analyis.
        '''
        # bottom-up: leaves are only pruned when superfluous
        removed = DataTree.prune( self.data,
                                  ignore = Utils.TrackerKeywords,
                                  method = 'bottom-up' )

        for lvl, lbl in removed:
            self.debug( "pruned level %i from data tree: label='%s'" % (lvl, lbl) )

        # keep the pruning record for later conversion
        self.pruned = removed
Пример #15
0
    def transform(self, data, path):
        """apply a multiple-testing correction to p-values in the tree.

        Collects the values stored under :attr:`self.pval` from every
        branch one level above the leaves, adjusts them jointly with
        R's ``p.adjust`` (method :attr:`self.method`) and writes the
        adjusted values back under the key ``P-adjust``.

        Returns the modified data tree.
        """
        from rpy2.robjects import r as R

        # parallel records: per-branch value count and the flat value list
        # (the original also pre-initialized 'paths', which was dead code)
        lengths, values = [], []

        labels = DataTree.getPaths(data)
        paths = list(itertools.product( *labels[:-1]))

        for path in paths:

            work = DataTree.getLeaf(data, path)
            try:
                # sequence of p-values
                lengths.append(len(work[self.pval]))
                values.extend(work[self.pval])
            except TypeError:
                # scalar p-value: record length 0 as a marker
                lengths.append(0)
                values.append(work[self.pval])

        # adjust all p-values jointly, then convert the R vector to a list
        padj = list(R["p.adjust"](values, method = self.method))

        for path in paths:

            num = lengths.pop(0)

            if num > 0:
                new_values = padj[0:num]
                padj = padj[num:]
            else:
                # scalar entry consumes exactly one adjusted value
                new_values = padj[0]
                padj = padj[1:]

            if path:
                work = odict(DataTree.getLeaf(data, path))
                work["P-adjust"] = new_values
                DataTree.setLeaf(data, path, work)
            else:
                # empty path: attach directly at the root
                data["P-adjust"] = new_values

        return data
Пример #16
0
    def __call__(self, data ):
        """apply :meth:`transform` to every branch at depth nlevels.

        Branches that transform to an empty result are removed from the
        tree; a transform applied at the root replaces the tree.
        """
        # BUG FIX: identity comparison with None ('is'), not '=='
        if self.nlevels is None:
            raise NotImplementedError("incomplete implementation of %s" % str(self))

        labels = DataTree.getPaths( data )        
        debug( "transform: started with paths: %s" % labels)
        assert len(labels) >= self.nlevels, "expected at least %i levels - got %i" % (self.nlevels, len(labels))

        paths = list(itertools.product( *labels[:-self.nlevels] ))
        for path in paths:
            work = DataTree.getLeaf( data, path )
            if not work: continue
            new_data = self.transform( work, path )
            if new_data:
                if path:
                    DataTree.setLeaf( data, path, new_data )
                else:
                    # set new root
                    data = new_data
            else:
                warn( "no data at %s - removing branch" % str(path))
                DataTree.removeLeaf( data, path )

        debug( "transform: finished with paths: %s" % DataTree.getPaths( data ))

        return data
Пример #17
0
    def group(self):
        """rearrange data tree for grouping.

        and set group level.
        """

        data_paths = DataTree.getPaths(self.data)
        nlevels = len(data_paths)

        # number of levels the renderer requires (0 if it does not say)
        renderer_nlevels = getattr(self.renderer, "nlevels", 0)

        if self.groupby == "none":
            self.group_level = renderer_nlevels

        elif self.groupby == "track":
            # track is the first level
            self.group_level = 0
            # insert a pseudo level when the tree is exactly as deep
            # as the renderer requires
            if nlevels == renderer_nlevels:
                expanded = odict()
                for label in data_paths[0]:
                    expanded[label] = odict(((label, self.data[label]),))
                self.data = expanded

        elif self.groupby == "slice":
            # swap tracks and slices in the data tree
            if nlevels <= 2:
                warn("grouping by slice, but only %i levels in data tree - all are grouped" % nlevels)
                self.group_level = -1
            else:
                self.data = DataTree.swop(self.data, 0, 1)
                self.group_level = 0

        elif self.groupby == "all":
            # group everything together
            self.group_level = -1
        else:
            # neither group by slice or track ("ungrouped")
            self.group_level = -1
Пример #18
0
    def getData( self, path ):
        """get data for track and slice. Save data in persistent cache for further use.

        For functions, path should be an empty tuple.
        """
        # empty path designates a function tracker
        key = DataTree.path2str(path) if path else "all"

        result, fromcache = None, False
        # trackers with options are not cached
        if not self.nocache and not self.tracker_options:
            try:
                result = self.cache[ key ]
                fromcache = True
            except KeyError:
                pass
            except RuntimeError as msg:
                raise RuntimeError( "error when accessing key %s from cache: %s - potential problem with unpickable object?" % (key, msg))

        kwargs = {}
        if self.tracker_options:
            kwargs['options'] = self.tracker_options

        if result is None:
            # cache miss (or caching disabled): run the tracker itself
            try:
                result = self.tracker( *path, **kwargs )
            except Exception as msg:
                self.warn( "exception for tracker '%s', path '%s': msg=%s" % (str(self.tracker),
                                                                              DataTree.path2str(path), 
                                                                              msg) )
                if VERBOSE: self.warn( traceback.format_exc() )
                raise

        # store in cache 
        if not self.nocache and not fromcache:
            # exception - do not store data frames
            # test with None fails for some reason
            self.cache[key] = result

        return result
Пример #19
0
    def prune(self):
        """prune data tree.

        Remove all empty leaves.

        Remove all levels from the data tree that are
        superfluous, i.e. levels that contain only a single label
        all labels in the hierarchy below are the same.
       
        Ignore both the first and last level for this analyis.
        """

        # drop empty leaves first
        DataTree.removeEmptyLeaves(self.data)

        # prune superfluous levels
        data_paths = DataTree.getPaths(self.data)
        nlevels = len(data_paths)

        # number of levels the renderer requires (0 if it does not say)
        renderer_nlevels = getattr(self.renderer, "nlevels", 0)

        # renderers that want the complete tree: do not prune
        if renderer_nlevels < 0:
            return

        to_prune = []

        # skip the first and the last level
        for level in range(1, nlevels - 1):

            labels = data_paths[level]
            if len(labels) != 1:
                continue

            label = labels[0]
            # the level is superfluous only when every branch at this
            # depth consists of exactly this single label
            superfluous = True
            for prefix in DataTree.getPrefixes(self.data, level):
                leaves = DataTree.getLeaf(self.data, prefix)
                if len(leaves) > 1 or label not in leaves:
                    superfluous = False
                    break
            if superfluous:
                to_prune.append((level, label))

        # remove from deepest to shallowest so level indices stay valid
        for level, label in reversed(to_prune):
            self.debug("pruning level %i from data tree: label='%s'" % (level, label))
            DataTree.removeLevel(self.data, level)
Пример #20
0
    def buildMatrix( self, 
                     work, 
                     missing_value = 0, 
                     apply_transformations = True,
                     take = None,
                     dtype = float ):
        """build a matrix from work, a two-level nested dictionary.

        If *take* is given, then the matrix will be built from
        level 3, taking *take* from the deepest level only.

        This method will also apply conversions if apply_transformations
        is set.

        Returns a (matrix, rows, columns) tuple; rows and columns are
        lists of strings.

        Raises ValueError/TypeError on malformed (non-scalar) data.
        """
        # BUG FIX: the default was 'numpy.float', which was removed in
        # NumPy 1.24 and is evaluated at definition time - the module
        # failed to import. The builtin float is its exact former alias.

        labels = DataTree.getPaths( work )
        levels = len(labels)
        if take:
            if levels != 3: raise ValueError( "expected three labels" )
            if take not in labels[-1]: raise ValueError( "no data on `%s`" % take )
            take_f = lambda row, column: work[row][column][take]
        else:
            if levels != 2: raise ValueError( "expected two levels" )
            take_f = lambda row, column: work[row][column]

        rows, columns = labels[:2]

        self.debug("creating matrix")
        # allocate the matrix pre-filled with the missing value
        matrix = numpy.full( (len(rows), len(columns)), missing_value, dtype = dtype )
        self.debug("constructing matrix")
        for x, row in enumerate(rows):
            for y, column in enumerate(columns):
                # missing values from DataTree
                try:
                    v = take_f( row, column )
                except KeyError:
                    continue

                # empty values from DataTree
                try:
                    if len(v) == 0: continue
                except TypeError:
                    pass

                # convert
                try:
                    matrix[x,y] = v
                except ValueError:
                    raise ValueError( "malformatted data: expected scalar, got '%s'" % str(work[row][column]) )
                except TypeError:
                    raise TypeError( "malformatted data: expected scalar, got '%s'" % str(work[row][column]) )

        if self.mConverters and apply_transformations:
            for converter in self.mConverters: 
                self.debug("applying converter %s" % converter)
                matrix, rows, columns = converter(matrix, rows, columns)

        # convert rows/columns to str (might be None)
        rows = [ str(x) for x in rows ]
        columns = [ str(x) for x in columns ]

        return matrix, rows, columns
Пример #21
0
 def __call__ (self, data):
     """Delegate to the base transformer, inferring depth when unset.

     An ``nlevels`` of 0 is interpreted as "use the full depth of the
     data tree".
     """
     if self.nlevels == 0:
         self.nlevels = len(DataTree.getPaths(data))

     return Transformer.__call__(self, data)
Пример #22
0
def main():
    '''command line entry point.

    Reads a SphinxReport cache for a tracker and either lists the
    available tracks and slices (``--view``) or dumps the cached
    data as a tsv/csv formatted table to stdout.

    raises ValueError if no tracker has been supplied.
    '''

    parser = optparse.OptionParser( version = "%prog version: $Id$", usage = USAGE )

    parser.add_option( "-v", "--verbose", dest="loglevel", type="int",
                       help="loglevel. The higher, the more output [default=%default]" )

    parser.add_option( "-i", "--view", dest="view", action="store_true",
                       help="view keys in cache [default=%default]" )

    parser.add_option( "-t", "--tracker", dest="tracker", type="string",
                       help="tracker to use [default=%default]" )

    parser.add_option( "-a", "--tracks", dest="tracks", type="string",
                       help="tracks to include [default=%default]" )

    parser.add_option( "-s", "--slices", dest="slices", type="string",
                       help="slices to include [default=%default]" )

    parser.add_option( "-g", "--groupby", dest="groupby", type="choice",
                       choices=("track", "slice", "all"),
                       help="groupby by track or slice [default=%default]" )

    parser.add_option( "-f", "--format", dest="format", type="choice",
                       choices=("tsv", "csv"),
                       help="output format [default=%default]" )

    parser.set_defaults(
        loglevel = 2,
        view = False,
        tracker = None,
        tracks = None,
        slices = None,
        groupby = "slice",
        format = "tsv",
        )

    (options, args) = parser.parse_args()

    # the tracker is taken from --tracker or, failing that, from the
    # single positional argument
    if len(args) != 1 and options.tracker is None:
        # was: "print USAGE" (Python-2-only print statement)
        sys.stdout.write( USAGE + "\n" )
        raise ValueError( "please supply a tracker." )

    if options.tracker:
        tracker = options.tracker
    else:
        tracker = args[0]

    cache = Cache.Cache( tracker, mode = "r" )

    if options.view:
        # cache keys are "track/slice[/...]" paths
        # NOTE(review): x[1] assumes every key has at least two
        # components - verify against Cache key format
        keys = [ x.split("/") for x in cache.keys()]
        sys.stdout.write( "# available tracks\n" )
        sys.stdout.write( "track\n%s" % "\n".join( set([ x[0] for x in keys] ) ) )
        sys.stdout.write( "\n" )
        sys.stdout.write( "# available slices\n" )
        sys.stdout.write( "slice\n%s" % "\n".join( set([ x[1] for x in keys] ) ) )
        sys.stdout.write( "\n" )
        return

    data = DataTree.fromCache( cache,
                               tracks = options.tracks,
                               slices = options.slices,
                               groupby = options.groupby )

    table, row_headers, col_headers = DataTree.tree2table( data )

    if options.format in ("tsv", "csv"):
        sep = "\t" if options.format == "tsv" else ","
        sys.stdout.write( sep + sep.join( col_headers) + "\n")
        for h, row in zip( row_headers, table ):
            sys.stdout.write( "%s%s%s\n" % (h, sep, sep.join( row)))
Пример #23
0
 def __call__( self, data ):
     '''convert the data tree into a single data frame.

     returns an ordered dictionary with one entry keyed ``all``
     that holds the converted data frame.
     '''
     return odict( ( ('all', DataTree.asDataFrame( data )), ) )
Пример #24
0
    def __call__(self, *args, **kwargs ):
        '''run the full dispatch pipeline.

        The stages are: argument parsing, data collection,
        transformation, restriction, exclusion, pruning, grouping
        and finally rendering.

        Each stage is guarded individually: an exception is logged
        and converted into a ResultBlocks object carrying the
        traceback, so that a single failing tracker does not abort
        the whole build.

        returns a ResultBlocks data structure, or None if the
        tracker yielded no data.
        '''

        # use "except Exception" (not a bare except) throughout so
        # that KeyboardInterrupt and SystemExit still propagate.
        try: self.parseArguments( *args, **kwargs )
        except Exception:
            self.error( "%s: exception in parsing" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "parsing" ) ))

        # collect no data if tracker is the empty tracker
        # and go straight to rendering
        try:
            if self.tracker.getTracks() == ["empty"]:
                # isinstance does not work because of module mapping
                # type(Tracker.Empty) == SphinxReport.Tracker.Empty
                # type(self.tracker) == Tracker.Empty
                result = self.renderer()
                return ResultBlocks( result )
        except AttributeError:
            # for function trackers
            pass

        self.debug( "profile: started: tracker: %s" % (self.tracker))

        # collecting data
        try: self.collect()
        except Exception:
            self.error( "%s: exception in collection" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "collection" ) ))
        finally:
            self.debug( "profile: finished: tracker: %s" % (self.tracker))

        if len(self.data) == 0:
            self.info( "%s: no data - processing complete" % self.tracker )
            return None

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after collection: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        # transform data
        try: self.transform()
        except Exception:
            self.error( "%s: exception in transformation" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "transformation" ) ))

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after transformation: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        # special renderers receive the data tree directly and
        # short-circuit the remaining stages
        if isinstance( self.renderer, (Renderer.User, Renderer.Debug) ):
            results = ResultBlocks( title="main" )
            results.append( self.renderer( self.data, ('') ) )
            return results

        # restrict
        try: self.restrict()
        except Exception:
            self.error( "%s: exception in restrict" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "restrict" ) ))

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after restrict: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        # exclude
        try: self.exclude()
        except Exception:
            self.error( "%s: exception in exclude" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "exclude" ) ))

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after exclude: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        # remove superfluous levels
        try: self.prune()
        except Exception:
            self.error( "%s: exception in pruning" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "pruning" ) ))

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after pruning: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        # remove group plots
        try: self.group()
        except Exception:
            self.error( "%s: exception in grouping" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "grouping" ) ))

        data_paths = DataTree.getPaths( self.data )
        self.debug( "%s: after grouping: %i data_paths: %s" % (self,len(data_paths), str(data_paths)))

        self.debug( "profile: started: renderer: %s" % (self.renderer))

        try: result = self.render()
        except Exception:
            self.error( "%s: exception in rendering" % self )
            return ResultBlocks(ResultBlocks( Utils.buildException( "rendering" ) ))
        finally:
            self.debug( "profile: finished: renderer: %s" % (self.renderer))

        return result
Пример #25
0
    def render( self ):
        '''supply the :class:`Renderer.Renderer` with the data to render. 
        
        The data supplied will depend on the ``groupby`` option.

        The data tree is first converted to a (melted) data frame;
        the frame is then either passed whole to the renderer or
        split into per-path groups according to ``group_level`` and
        the number of index levels the renderer requires.

        returns a ResultBlocks data structure.

        raises ValueError if the conversion yields no data or the
        renderer produced no result blocks.
        '''
        self.debug( "%s: rendering data started for %i items" % (self,
                                                                 len(self.data)))

        # get number of levels required by renderer
        try:
            renderer_nlevels = self.renderer.nlevels
        except AttributeError:
            # set to -1 to avoid any grouping
            # important for user renderers that are functions
            # and have no level attribute.
            renderer_nlevels = -1

        # initiate output structure
        results = ResultBlocks( title = "")

        # convert to data series
        # The data is melted, i.e,
        # BMW    price    10000
        # BMW    speed    100
        # Golf   price    5000
        # Golf   speed    50  
        dataframe = DataTree.asDataFrame( self.data )
        # dataframe.write_csv( "test.csv" )
        
        if dataframe is None:
            self.warn( "%s: no data after conversion" % self )
            raise ValueError( "no data for renderer" )            

        # special patch: set column names to pruned levels
        # if there are no column names
        if len(dataframe.columns) == len(self.pruned):
            if list(dataframe.columns) == list(range( len(dataframe.columns))):
                dataframe.columns = [x[1] for x in self.pruned]
        
        index = dataframe.index

        def getIndexLevels( index ):
            # return the number of levels in *index*; a flat index
            # counts as one level. The rebinding of ``index`` below
            # is local and has no effect on the caller.
            try:
                # hierarchical index
                nlevels = len(index.levels)
            except AttributeError:
                nlevels = 1
                index = [ (x,) for x in index]
            #raise ValueError('data frame without MultiIndex' )
            return nlevels

        nlevels = getIndexLevels( index )

        self.debug( "%s: rendering data started. levels=%i, required levels>=%i, group_level=%s" %\
                        (self, nlevels, 
                         renderer_nlevels,
                         str(self.group_level) ) )

        if renderer_nlevels < 0 and self.group_level <= 0:
            # no grouping for renderers that will accept
            # a dataframe with any level of indices and no explicit
            # grouping has been asked for.
            results.append( self.renderer( dataframe, path = () ) )
        else:
            # user specified group level by default
            group_level = self.group_level

            # set group level to maximum allowed by renderer
            if renderer_nlevels >= 0:
                group_level = max(nlevels - renderer_nlevels, group_level)
                
            # add additional level if necessary
            if nlevels < group_level:
                prefix = tuple(["level%i" % x for x in range( group_level - nlevels)])
                dataframe.index = pandas.MultiIndex.from_tuples( [ prefix + x for x in dataframe.index ] )

            # used to be: group_level + 1
            # hierarchical index
            # numpy.unique converts everything to a string
            # which is not consistent with selecting later
            # NOTE(review): assumes Python 2 semantics - map() must
            # return a list for the paths[0] subscript below.
            paths = map( tuple, DataTree.unique( [ x[:group_level] for x in dataframe.index.unique() ] ))

            pathlength = len(paths[0]) - 1

            is_hierarchical = isinstance( dataframe.index, pandas.core.index.MultiIndex )

            if is_hierarchical:
                # Note: can only sort hierarchical indices
                dataframe = dataframe.sortlevel()

                if dataframe.index.lexsort_depth < pathlength:
                    raise ValueError('could not sort data frame: sort depth=%i < pathlength=%i, dataframe=%s' \
                                         % (dataframe.index.lexsort_depth, 
                                            pathlength,
                                            dataframe))
            
            for path in paths:

                if path:
                    if len(path) == nlevels:
                        # extract with loc in order to obtain dataframe
                        work = dataframe.loc[[path]]
                    else:
                        # select data frame as cross-section
                        work = dataframe.xs(path, axis=0 )
                else:
                    # empty tuple - use full data set
                    work = dataframe
                    
                # remove columns and rows in work that are all Na
                work = work.dropna( axis=1, how='all').dropna( axis=0, how='all')
                
                if is_hierarchical and renderer_nlevels >= 0:
                    work_levels = getIndexLevels( work.index )
                    # reduce levels of indices required to that required
                    # for Renderer. This occurs if groupby=none.
                    if work_levels > renderer_nlevels:
                        sep = work_levels - (renderer_nlevels - 1)
                        tuples = [ ( DataTree.path2str( x[:sep] ), ) + x[sep:] \
                                       for x in work.index ]
                        work.index = pandas.MultiIndex.from_tuples( tuples )

                try:
                    results.append( self.renderer( work,
                                                   path = path ))
                except:
                    # NOTE(review): bare except also traps
                    # KeyboardInterrupt/SystemExit; kept byte-identical
                    # in this documentation-only pass.
                    self.error( "%s: exception in rendering" % self )
                    results.append( ResultBlocks( Utils.buildException( "rendering" ) ) )

        if len(results) == 0:
            self.warn("renderer returned no data.")
            raise ValueError( "renderer returned no data." )

        self.debug( "%s: rendering data finished with %i blocks" % (self.tracker, len(results)))

        return results
Пример #26
0
    def asSpreadSheet( self, dataframe, row_headers, col_headers, title ):
        '''save the table as an xls file.

        Multiple files of the same Renderer/Tracker combination are distinguished 
        by the title.

        returns a ResultBlock whose ``xls`` attribute holds the
        openpyxl workbook.
        '''
        
        self.debug("%s: saving %i x %i table as spread-sheet'"% (id(self), 
                                                                 len(row_headers), 
                                                                 len(col_headers)))
        # for large tables use openpyxl's optimized writer, which
        # only supports appending whole rows
        quick = len(dataframe) > 10000
        
        if quick:
            # quick writing, only append method works
            wb = openpyxl.Workbook( optimized_write = True)
            def addWorksheet( wb, dataframe, title ):
                # append *dataframe* row-wise to a new worksheet
                ws = wb.create_sheet()

                ws.append( [""] + list(col_headers) )
                for x,row in enumerate( dataframe.iterrows() ):
                    ws.append( [path2str(row_headers[x])] + list(row) )

                # patch: maximum title length seems to be 31
                ws.title = title[:30]
                    
        else:
            # do it cell-by-cell, this might be slow
            wb = openpyxl.Workbook( optimized_write = False)
            def addWorksheet( wb, dataframe, title ):
                # write *dataframe* cell-by-cell, converting rst-style
                # links into spreadsheet hyperlinks
                ws = wb.create_sheet()

                # regex to detect rst hyperlinks
                regex_link = re.compile( '`(.*) <(.*)>`_')
                # NOTE(review): 0-based cell addressing targets the
                # old openpyxl API (modern openpyxl is 1-based)
                for column, column_name in enumerate( dataframe.columns ):
                    c = ws.cell( row=0, column=column)
                    c.value = column_name
                    dataseries = dataframe[column_name]
                    if dataseries.dtype == object:
                        for row, value in enumerate( dataseries ):
                            c = ws.cell( row=row+1, column=column)
                            value = str(value)
                            if value.startswith('`'):
                                c.value, c.hyperlink = regex_link.match( value ).groups()
                            else:
                                c.value = value 
                    else:
                        for row, value in enumerate( dataseries ):
                            c = ws.cell( row=row+1, column=column)
                            c.value = value 
                # patch: maximum title length seems to be 31
                ws.title = title[:30]

        is_hierarchical = isinstance( dataframe.index, pandas.core.index.MultiIndex )

        split = is_hierarchical and len(dataframe.index.levels) > 1

        if split:
            # create separate worksheets for nested indices, plus a
            # summary sheet that links to them
            nlevels = len(dataframe.index.levels)
            paths = map( tuple, DataTree.unique( [ x[:nlevels-1] for x in dataframe.index.unique() ] ))

            ws = wb.worksheets[0]
            ws.title = 'Summary' 
            ws.append( [dataframe.index.labels[:nlevels-1]] + ["Worksheet", "Rows" ] )

            for row, path in enumerate(paths):
                # select data frame as cross-section
                work = dataframe.xs(path, axis=0 )
                title = path2str( path )[:30]
                ws.append( list(path) + [title, len(work)] )
                c = ws.cell( row = row+1, column = nlevels )
                c.hyperlink = "#%s" % title
                addWorksheet( wb, work, title = title )
                
        else:
            # bugfix: used to call undefined ``writeWorksheet`` -
            # the helper defined in both branches above is addWorksheet
            addWorksheet( wb, dataframe, title = title )

        # write result block 
        lines = []
        lines.append("`%i x %i table <#$xls %s$#>`__" %\
                     (len(row_headers), len(col_headers),
                      title) )
        lines.append( "" )
        
        r = ResultBlock( "\n".join(lines), title = title)
        r.xls = wb

        self.debug("%s: saved %i x %i table as spread-sheet'"% (id(self), 
                                                                len(row_headers), 
                                                                len(col_headers)))
        return r