Example #1
    def render(self, dataframe, path):

        if len(dataframe.columns) < 2:
            raise ValueError(
                "requires at least two coordinate columns, only got %s" %
                str(dataframe.columns))

        plts, legend = [], []
        blocks = ResultBlocks()

        for xcolumn, ycolumn in itertools.combinations(dataframe.columns, 2):

            # remove missing data points
            xvalues, yvalues = Stats.filterMissing(
                (dataframe[xcolumn], dataframe[ycolumn]))

            # remove columns with all NaN
            if len(xvalues) == 0 or len(yvalues) == 0:
                continue

            # apply log transformation on data not on plot
            if self.logscale:
                if "x" in self.logscale:
                    xvalues = R.log10(xvalues)
                if "y" in self.logscale:
                    yvalues = R.log10(yvalues)

            self.startPlot()
            # wrap in numpy arrays, as a pandas Series cannot be
            # passed through rpy2 directly.
            R.smoothScatter(numpy.array(xvalues, dtype=float),
                            numpy.array(yvalues, dtype=float),
                            xlab=xcolumn,
                            ylab=ycolumn,
                            nbin=self.nbins)
            blocks.extend(self.endPlot(dataframe, path))

        return blocks
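The pairwise iteration above can be reproduced without rpy2. Below is a minimal sketch (pandas and numpy only, with made-up column names and data) of how every pair of columns is selected and rows with missing values are dropped before plotting, mirroring what Stats.filterMissing does above:

import itertools
import numpy
import pandas

# toy data frame with three coordinate columns (hypothetical names)
df = pandas.DataFrame({
    "x": [1.0, 2.0, numpy.nan, 4.0],
    "y": [2.0, numpy.nan, 6.0, 8.0],
    "z": [1.0, 1.0, 2.0, 3.0]})

for xcolumn, ycolumn in itertools.combinations(df.columns, 2):
    # drop rows where either coordinate is missing
    pair = df[[xcolumn, ycolumn]].dropna()
    if pair.empty:
        continue
    xvalues = numpy.asarray(pair[xcolumn], dtype=float)
    yvalues = numpy.asarray(pair[ycolumn], dtype=float)
    print(xcolumn, ycolumn, len(xvalues), len(yvalues))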
Example #2
def layoutBlocks(blocks, layout="column"):
    """layout blocks of rst text.

    layout can be one of "column", "row", or "grid".

    The layout uses an rst table to arrange elements.
    """

    lines = []
    if len(blocks) == 0:
        return lines

    # flatten blocks
    bb = ResultBlocks()
    for b in blocks:
        if b.title:
            b.updateTitle(b.title, "prefix")
        try:
            bb.extend(b)
        except TypeError:
            bb.append(b)

    blocks = bb

    # check if postambles are identical across all blocks
    postambles = set([b.postamble for b in blocks])

    if len(postambles) == 1:
        blocks.clearPostamble()
        postamble = postambles.pop()
    else:
        postamble = None

    if layout == "column":
        for block in blocks:
            if block.title:
                lines.extend(block.title.split("\n"))
                lines.append("")
            else:
                warn("report_directive.layoutBlocks: missing title")

            lines.extend(block.text.split("\n"))
            lines.extend(block.postamble.split("\n"))

        lines.append("")

        if postamble:
            lines.extend(postamble.split("\n"))
            lines.append("")
        return lines

    elif layout in ("row", "grid"):
        if layout == "row":
            ncols = len(blocks)
        elif layout == "grid":
            ncols = int(math.ceil(math.sqrt(len(blocks))))

    elif layout.startswith("column"):
        ncols = min(len(blocks), int(layout.split("-")[1]))
        # TODO: think about appropriate fix for empty data
        if ncols == 0:
            ncols = 1
            return lines
    else:
        raise ValueError("unknown layout %s " % layout)

    if ncols == 0:
        warn("no columns")
        return lines

    # compute column widths
    widths = [x.getWidth() for x in blocks]
    text_heights = [x.getTextHeight() for x in blocks]
    title_heights = [x.getTitleHeight() for x in blocks]

    columnwidths = []
    for x in range(ncols):
        columnwidths.append(max([widths[y] for y in
                                 range(x, len(blocks), ncols)]))

    separator = "+%s+" % "+".join(["-" * x for x in columnwidths])

    # add empty blocks
    if len(blocks) % ncols:
        blocks.extend([ResultBlock("", "")] * (ncols - len(blocks) % ncols))

    for nblock in range(0, len(blocks), ncols):

        # add text
        lines.append(separator)
        max_height = max(text_heights[nblock:nblock + ncols])
        new_blocks = ResultBlocks()

        for xx in range(nblock, min(nblock + ncols, len(blocks))):
            txt = blocks[xx].text.split("\n") + \
                  blocks[xx].postamble.split("\n")
            col = xx % ncols

            max_width = columnwidths[col]

            # add missing lines
            txt.extend([""] * (max_height - len(txt)))
            # pad each line to the column width
            txt = [x + " " * (max_width - len(x)) for x in txt]

            new_blocks.append(txt)

        for l in zip(*new_blocks):
            lines.append("|%s|" % "|".join(l))

        # add subtitles
        max_height = max(title_heights[nblock:nblock + ncols])

        if max_height > 0:

            new_blocks = ResultBlocks()
            lines.append(separator)

            for xx in range(nblock, min(nblock + ncols, len(blocks))):

                txt, col = blocks[xx].title.split("\n"), xx % ncols

                max_width = columnwidths[col]
                # add missing lines
                txt.extend([""] * (max_height - len(txt)))
                # pad each line to the column width
                txt = [x + " " * (max_width - len(x)) for x in txt]

                new_blocks.append(txt)

            for l in zip(*new_blocks):
                lines.append("|%s|" % "|".join(l))

    lines.append(separator)

    if postamble:
        lines.append(postamble)

    lines.append("")

    return lines
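The row/grid table assembly can be seen in isolation. Below is a minimal sketch (plain Python, toy cell texts instead of ResultBlock objects) of how column widths, the separator line, and the padded cell rows combine into an rst grid table, as in the "grid" branch above:

import math

# toy cell texts standing in for the rendered blocks
cells = ["alpha\nbeta", "gamma", "delta\nepsilon\nzeta", "eta", "theta"]
ncols = int(math.ceil(math.sqrt(len(cells))))      # "grid" layout
cells += [""] * (-len(cells) % ncols)              # fill the last row

rows = [cells[i:i + ncols] for i in range(0, len(cells), ncols)]
widths = [max(len(line) for row in rows for line in row[c].split("\n"))
          for c in range(ncols)]
separator = "+%s+" % "+".join("-" * w for w in widths)

lines = []
for row in rows:
    lines.append(separator)
    height = max(cell.count("\n") + 1 for cell in row)
    # pad each cell to the same height, then to the column width
    padded = [cell.split("\n") + [""] * (height - cell.count("\n") - 1)
              for cell in row]
    for texts in zip(*padded):
        lines.append("|%s|" % "|".join(
            t.ljust(w) for t, w in zip(texts, widths)))
lines.append(separator)
print("\n".join(lines))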
Example #3
    def __call__(self, dataframe, path):
        '''iterate over leaves/branches in the data structure.

        This method will call the :meth:`render` method.
        '''
        result = ResultBlocks()

        if not self.split_at:
            result.extend(self.render(dataframe, path))
        else:
            # split dataframe at first index
            level = Utils.getGroupLevels(dataframe)
            grouper = dataframe.groupby(level=level)
            if len(grouper) < self.split_at:
                result.extend(self.render(dataframe, path))
            else:
                # build groups
                always, remove_always = [], set()

                if self.split_always:
                    for key, work in grouper:
                        for pat in self.split_always:
                            rx = re.compile(pat)
                            if rx.search(path2str(key)):
                                always.append((key, work))
                                remove_always.add(key)

                    grouper = dataframe.groupby(level=level)

                def _group_group(grouper, always, remove_always):
                    group = always[:]
                    for key, work in grouper:

                        if key in remove_always:
                            continue
                        group.append((key, work))

                        if len(group) >= self.split_at:
                            yield group
                            group = always[:]

                    # yield the remaining, possibly incomplete group
                    yield group

                first = True
                for group in _group_group(grouper,
                                          always,
                                          remove_always):
                    # do not plot last dataframe that contains
                    # only the common tracks to plot
                    if not first and len(group) == len(always):
                        continue
                    first = False

                    df = pandas.concat(
                        [x[1] for x in group])

                    # reconcile index names
                    df.index.names = dataframe.index.names
                    result.extend(self.render(df, path))

        return result
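The chunking done by _group_group can be demonstrated with plain lists. Below is a minimal, self-contained sketch (made-up keys, split_at=3, one "always" item) showing how every emitted chunk starts from the common items and how the trailing, possibly shorter chunk is still yielded; the caller above then skips that trailing chunk when it contains only the common items:

def chunk_groups(items, always, remove_always, split_at):
    # mirror _group_group: every chunk starts with the "always" items
    group = always[:]
    for key, work in items:
        if key in remove_always:
            continue
        group.append((key, work))
        if len(group) >= split_at:
            yield group
            group = always[:]
    # yield the remaining, possibly incomplete chunk
    yield group

items = [("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5)]
always = [("a", 1)]
for chunk in chunk_groups(items, always, {"a"}, split_at=3):
    print([key for key, _ in chunk])
# prints ['a', 'b', 'c'], then ['a', 'd', 'e'], then ['a']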
Example #4
    def __call__(self, dataframe, path):
        '''iterate over leaves/branches in data structure.

        This method will call the :meth:`render` method.

        Large dataframes are split into multiple, smaller rendered
        objects if self.split_at is not zero.

        By default, dataframes are split along the hierarchical
        index. However, if there is only a single index, but multiple
        columns, the split is performed on the columns instead. This
        is used when splitting coordinate data as a result of the
        histogram transformation.

        '''
        result = ResultBlocks()

        if not self.split_at:
            result.extend(self.render(dataframe, path))
        else:
            # split dataframe at first index
            level = Utils.getGroupLevels(dataframe)
            grouper = dataframe.groupby(level=level)

            # split dataframe column wise if only one index
            # and multiple columns
            if len(grouper) == 1 and len(dataframe.columns) > self.split_at:
                columns = list(dataframe.columns)
                always = []
                if self.split_keep_first_column:
                    always.append(columns[0])
                # columns to always keep
                always.extend([c for c in columns if c in self.split_always])
                columns = [c for c in columns if c not in always]
                for x in range(0, len(columns), self.split_at):
                    # extract a set of columns
                    result.extend(self.render(
                        dataframe.loc[:, always+columns[x:x+self.split_at]],
                        path))
            # split dataframe along index
            elif len(grouper) >= self.split_at:
                # build groups
                always, remove_always = [], set()

                if self.split_always:
                    for key, work in grouper:
                        for pat in self.split_always:
                            rx = re.compile(pat)
                            if rx.search(path2str(key)):
                                always.append((key, work))
                                remove_always.add(key)

                    grouper = dataframe.groupby(level=level)

                def _group_group(grouper, always, remove_always):
                    group = always[:]
                    for key, work in grouper:

                        if key in remove_always:
                            continue
                        group.append((key, work))

                        if len(group) >= self.split_at:
                            yield group
                            group = always[:]

                    # yield the remaining, possibly incomplete group
                    yield group

                first = True
                for group in _group_group(grouper,
                                          always,
                                          remove_always):
                    # do not plot last dataframe that contains
                    # only the common tracks to plot
                    if not first and len(group) == len(always):
                        continue
                    first = False

                    df = pandas.concat(
                        [x[1] for x in group])

                    # reconcile index names
                    df.index.names = dataframe.index.names
                    result.extend(self.render(df, path))
            else:
                # do not split dataframe
                result.extend(self.render(dataframe, path))

        return result
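The column-wise branch can be sketched on its own. Below is a minimal example (pandas only, hypothetical column names, split_at=2, keeping the first column in every slice as with split_keep_first_column) of how the columns are partitioned into overlapping views that are rendered separately:

import pandas

df = pandas.DataFrame({"gene": ["g1", "g2"],
                       "s1": [1, 2], "s2": [3, 4],
                       "s3": [5, 6], "s4": [7, 8]})
split_at = 2
columns = list(df.columns)
always = [columns[0]]            # columns kept in every slice
columns = [c for c in columns if c not in always]

for x in range(0, len(columns), split_at):
    # each slice carries the "always" columns plus the next chunk
    subset = df.loc[:, always + columns[x:x + split_at]]
    print(list(subset.columns))
# prints ['gene', 's1', 's2'], then ['gene', 's3', 's4']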
Example #5
    def __call__(self, *args, **kwargs):

        try:
            self.parseArguments(*args, **kwargs)
        except:
            self.error("%s: exception in parsing" % self)
            return ResultBlocks(Utils.buildException("parsing"))

        # collect no data if tracker is the empty tracker
        # and go straight to rendering
        try:
            if self.tracker.getTracks() == ["empty"]:
                # isinstance does not work because of module mapping:
                # type(Tracker.Empty) == CGATReport.Tracker.Empty
                # type(self.tracker) == Tracker.Empty
                # if isinstance(self.tracker, Tracker.Empty):
                return self.renderer()
        except AttributeError:
            # for function trackers
            pass

        self.debug("profile: started: tracker: %s" % (self.tracker))

        # collecting data
        try:
            self.collect()
        except Exception as ex:
            self.error("%s: exception in collection: %s" % (self, str(ex)))
            return ResultBlocks(
                Utils.buildException("collection"))
        finally:
            self.debug("profile: finished: tracker: %s" % (self.tracker))

        if self.tree is None or len(self.tree) == 0:
            self.info("%s: no data - processing complete" % self.tracker)
            return None

        data_paths = DataTree.getPaths(self.tree)
        self.debug("%s: after collection: %i data_paths: %s" %
                   (self, len(data_paths), str(data_paths)))

        # special Renderers - do not process data further but render
        # directly. Note that no transformations will be applied.
        if isinstance(self.renderer, Renderer.User):
            results = ResultBlocks(title="main")
            results.extend(self.renderer(self.tree))
            return results
        elif isinstance(self.renderer, Renderer.Debug):
            results = ResultBlocks(title="main")
            results.extend(self.renderer(self.tree))
            return results

        # merge all data to hierarchical indexed dataframe
        self.data = DataTree.as_dataframe(self.tree, self.tracker)

        if self.data is None:
            self.info("%s: no data after conversion" % self.tracker)
            return None

        self.debug("dataframe memory usage: total=%i,data=%i,index=%i,col=%i" %
                   (self.data.values.nbytes +
                    self.data.index.nbytes +
                    self.data.columns.nbytes,
                    self.data.values.nbytes,
                    self.data.index.nbytes,
                    self.data.columns.nbytes))

        # if tracks are set by tracker, call tracker with dataframe
        if self.indexFromTracker:
            self.tracker.setIndex(self.data)

        # transform data
        try:
            self.transform()
        except:
            self.error("%s: exception in transformation" % self)
            return ResultBlocks(
                Utils.buildException("transformation"))

        try:
            self.reframe()
        except:
            self.error("%s: exception in reframing" % self)
            return ResultBlocks(Utils.buildException("reframing"))

        # data_paths = DataTree.getPaths(self.data)
        # self.debug("%s: after transformation: %i data_paths: %s" %
        #           (self, len(data_paths), str(data_paths)))
        # restrict
        try:
            self.filterPaths(self.restrict_paths, mode="restrict")
        except:
            self.error("%s: exception in restrict" % self)
            return ResultBlocks(
                Utils.buildException("restrict"))

        # data_paths = DataTree.getPaths(self.data)
        # self.debug("%s: after restrict: %i data_paths: %s" %
        #          (self, len(data_paths), str(data_paths)))
        # exclude
        try:
            self.filterPaths(self.exclude_paths, mode="exclude")
        except:
            self.error("%s: exception in exclude" % self)
            return ResultBlocks(Utils.buildException("exclude"))

        # data_paths = DataTree.getPaths(self.data)
        # self.debug("%s: after exclude: %i data_paths: %s" %
        #          (self, len(data_paths), str(data_paths)))

        # No pruning - maybe enable later as a user option
        self.pruned = []
        # try:
        #     self.prune()
        # except:
        #     self.error("%s: exception in pruning" % self)
        # return ResultBlocks(ResultBlocks(Utils.buildException("pruning")))

        # data_paths = DataTree.getPaths(self.data)
        # self.debug("%s: after pruning: %i data_paths: %s" %
        #           (self, len(data_paths), str(data_paths)))
        try:
            self.group()
        except:
            self.error("%s: exception in grouping" % self)
            return ResultBlocks(Utils.buildException("grouping"))

        # data_paths = DataTree.getPaths(self.data)
        # self.debug("%s: after grouping: %i data_paths: %s" %
        #           (self, len(data_paths), str(data_paths)))
        if self.renderer is not None:
            self.debug("profile: started: renderer: %s" % (self.renderer))

            try:
                result = self.render()
            except:
                self.error("%s: exception in rendering" % self)
                return ResultBlocks(
                    Utils.buildException("rendering"))
            finally:
                self.debug("profile: finished: renderer: %s" % (self.renderer))
        else:
            result = ResultBlocks(title="")

        return result
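The control flow above is a pipeline of stages (parse, collect, transform, reframe, restrict, exclude, group, render), each wrapped so that a failure is turned into an error block rather than an uncaught exception. Below is a minimal, library-independent sketch of that pattern; the stage names and the error-record format are made up and only stand in for Utils.buildException and ResultBlocks:

def run_pipeline(stages):
    """Run stages in order; convert the first failure into an error record."""
    for name, stage in stages:
        try:
            stage()
        except Exception as ex:
            # return an error block instead of propagating the exception
            return [("error", "%s failed: %s" % (name, ex))]
    return [("ok", "all stages finished")]

def collect():
    pass

def transform():
    raise ValueError("bad data")

print(run_pipeline([("collect", collect), ("transform", transform)]))
# prints [('error', 'transform failed: bad data')]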
Example #6
    def render(self):
        '''supply the :class:`Renderer.Renderer` with the data to render.

        The data supplied will depend on the ``groupby`` option.

        Returns a ResultBlocks data structure.
        '''
        self.debug("%s: rendering data started for %i items" %
                   (self,
                    len(self.data)))

        # initiate output structure
        results = ResultBlocks(title="")

        dataframe = self.data

        if dataframe is None:
            self.warn("%s: no data after conversion" % self)
            raise ValueError("no data for renderer")

        # special patch: set column names to pruned levels
        # if there are no column names
        if len(dataframe.columns) == len(self.pruned):
            if list(dataframe.columns) == list(range(len(dataframe.columns))):
                dataframe.columns = [x[1] for x in self.pruned]

        nlevels = Utils.getDataFrameLevels(dataframe)

        self.debug("%s: rendering data started. "
                   "levels=%i, group_level=%s" %
                   (self, nlevels,
                    str(self.group_level)))

        if self.group_level < 0:
            # no grouping: the renderer accepts a dataframe with any
            # number of index levels and no explicit grouping has
            # been requested.
            results.extend(self.renderer(dataframe, path=()))
        else:
            level = Utils.getGroupLevels(
                dataframe,
                max_level=self.group_level + 1)

            self.debug("%s: grouping by levels: %s" %
                       (self, str(level)))

            for key, work in dataframe.groupby(level=level):

                try:
                    results.extend(self.renderer(work,
                                                 path=key))
                except:
                    self.error("%s: exception in rendering" % self)
                    results.append(Utils.buildException("rendering"))

        if len(results) == 0:
            self.warn("renderer returned no data.")
            raise ValueError("renderer returned no data.")

        self.debug("%s: rendering data finished with %i blocks" %
                   (self.tracker, len(results)))

        return results
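The grouping that render() performs before handing data to the renderer can be reproduced with pandas alone. Below is a minimal sketch (made-up two-level index, grouping on the outermost level as with group_level=0); a real renderer would turn each work slice into result blocks:

import pandas

index = pandas.MultiIndex.from_product(
    [["track1", "track2"], ["sliceA", "sliceB"]],
    names=["track", "slice"])
df = pandas.DataFrame({"value": [1, 2, 3, 4]}, index=index)

# group on the outermost index level
for key, work in df.groupby(level=0):
    # a real renderer would build ResultBlocks from `work` here
    print(key, work["value"].tolist())
# prints: track1 [1, 2]  and  track2 [3, 4]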