def getTransformers(transformers, kwargs=None):
    """Find and instantiate all transformers named in *transformers*.

    For each name, first look for a registered plugin
    ``transform-<name>``; otherwise fall back to building a user
    transformer via :func:`makeTransformer`.

    :param transformers: iterable of transformer names.
    :param kwargs: optional dict of keyword arguments passed to each
        transformer's constructor.
    :return: list of instantiated transformers, in input order.
    :raises KeyError: if a transformer can not be found.
    """
    # avoid the shared-mutable-default pitfall of ``kwargs={}``
    if kwargs is None:
        kwargs = {}
    result = []
    for transformer in transformers:
        k = "transform-%s" % transformer
        if k in Component.getPlugins()["transform"]:
            cls = Component.getPlugins()["transform"][k]
            instance = cls(**kwargs)
        else:
            # user-defined transformer, resolved by name
            instance = makeTransformer(transformer, (), kwargs)
        if not instance:
            # use Component.getPlugins() as elsewhere in this module;
            # the original called a bare getPlugins() here, which is
            # inconsistent with getRenderer() below
            msg = "could not find transformer '%s'. Available transformers:\n %s" % \
                (transformer,
                 "\n ".join(sorted(Component.getPlugins()["transform"].keys())))
            raise KeyError(msg)
        result.append(instance)
    return result
def getRenderer(renderer_name, kwargs=None):
    """Find and instantiate the renderer called *renderer_name*.

    A registered plugin ``render-<name>`` takes precedence; otherwise a
    user renderer is built via :func:`makeRenderer`.

    :param renderer_name: name of the renderer.
    :param kwargs: optional dict of keyword arguments passed to the
        renderer's constructor.
    :return: the instantiated renderer.
    :raises KeyError: if no renderer of that name can be found.
    """
    # avoid the shared-mutable-default pitfall of ``kwargs={}``
    if kwargs is None:
        kwargs = {}
    renderer = None
    try:
        cls = Component.getPlugins()["render"]["render-%s" % renderer_name]
        renderer = cls(**kwargs)
    except KeyError:
        # This was uncommented to fix one bug
        # but uncommenting invalidates user renderers
        # TODO: needs to be revisited
        renderer = makeRenderer(renderer_name, kwargs)
    if not renderer:
        raise KeyError(
            "could not find renderer '%s'. Available renderers:\n %s" %
            (renderer_name, "\n ".join(
                sorted(Component.getPlugins()["render"].keys()))))
    return renderer
def setup(app):
    """Sphinx extension entry point: register the ``report`` directive.

    Called once by Sphinx with the application object. Build locations
    are cached as attributes on the function object itself
    (``setup.srcdir``, ``setup.builddir``, ...) so that module-level
    code can read them later without a Sphinx handle.

    :param app: Sphinx application object.
    :return: extension metadata dict for Sphinx.
    """
    # cache build context on the function object for later lookups
    setup.app = app
    setup.config = app.config
    setup.confdir = app.confdir
    setup.srcdir = app.srcdir
    # NOTE(review): assumes the sphinx build is started from the build
    # directory, so the cwd is recorded as the build dir -- confirm.
    setup.builddir = os.getcwd()
    app.add_directive('report', report_directive)
    # update global parameters in Utils module.
    # NOTE(review): the local PARAMS is not used afterwards; presumably
    # Utils.get_parameters() is called for its side effects -- confirm.
    PARAMS = Utils.get_parameters()
    app.add_config_value('PARAMS', collections.defaultdict(), 'env')
    setup.logger = Component.get_logger()
    return {'parallel_read_safe': True}
def run(self):
    """Docutils directive entry point.

    Logs the source location and delegates all work to the
    module-level :func:`run`, forwarding the directive's arguments,
    options, content and the Sphinx build environment.
    """
    logger = Component.get_logger()
    source_path = self.state.document.current_source
    logger.info("report_directive: starting: %s:%i" %
                (str(source_path), self.lineno))
    build_env = self.state.document.settings.env
    return run(self.arguments,
               self.options,
               self.lineno,
               self.content,
               self.state_machine,
               source_path,
               build_environment=build_env)
def run(arguments, options, lineno, content, state_machine=None,
        document=None, srcdir=None, builddir=None, build_environment=None):
    """Process the ``:report:`` directive.

    Resolves the tracker, transformers and renderer named in
    *arguments*/*options*, dispatches data collection, and returns the
    resulting restructured text to the docutils state machine.

    *srcdir* - top level directory of rst documents
    *builddir* - build directory

    Returns an empty list (output is injected via *state_machine*).
    """
    tag = "%s:%i" % (str(document), lineno)
    logger = Component.get_logger()
    logger.debug("report_directive.run: profile: started: rst: %s" % tag)

    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])

    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)

    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)
    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir

    # build directory
    if builddir is None:
        builddir = setup.builddir

    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.abspath(os.path.realpath(x))
        for x in (srcdir, builddir, rstdir)]

    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir   = directory where rst sources are located
    # srcdir   = directory from which the build process is started

    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)

    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)

    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)

    logger.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments), str(options), str(lineno), str(content),
         str(document)))
    logger.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logger.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir, codename))

    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    # BUGFIX: keep the caught exception in a separate name - in
    # Python 3 the ``except ... as msg`` binding is deleted when the
    # except block exits, so referencing it afterwards raised NameError.
    makedirs_error = None
    try:
        os.makedirs(outdir)
    except OSError as exc:
        makedirs_error = exc

    if not os.path.exists(outdir):
        raise OSError("could not create directory %s: %s" %
                      (outdir, makedirs_error))

    ########################################################
    # collect options
    # replace placedholders
    try:
        options = update_options(options)
    except ValueError as msg:
        logger.warn("failure while updating options: %s" % msg)

    logger.debug("report_directive.run: options=%s" % (str(options),))

    transformer_names = []
    renderer_name = None

    layout = options.get("layout", "column")
    long_titles = "long-titles" in options

    option_map = get_option_map()
    renderer_options = select_and_delete_options(
        options, option_map["render"])
    transformer_options = select_and_delete_options(
        options, option_map["transform"])
    dispatcher_options = select_and_delete_options(
        options, option_map["dispatch"])
    tracker_options = select_and_delete_options(
        options, option_map["tracker"], expand=["tracker"])
    display_options = get_default_display_options()
    display_options.update(select_and_delete_options(
        options, option_map["display"]))

    logger.debug("report_directive.run: renderer options: %s" %
                 str(renderer_options))
    logger.debug("report_directive.run: transformer options: %s" %
                 str(transformer_options))
    logger.debug("report_directive.run: dispatcher options: %s" %
                 str(dispatcher_options))
    logger.debug("report_directive.run: tracker options: %s" %
                 str(tracker_options))
    logger.debug("report_directive.run: display options: %s" %
                 str(display_options))

    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]

    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]

    ########################################################
    # check for missing files
    if renderer_name is not None:

        # hash all options that influence the output so that the
        # cached text/figures are invalidated when they change
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names) +\
            re.sub(r"\s", "", "".join(content))

        options_hash = hashlib.md5(options_key.encode()).hexdigest()[:10]

        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))
        rstname = os.path.basename(filename_text)
        notebookname += options_hash

        logger.debug("report_directive.run: options_hash=%s" % options_hash)

        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        queries = [re.compile(r"%s/(\S+.%s)" % (root2builddir, suffix))
                   for suffix in ("png", "pdf", "svg")]

        logger.debug("report_directive.run: checking for changed files.")

        # check if text element exists
        if os.path.exists(filename_text):

            with open(filename_text, "r", encoding="utf-8") as inf:
                lines = [x[:-1] for x in inf]

            filenames = []

            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))

            filenames = [os.path.join(outdir, x) for x in filenames]
            if len(filenames) == 0:
                logger.info(
                    "report_directive.run: %s: redo: no files found" % tag)
            else:
                logger.debug(
                    "report_directive.run: %s: checking for %s" %
                    (tag, str(filenames)))
                for filename in filenames:
                    if not os.path.exists(filename):
                        logger.info(
                            "report_directive.run: %s: redo: file %s is missing" %
                            (tag, filename))
                        break
                else:
                    logger.info(
                        "report_directive.run: %s: noredo: all files are present" %
                        tag)
                    # all is present - save text and return
                    if lines and state_machine:
                        state_machine.insert_input(
                            lines, state_machine.input_lines.source(0))
                    return []
        else:
            logger.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None

    collect_here = False
    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logger.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = make_tracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logger.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)

        logger.debug(
            "report_directive.run: collected tracker %s" % tracker_name)

        tracker_id = Cache.tracker2key(tracker)

        ########################################################
        # determine the transformer
        logger.debug("report_directive.run: creating transformers")

        transformers = get_transformers(
            transformer_names, transformer_options)

        ########################################################
        # determine the renderer
        logger.debug("report_directive.run: creating renderer.")

        if renderer_name is None:
            logger.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")

        renderer = get_renderer(renderer_name, renderer_options)

        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
            renderer.set_build_environment(build_environment)
        except AttributeError:
            # User renderers will not have these methods
            pass

        ########################################################
        # write code output
        linked_codename = re.sub(
            "\\\\", "/", os.path.join(rst2builddir, codename))
        if code and basedir != outdir:
            if six.PY2:
                with open(os.path.join(outdir, codename), "w") as outfile:
                    for line in code:
                        outfile.write(line)
            else:
                with open(os.path.join(outdir, codename), "w",
                          encoding=get_encoding()) as outfile:
                    for line in code:
                        outfile.write(line)

        ########################################################
        # write notebook snippet
        linked_notebookname = re.sub(
            "\\\\", "/", os.path.join(rst2builddir, notebookname))

        if basedir != outdir and tracker_id is not None:
            with open(os.path.join(outdir, notebookname), "w") as outfile:
                Utils.writeNoteBookEntry(
                    outfile,
                    renderer=renderer_name,
                    tracker=tracker_name,
                    transformers=transformer_names,
                    tracker_path=tracker_path,
                    options=list(renderer_options.items()) +
                    list(tracker_options.items()) +
                    list(transformer_options.items()))

        if filename_text is not None:
            linked_rstname = re.sub(
                "\\\\", "/", os.path.join(rst2builddir, rstname))
        else:
            linked_rstname = None

        ##########################################################
        # Initialize collectors
        links = {'code_url': linked_codename,
                 'rst_url': linked_rstname,
                 'notebook_url': linked_notebookname}

        collectors = []
        for name, collector in get_plugins("collect").items():
            collectors.append(collector(
                template_name=template_name,
                outdir=outdir,
                rstdir=rstdir,
                builddir=builddir,
                srcdir=srcdir,
                content=content,
                display_options=display_options,
                trackerd_id=tracker_id,
                links=links))

        # user renderers might not have a set_collectors method
        try:
            collect_here = not renderer.set_collectors(collectors)
        except AttributeError:
            collect_here = True

        ########################################################
        # create and call dispatcher
        logger.debug("report_directive.run: creating dispatcher")

        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)

        # add the tracker options
        dispatcher_options.update(tracker_options)

        blocks = dispatcher(**dispatcher_options)

        if blocks is None:
            blocks = ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker)))
            code = None
            tracker_id = None

    # catch Exception (not a bare except) so KeyboardInterrupt and
    # SystemExit still abort the build
    except Exception:
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        tb = "\n".join(traceback.format_tb(exceptionTraceback))

        logger.error(
            "report_directive.run: exception caught at %s:%i: %s %s\n%s\n" %
            (str(document), lineno,
             exceptionType, exceptionValue, tb))
        blocks = ResultBlocks(Utils.buildException("invocation"))
        code = None
        tracker_id = None
        links = {'code_url': "",
                 'rst_url': "",
                 'notebook_url': ""}

    logger.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)

    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    requested_urls = as_list(Utils.get_params()["report_urls"])

    urls = []
    if "code" in requested_urls:
        urls.append(":download:`code <%(code_url)s>`" % links)
    if "notebook" in requested_urls:
        urls.append(":download:`nb <%(notebook_url)s>`" % links)

    map_figure2text = {}
    if collect_here:
        for collector in collectors:
            map_figure2text.update(collector.collect(blocks))

    map_figure2text["default-prefix"] = ""
    map_figure2text["default-suffix"] = ""
    if urls and "no-links" not in display_options:
        url_template = "[{}]".format(" ".join(urls))
    else:
        url_template = ""

    # NOTE: TEMPLATE_TEXT interpolates from locals(), so the local
    # variable names above (e.g. url_template) are part of the contract.
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()

    blocks.updatePlaceholders(map_figure2text)

    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout, long_titles=long_titles)
    lines.append("")

    # add caption
    if content and "no-caption" not in display_options:
        lines.extend(['::', ''])
        lines.extend([' %s' % row.strip() for row in content])
        lines.append("")

    # encode lines
    if six.PY2:
        lines = [force_encode(x, encoding="ascii", errors="replace")
                 for x in lines]

    # output rst text for this renderer
    if filename_text:
        if six.PY2:
            with open(filename_text, "w") as outf:
                outf.write("\n".join(lines))
        else:
            with open(filename_text, "w", encoding=get_encoding()) as outf:
                outf.write("\n".join(lines))

    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            try:
                print(("%5i %s" % (x, l)))
            except UnicodeEncodeError:
                print(("line skipped - unicode error"))
                pass

    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))

    logger.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logger.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))

    return []
def tree2table(data, transpose=False, head=None):
    """build table from data.

    The table will be multi-level (main-rows and sub-rows), if:

       1. there is more than one column
       2. each cell within a row is a list or tuple

    If any of the paths contain tuples/lists, these are
    expanded to extra columns as well.

    If head is given, only first head rows are output.

    returns matrix, row_headers, col_headers
    """
    logger = Component.get_logger()

    # paths through the data tree; the first level becomes main rows,
    # the last level becomes columns
    labels = getPaths(data)

    if len(labels) < 2:
        raise ValueError(
            "expected at least two levels for building table, got %i: %s" %
            (len(labels), str(labels)))

    effective_labels = count_levels(labels)
    # subtract last level (will be expanded) and 1 for row header
    effective_cols = sum(effective_labels[:-1]) - 1

    col_headers = [""] * effective_cols + labels[-1]
    ncols = len(col_headers)

    # all intermediate-level combinations; each becomes a sub-row
    paths = list(itertools.product(*labels[1:-1]))
    header_offset = effective_cols
    matrix = []

    logger.debug(
        "Datatree.buildTable: creating table with %i columns" %
        (len(col_headers)))

    # the following can be made more efficient
    # by better use of indices
    row_offset = 0
    row_headers = []

    # iterate over main rows
    for x, row in enumerate(labels[0]):
        first = True
        for xx, path in enumerate(paths):

            # get data - skip if there is None
            work = getLeaf(data, (row,) + path)
            if isinstance(work, pandas.DataFrame):
                if work.empty:
                    continue
            else:
                if not work:
                    continue

            row_data = [""] * ncols

            # add row header only for first row (if there are sub-rows)
            if first:
                if type(row) in Utils.ContainerTypes:
                    # tuple/list row labels: first item is the header,
                    # the rest spill into leading columns
                    row_headers.append(row[0])
                    for z, p in enumerate(row[1:]):
                        row_data[z] = p
                else:
                    row_headers.append(row)
                first = False
            else:
                row_headers.append("")

            # enter data for the first row
            for z, p in enumerate(path):
                row_data[z] = p

            # check for multi-level rows: every cell in the row must be
            # a container of the same length
            is_container = True
            max_rows = None
            for y, column in enumerate(labels[-1]):
                if column not in work:
                    continue
                if type(work[column]) not in Utils.ContainerTypes:
                    is_container = False
                    break
                # (style: `== None` kept as-is; `is None` preferred)
                if max_rows == None:
                    max_rows = len(work[column])
                elif max_rows != len(work[column]):
                    raise ValueError(
                        "multi-level rows - unequal lengths: %i != %i" %
                        (max_rows, len(work[column])))

            # add sub-rows
            if is_container:
                # multi-level rows: emit one matrix row per element
                for z in range(max_rows):
                    for y, column in enumerate(labels[-1]):
                        try:
                            row_data[
                                y + header_offset] = Utils.quote_rst(
                                    work[column][z])
                        except KeyError:
                            # missing column for this leaf - leave blank
                            pass

                    if z < max_rows - 1:
                        matrix.append(row_data)
                        row_headers.append("")
                        row_data = [""] * ncols
            else:
                # single level row
                for y, column in enumerate(labels[-1]):
                    try:
                        row_data[
                            y + header_offset] = Utils.quote_rst(work[column])
                    except KeyError:
                        # missing column for this leaf - leave blank
                        pass

            matrix.append(row_data)

            # NOTE(review): breaks only the inner loop; outer rows keep
            # being visited after `head` is reached -- confirm intended.
            if head and len(matrix) >= head:
                break

    if transpose:
        row_headers, col_headers = col_headers, row_headers
        matrix = list(zip(*matrix))

    # convert headers to string (might be None)
    row_headers = [str(x) for x in row_headers]
    col_headers = [str(x) for x in col_headers]

    return matrix, row_headers, col_headers
def as_dataframe(data, tracker=None):
    '''convert data tree to pandas DataFrame.

    The data frame is multi-indexed according to the depth within the
    data tree. If the data-tree has only one level, the data will be
    single-indexed because pandas will not tolerate a single level
    MultiIndex.

    The code assumes that the data tree has a uniform depth and
    structure.

    The inner-most level in the *data* tree will be columns. However,
    if *data* is only a single-level dictionary, the keys in the
    dictionary will be row labels and the resultant dataframe will
    have only one column.

    Depending on the type of the leaf, the data frame is constructed
    as follows:

    Leaves are multiple arrays of the same size
        The data is assumed to be coordinate type data (x,y,z values).
        Leaves will be added to a dataframe as multiple columns.

    Leaves are a single array or arrays with dissimilar size
        A melted data frame will be constructed where the hierarchical
        index contains the path information and the data frame has a
        single column with the value.

    Leaf is a dataframe
        Dataframes will be concatenated. Existing indices of the
        dataframes will be preserved with the exception of the trivial
        index for the row numbers. Requires: All dataframes need to
        have the same columns.

    Leaf is a scalar
        Dataframes will be built from a nested dictionary

    Special cases for backwards compatibility:

    1. Lowest level dictionary contains the following arrays: rows,
       columns, matrix - numpy matrix, convert to dataframe and apply
       as above

    2. Lowest level dictionary contains the following keys: '01',
       '10', '11' - Venn 2-set data, convert columns '001', '010', ...
       - Venn 3-set data, convert columns

    Pandas attempts to find a column data type that will fit all
    values in a column. Thus, if a column is numeric, but contains
    values such as "inf", "Inf", as well, the column type might be set
    to object or char.
    '''
    if data is None or len(data) == 0:
        return None

    logger = Component.get_logger()

    levels = getDepths(data)
    if len(levels) == 0:
        return None

    mi, ma = min(levels), max(levels)
    if mi != ma:
        raise NotImplementedError(
            'data tree not of uniform depth, min=%i, max=%i' %
            (mi, ma))

    labels = getPaths(data)

    ######################################################
    # check special cases
    MATRIX = ('rows', 'columns', 'matrix')
    VENN2 = ('10', '01', '11')
    VENN3 = ('010', '001', '011')

    dataframe_prune_index = True
    branches = list(getNodes(data, len(labels) - 2))
    for path, branch in branches:
        # numpy matrix - dictionary with keys matrix, rows, columns
        if len(set(branch.keys()).intersection(MATRIX)) == len(MATRIX):
            df = pandas.DataFrame(branch['matrix'],
                                  columns=branch['columns'],
                                  index=branch['rows'])
            setLeaf(data, path, df)
            dataframe_prune_index = False
        elif len(set(branch.keys()).intersection(VENN2)) == len(VENN2) or \
                len(set(branch.keys()).intersection(VENN3)) == len(VENN3):
            # sort so that 'labels' is not the first item
            # specify data such that 'labels' will a single tuple entry
            values = sorted(branch.items())
            df = listAsDataFrame(values)
            dataframe_prune_index = False
            setLeaf(data, path, df)

    ######################################################
    labels = getPaths(data)

    # build multi-index
    leaves = list(getNodes(data, len(labels) - 1))

    # if set to a number, any superfluous levels in the
    # hierarchical index of the final dataframe will
    # be removed.
    expected_levels = None

    leaf = leaves[0][1]

    if is_array(leaf):

        # build dataframe from arrays
        dataframes = []
        index_tuples = []

        # not a nested dictionary
        if len(labels) == 1:
            branches = [(('all',), data)]
        else:
            branches = list(getNodes(data, max(0, len(labels) - 2)))

        # check if it is coordinate data
        # All arrays need to have the same length
        is_coordinate = True
        for path, subtree in branches:
            lengths = [len(x) for x in list(subtree.values())]
            if len(lengths) == 0:
                continue

            # all arrays have the same length - coordinate data
            if len(lengths) == 1 or min(lengths) != max(lengths):
                is_coordinate = False
                break

        if is_coordinate:
            logger.debug('dataframe conversion: from array - coordinates')
            for path, leaves in branches:
                # skip empty leaves
                if len(leaves) == 0:
                    continue
                dataframes.append(pandas.DataFrame(leaves))
                index_tuples.append(path)
        else:
            logger.debug('dataframe conversion: from array - series')
            # arrays of unequal length are measurements
            # build a melted data frame with a single column
            # given by the name of the path.
            for key, leave in leaves:
                # skip empty leaves
                if len(leave) == 0:
                    continue
                index_tuples.append(key)
                dataframes.append(pandas.DataFrame(leave,
                                                   columns=('value',)))

        expected_levels = len(index_tuples[0])
        df = concatDataFrames(dataframes, index_tuples)

    elif is_dataframe(leaf):
        logger.debug('dataframe conversion: from dataframe')
        # build dataframe from list of dataframes by concatenation.
        # Existing indices of the dataframes will be added as columns.
        dataframes = []
        index_tuples = []
        path_lengths = []
        levels = []
        for path, dataframe in leaves:
            if len(dataframe) == 0:
                continue
            path_lengths.append(len(path))
            if len(path) == 1:
                # if only one level, do not use tuple
                index_tuples.append(path[0])
            else:
                index_tuples.append(path)
            dataframes.append(dataframe)
            levels.append(Utils.getDataFrameLevels(
                dataframe,
                test_for_trivial=True))

        if len(path_lengths) == 0:
            return None

        assert min(path_lengths) == max(path_lengths)
        assert min(levels) == max(levels)

        # if only a single dataframe without given
        # tracks, return dataframe
        if index_tuples == ["all"]:
            df = dataframes[0]
            # if index is a simple numeric list, change to "all".
            # COMPAT: pandas.Int64Index was removed in pandas 2.0;
            # an integer-dtype (non-multi) index is the equivalent test.
            if df.index.name is None and \
                    not isinstance(df.index, pandas.MultiIndex) and \
                    pandas.api.types.is_integer_dtype(df.index):
                df.index = ["all"] * len(df)
            return df

        expected_levels = min(path_lengths) + min(levels)
        df = concatDataFrames(dataframes, index_tuples)

    else:
        logger.debug('dataframe conversion: from values')
        if len(labels) == 1:
            # { 'x': 1, 'y': 2 } -> DF with one row and two columns (x, y)
            df = pandas.DataFrame(list(data.values()),
                                  index=list(data.keys()))
        elif len(labels) == 2:
            # { 'a': {'x':1, 'y':2}, 'b': {'y',2}
            # -> DF with two columns(x,y) and two rows(a,b)
            df = pandas.DataFrame.from_dict(data).transpose()
            # reorder so that order of columns corresponds to data
            df = df[labels[-1]]
        else:
            # We are dealing with a simple nested dictionary
            branches = list(getNodes(data, max(0, len(labels) - 3)))
            dataframes = []
            index_tuples = []
            for path, nested_dict in branches:
                # transpose to invert columns and rows
                # in cgatreport convention, the deeper
                # level in a dictionary in cgatreport are columns, while
                # in pandas they are rows.
                df = pandas.DataFrame(nested_dict).transpose()
                dataframes.append(df)
                index_tuples.extend([path])
            df = concatDataFrames(dataframes, index_tuples)

    # remove index with row numbers
    if expected_levels is not None and dataframe_prune_index:
        Utils.pruneDataFrameIndex(df, expected_levels)

    # rename levels in hierarchical index.
    # COMPAT: use the public pandas.MultiIndex alias - the private
    # pandas.core.index path was removed in pandas >= 1.0.
    is_hierarchical = isinstance(df.index, pandas.MultiIndex)
    if is_hierarchical:
        n = list(df.index.names)
        # BUGFIX: the original left the fallback name list unbound when
        # tracker was None, causing a NameError below.
        level_names = None
        if tracker is not None:
            level_names = getattr(tracker, "levels", None)
        if level_names is None:
            level_names = ["track", "slice"] + \
                ["level%i" % x for x in range(len(n))]
        for x, y in enumerate(n):
            if y is None:
                n[x] = level_names[x]
        df.index.names = n
    else:
        df.index.name = 'track'

    return df
def run(arguments, options, lineno, content, state_machine=None,
        document=None, srcdir=None, builddir=None):
    """Process the ``:report:`` directive (legacy variant).

    Resolves tracker, transformers and renderer, dispatches data
    collection and injects the rendered restructured text into the
    docutils state machine.

    *srcdir* - top level directory of rst documents
    *builddir* - build directory

    Returns an empty list (output is injected via *state_machine*).
    """
    tag = "%s:%i" % (str(document), lineno)

    logging.debug("report_directive.run: profile: started: rst: %s" % tag)

    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])

    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)

    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)
    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir

    # build directory
    if builddir is None:
        builddir = setup.builddir

    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.realpath(x) for x in (srcdir, builddir, rstdir)]

    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir   = directory where rst sources are located
    # srcdir   = directory from which the build process is started

    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)

    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)

    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)

    logging.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments), str(options), str(lineno), str(content),
         str(document)))
    logging.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logging.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir,
         codename))

    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    # BUGFIX: keep the caught exception in a separate name - in
    # Python 3 the ``except ... as msg`` binding is deleted when the
    # except block exits, so referencing it afterwards raised NameError.
    makedirs_error = None
    try:
        os.makedirs(outdir)
    except OSError as exc:
        makedirs_error = exc

    if not os.path.exists(outdir):
        raise OSError("could not create directory %s: %s" %
                      (outdir, makedirs_error))

    ########################################################
    # collect options
    # replace placedholders
    try:
        options = Utils.updateOptions(options)
    except ValueError as msg:
        logging.warn("failure while updating options: %s" % msg)

    logging.debug("report_directive.run: options=%s" % (str(options),))

    transformer_names = []
    renderer_name = None

    # get layout option
    layout = options.get("layout", "column")

    option_map = Component.getOptionMap()
    renderer_options = Utils.selectAndDeleteOptions(
        options, option_map["render"])
    transformer_options = Utils.selectAndDeleteOptions(
        options, option_map["transform"])
    dispatcher_options = Utils.selectAndDeleteOptions(
        options, option_map["dispatch"])
    tracker_options = Utils.selectAndDeleteOptions(
        options, option_map["tracker"])
    display_options = Utils.selectAndDeleteOptions(
        options, option_map["display"])

    logging.debug("report_directive.run: renderer options: %s" %
                  str(renderer_options))
    logging.debug("report_directive.run: transformer options: %s" %
                  str(transformer_options))
    logging.debug("report_directive.run: dispatcher options: %s" %
                  str(dispatcher_options))
    logging.debug("report_directive.run: tracker options: %s" %
                  str(tracker_options))
    logging.debug("report_directive.run: display options: %s" %
                  str(display_options))

    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]

    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]

    ########################################################
    # check for missing files
    if renderer_name is not None:

        # hash all options that influence the output so cached
        # text/figures are invalidated when they change
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names)

        options_hash = hashlib.md5(options_key.encode()).hexdigest()

        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))
        notebookname += options_hash

        logging.debug("report_directive.run: options_hash=%s" % options_hash)

        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        queries = [re.compile(r"%s(%s\S+.%s)" %
                              (root2builddir, outdir, suffix))
                   for suffix in ("png", "pdf", "svg")]

        logging.debug("report_directive.run: checking for changed files.")

        # check if text element exists
        if os.path.exists(filename_text):

            # use a context manager so the file handle is not leaked
            with open(filename_text, "r") as inf:
                lines = [x[:-1] for x in inf.readlines()]

            filenames = []

            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))

            logging.debug(
                "report_directive.run: %s: checking for %s" %
                (tag, str(filenames)))
            for filename in filenames:
                if not os.path.exists(filename):
                    logging.info(
                        "report_directive.run: %s: redo: %s missing" %
                        (tag, filename))
                    break
            else:
                logging.info(
                    "report_directive.run: %s: noredo: all files are present" %
                    tag)

                # all is present - save text and return
                if lines and state_machine:
                    state_machine.insert_input(
                        lines, state_machine.input_lines.source(0))

                return []
        else:
            logging.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None

    ##########################################################
    # Initialize collectors
    collectors = []
    for collector in list(Component.getPlugins("collect").values()):
        collectors.append(collector())

    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logging.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = Utils.makeTracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logging.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)

        logging.debug(
            "report_directive.run: collected tracker %s" % tracker_name)

        tracker_id = Cache.tracker2key(tracker)

        ########################################################
        # determine the transformer
        logging.debug("report_directive.run: creating transformers")

        transformers = Utils.getTransformers(
            transformer_names, transformer_options)

        ########################################################
        # determine the renderer
        logging.debug("report_directive.run: creating renderer.")

        if renderer_name is None:
            logging.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")

        renderer = Utils.getRenderer(renderer_name, renderer_options)

        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
        except AttributeError:
            # User renderers will not have these methods
            pass

        ########################################################
        # create and call dispatcher
        logging.debug("report_directive.run: creating dispatcher")

        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)

        # add the tracker options
        dispatcher_options.update(tracker_options)

        blocks = dispatcher(**dispatcher_options)

        if blocks is None:
            blocks = ResultBlocks(ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker))))
            code = None
            tracker_id = None

    # catch Exception (not a bare except) so KeyboardInterrupt and
    # SystemExit still abort the build
    except Exception:
        logging.warn(
            "report_directive.run: exception caught at %s:%i - see document" %
            (str(document), lineno))
        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("invocation")))
        code = None
        tracker_id = None

    logging.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)

    ########################################################
    # write code output
    linked_codename = re.sub("\\\\", "/", os.path.join(rst2srcdir, codename))
    if code and basedir != outdir:
        with open(os.path.join(outdir, codename), "w") as outfile:
            for line in code:
                outfile.write(line)

    ########################################################
    # write notebook snippet
    linked_notebookname = re.sub(
        "\\\\", "/", os.path.join(rst2srcdir, notebookname))
    if basedir != outdir and tracker_id is not None:
        with open(os.path.join(outdir, notebookname), "w") as outfile:
            # BUGFIX: wrap .items() views in list() - concatenating
            # dict views with ``+`` is a TypeError on Python 3 (the
            # sibling implementation already does this).
            Utils.writeNoteBookEntry(
                outfile,
                renderer=renderer_name,
                tracker=tracker_name,
                transformers=transformer_names,
                tracker_path=tracker_path,
                options=list(renderer_options.items()) +
                list(tracker_options.items()) +
                list(transformer_options.items()))

    ###########################################################
    # collect images
    ###########################################################
    map_figure2text = {}
    links = {'code_url': linked_codename,
             'notebook_url': linked_notebookname}

    try:
        for collector in collectors:
            map_figure2text.update(collector.collect(
                blocks,
                template_name,
                outdir,
                rstdir,
                builddir,
                srcdir,
                content,
                display_options,
                tracker_id,
                links=links))
    except Exception:
        logging.warn("report_directive.run: exception caught while "
                     "collecting with %s at %s:%i - see document" %
                     (collector, str(document), lineno))
        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("collection")))
        code = None
        tracker_id = None

    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    urls = Utils.asList(Utils.PARAMS["report_urls"])
    code_url, nb_url = "", ""
    if "code" in urls:
        code_url = "`code <%(code_url)s>`__" % links

    if "notebook" in urls:
        nb_url = '`nb <%(notebook_url)s>`__' % links

    # NOTE: TEMPLATE_TEXT interpolates from locals(), so the local
    # variable names above (code_url, nb_url) are part of the contract.
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()
    map_figure2text["default-suffix"] = ""

    blocks.updatePlaceholders(map_figure2text)

    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout)
    lines.append("")

    # add caption
    lines.extend(['::', ''])
    if content:
        lines.extend([' %s' % row.strip() for row in content])
        lines.append("")
    lines.append("")

    # output rst text for this renderer
    if filename_text:
        # use a context manager so the file handle is not leaked
        with open(filename_text, "w") as outfile:
            outfile.write("\n".join(lines))

    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            print("%5i %s" % (x, l))

    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))

    logging.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logging.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))

    return []
def main(argv=None, **kwargs):
    '''main function for test.py.

    Long-form of command line arguments can also be supplied as
    kwargs.  If *argv* is not None, command line parsing will be
    performed.

    Returns the data frame built by the dispatcher when a tracker was
    run without a renderer (interpreter/notebook modes), otherwise
    returns None implicitly.
    '''
    logger = Component.get_logger()

    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    parser.add_option("-t", "--tracker", dest="tracker", type="string",
                      help="tracker to use [default=%default]")

    parser.add_option("-p", "--page", dest="page", type="string",
                      help="render an rst page [default=%default]")

    parser.add_option("-a", "--tracks", dest="tracks", type="string",
                      help="tracks to use [default=%default]")

    parser.add_option("-m", "--transformer", dest="transformers",
                      type="string", action="append",
                      help="add transformation [default=%default]")

    parser.add_option("-s", "--slices", dest="slices", type="string",
                      help="slices to use [default=%default]")

    parser.add_option("-r", "--renderer", dest="renderer", type="string",
                      help="renderer to use [default=%default]")

    parser.add_option("-w", "--path", "--trackerdir", dest="trackerdir",
                      type="string",
                      help="path to trackers [default=%default]")

    parser.add_option("-f", "--force", dest="force", action="store_true",
                      help="force recomputation of data by deleting cached "
                      "results [default=%default]")

    parser.add_option("-o", "--option", dest="options", type="string",
                      action="append",
                      help="renderer options - supply as key=value pairs "
                      "(without spaces). [default=%default]")

    parser.add_option("-l", "--language", dest="language", type="choice",
                      choices=("rst", "notebook"),
                      help="output language for snippet. Use ``rst`` "
                      "to create a snippet to paste "
                      "into a cgatreport document. Use ``notebook`` to "
                      "create a snippet to paste "
                      "into an ipython notebook [default=%default]")

    parser.add_option("--no-print", dest="do_print",
                      action="store_false",
                      help="do not print an rst text element to create "
                      "the displayed plots [default=%default].")

    parser.add_option("--no-show", dest="do_show", action="store_false",
                      help="do not show a plot [default=%default].")

    parser.add_option("--layout", dest="layout", type="string",
                      help="output rst with layout [default=%default].")

    parser.add_option("-i", "--start-interpreter", dest="start_interpreter",
                      action="store_true",
                      help="do not render, but start python interpreter "
                      "[default=%default].")

    parser.add_option("-I", "--ii", "--start-ipython", dest="start_ipython",
                      action="store_true",
                      help="do not render, start ipython interpreter "
                      "[default=%default].")

    parser.add_option(
        "--workdir", dest="workdir", type="string",
        help="working directory - change to this directory "
        "before executing "
        "[default=%default]")

    parser.add_option(
        "--hardcopy", dest="hardcopy", type="string",
        help="output images of plots. The parameter should "
        "contain one or more %s "
        "The suffix determines the type of plot. "
        "[default=%default].")

    parser.set_defaults(
        loglevel=1,
        tracker=None,
        transformers=[],
        tracks=None,
        slices=None,
        options=[],
        renderer="table",
        do_show=True,
        do_print=True,
        force=False,
        trackerdir=TRACKERDIR,
        caption="add caption here",
        start_interpreter=False,
        start_ipython=False,
        language="rst",
        workdir=None,
        layout=None,
        dpi=100)

    if argv is None and len(kwargs) == 0:
        argv = sys.argv

    if argv:
        (options, args) = parser.parse_args(argv)
    else:
        (options, args) = parser.parse_args([])

    ######################################################
    # set keyword arguments as options; consumed kwargs are
    # removed so that only renderer/tracker options remain.
    for keyword, value in list(kwargs.items()):
        if hasattr(options, keyword):
            setattr(options, keyword, value)
            del kwargs[keyword]

    # change some kwarguments
    # NOTE(review): a key already starting with "tf-" becomes
    # "tf--..." here - verify this prefix rewrite is intended.
    if options.transformers:
        for keyword, value in list(kwargs.items()):
            if keyword.startswith("tf"):
                kwargs["tf-{}".format(keyword[2:])] = value

    if options.workdir is not None:
        savedir = os.getcwd()
        os.chdir(options.workdir)
    else:
        savedir = None

    if args:
        update_options_from_blob(kwargs, options, args)

    Utils.update_parameters(sorted(glob.glob("*.ini")))

    ######################################################
    # configure options
    options.trackerdir = os.path.abspath(
        os.path.expanduser(options.trackerdir))
    if os.path.exists(options.trackerdir):
        sys.path.insert(0, options.trackerdir)
    else:
        logger.warn("directory %s does not exist" % options.trackerdir)

    ######################################################
    # test plugins: parse key=value renderer options
    for x in options.options:
        if "=" in x:
            data = x.split("=")
            # re-join so that values containing "=" survive intact
            key, val = [y.strip() for y in (data[0], "=".join(data[1:]))]
        else:
            key, val = x.strip(), None
        kwargs[key] = val

    if options.tracks:
        kwargs["tracks"] = options.tracks
    if options.slices:
        kwargs["slices"] = options.slices

    kwargs = update_options(kwargs)

    option_map = get_option_map()
    renderer_options = select_and_delete_options(
        kwargs, option_map["render"])
    transformer_options = select_and_delete_options(
        kwargs, option_map["transform"])
    display_options = select_and_delete_options(
        kwargs, option_map["display"])
    tracker_options = select_and_delete_options(
        kwargs, option_map["tracker"], expand=["tracker"])

    ######################################################
    # decide whether to render or not
    if options.renderer == "none" or options.start_interpreter or \
            options.start_ipython or options.language == "notebook":
        renderer = None
    else:
        renderer = get_renderer(options.renderer,
                                {**renderer_options, **kwargs})

    try:
        rstdir = os.getcwd()
        srcdir = os.getcwd()
        builddir = os.getcwd()
        renderer.set_paths(rstdir, srcdir, builddir)
        renderer.set_display_options(display_options)
    except AttributeError:
        # User renderers will not have these methods
        pass

    transformers = get_transformers(
        options.transformers, transformer_options)

    # names exported by tracker modules that are not trackers themselves
    exclude = set(("Tracker",
                   "TrackerSQL",
                   "returnLabeledData",
                   "returnMultipleColumnData",
                   "returnMultipleColumns",
                   "returnSingleColumn",
                   "returnSingleColumnData",
                   "SQLError",
                   "MultipleColumns",
                   "MultipleColumnData",
                   "LabeledData",
                   "DataSimple",
                   "Data"))

    ######################################################
    # build from tracker
    if options.tracker:

        if "." in options.tracker:
            parts = options.tracker.split(".")
            tracker_modulename = ".".join(parts[:-1])
            tracker_name = parts[-1]
        else:
            tracker_modulename = None
            tracker_name = options.tracker

        try:
            _code, tracker, tracker_path = make_tracker(
                options.tracker, (), tracker_options)
        except ImportError:
            # try to find class in module: scan every tracker module in
            # the tracker directory for a matching name
            trackers = []
            for filename in glob.glob(
                    os.path.join(options.trackerdir, "*.py")):
                modulename = os.path.basename(filename)
                trackers.extend(
                    [x for x in get_available_trackers(modulename)
                     if x[0] not in exclude])

            for name, tracker_class, modulename, is_derived in trackers:
                if name == tracker_name:
                    if tracker_modulename is not None:
                        if modulename == tracker_modulename:
                            break
                    else:
                        tracker_modulename = modulename
                        break
            else:
                # only derived classes are callable trackers - list those
                available_trackers = set([x[0] for x in trackers if x[3]])
                print(
                    "unknown tracker '%s': possible trackers are\n  %s" %
                    (options.tracker,
                     "\n  ".join(sorted(available_trackers))))
                print("(the list above does not contain functions).")
                sys.exit(1)

            # instantiate functors
            if is_derived:
                tracker = tracker_class(**kwargs)
            # but not functions
            else:
                tracker = tracker_class

        # remove everything related to that tracker for a clean slate
        if options.force:
            removed = CGATReport.clean.removeTracker(tracker_name)
            print("removed all data for tracker %s: %i files" %
                  (tracker_name, len(removed)))

        dispatcher = Dispatcher(tracker, renderer, transformers)

        if renderer is None:
            # dispatcher.parseArguments(**kwargs)
            # result = dispatcher.collect()
            # result = dispatcher.transform()
            result = dispatcher(**kwargs)
            options.do_print = options.language == "notebook"
            options.do_show = False
            options.hardcopy = False
        else:
            # needs to be resolved between renderer and dispatcher options
            result = dispatcher(**kwargs)

        if options.do_print:

            sys.stdout.write(".. ---- TEMPLATE START --------\n\n")

            if options.language == "rst":
                writeRST(sys.stdout,
                         options,
                         kwargs,
                         renderer_options,
                         transformer_options,
                         display_options,
                         tracker_modulename,
                         tracker_name)
            elif options.language == "notebook":
                writeNotebook(sys.stdout,
                              options,
                              kwargs,
                              renderer_options,
                              transformer_options,
                              display_options,
                              tracker_modulename,
                              tracker_name)

            sys.stdout.write("\n.. ---- TEMPLATE END ----------\n")
            sys.stdout.write("\n.. ---- OUTPUT-----------------\n")

        if result and renderer is not None:
            if options.layout is not None:
                lines = Utils.layoutBlocks(result, layout=options.layout)
                print("\n".join(lines))
            else:
                for r in result:
                    if r.title:
                        print("")
                        print("title: %s" % r.title)
                        print("")
                    for ss in str(r).split("\n"):
                        print(force_encode(ss))

        if options.hardcopy:

            fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
            # create all the images
            for figman in fig_managers:
                # create all images
                figid = figman.num
                outfile = re.sub("%s", str(figid), options.hardcopy)
                figman.canvas.figure.savefig(outfile, dpi=options.dpi)

        if result and options.do_show:
            if options.renderer.startswith("r-"):
                for r in result:
                    if hasattr(r, 'rggplot'):
                        from rpy2.robjects import r as R
                        import rpy2.rinterface
                        try:
                            R.plot(r.rggplot)
                        except rpy2.rinterface.RRuntimeError as msg:
                            if re.search("object.*not found", str(msg)):
                                print('%s: available columns in dataframe=%s' %
                                      (msg, R('''colnames(rframe)''')))

                print("press Ctrl-c to stop")
                while 1:
                    pass

            elif len(_pylab_helpers.Gcf.get_all_fig_managers()) > 0:
                plt.show()

            else:
                for rr in result:
                    # BUGFIX: the original tested/used the stale name ``r``
                    # from an earlier loop instead of the loop variable ``rr``
                    if hasattr(rr, 'xls'):
                        tmpfile, outpath = tempfile.mkstemp(
                            dir='.', suffix='.xlsx')
                        os.close(tmpfile)
                        print('saving xlsx to %s' % outpath)
                        rr.xls.save(outpath)
                    elif hasattr(rr, 'bokeh'):
                        import bokeh.plotting as bk
                        bk.show(rr.bokeh)

    ######################################################
    # build page
    elif options.page:
        from CGATReport import build
        CGATReport.report_directive.DEBUG = True
        CGATReport.report_directive.FORCE = True

        if not os.path.exists(options.page):
            raise IOError("page %s does not exist" % options.page)

        options.num_jobs = 1

        build.buildPlots(
            [options.page, ], options, [], os.path.dirname(options.page))

        if options.do_show:
            if options.renderer.startswith("r-"):
                print("press Ctrl-c to stop")
                while 1:
                    pass

            # BUGFIX: the original compared the manager *list* to 0,
            # which raises TypeError on python 3; use len() as in the
            # tracker branch above
            elif len(_pylab_helpers.Gcf.get_all_fig_managers()) > 0:
                plt.show()

    else:
        raise ValueError(
            "please specify either a tracker "
            "(-t/--tracker) or a page (-p/--page) to test")

    if savedir is not None:
        os.chdir(savedir)

    if options.tracker and renderer is None:
        datatree = dispatcher.getDataTree()
        dataframe = dispatcher.getDataFrame()

        # trying to push R objects
        # from rpy2.robjects import r as R
        # for k, v in flat_iterator(datatree):
        #     try:
        #         R.assign(k, v)
        #     except ValueError, msg:
        #         print ("could not push %s: %s" % (k,msg))
        #         pass
        # print ("----------------------------------------")

        if options.start_interpreter:
            print("--> cgatreport - available data structures <--")
            print("    datatree=%s" % type(datatree))
            print("    dataframe=%s" % type(dataframe))
            interpreter = code.InteractiveConsole(
                dict(list(globals().items()) + list(locals().items())))
            interpreter.interact()
            return dataframe
        elif options.start_ipython:
            import IPython
            IPython.embed()
            return dataframe

        return dataframe