def collect_output(self, schema, builder, outdir):
    # Collect the value of a single output parameter from the job's output
    # directory, driven by the parameter's "outputBinding" (glob,
    # loadContents, outputEval, secondaryFiles).
    #
    # NOTE(review): this source arrived with all indentation collapsed onto
    # one line; the nesting below is a reconstruction from token order --
    # confirm against upstream history before relying on edge-case behavior.
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        if "glob" in binding:
            # Evaluate the glob (it may be an expression) and collect every
            # matching path in outdir as a File object.
            r = []
            bg = builder.do_eval(binding["glob"])
            for gb in aslist(bg):
                r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
            # Stream each matched file once to compute its sha1 checksum and
            # size; the first CONTENT_LIMIT bytes double as the "contents"
            # field when loadContents is requested.
            for files in r:
                checksum = hashlib.sha1()
                with builder.fs_access.open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    # Python 2 idiom: a binary read returns "" at EOF, ending
                    # the loop (this comparison would never match bytes on
                    # Python 3).
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024*1024)  # 1 MiB chunks
                files["checksum"] = "sha1$%s" % checksum.hexdigest()
                files["size"] = filesize
        if "outputEval" in binding:
            # outputEval post-processes the glob result, which is passed to
            # the expression as its context.
            r = builder.do_eval(binding["outputEval"], context=r)
            if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                raise WorkflowException("Expression must return a file object.")
        if schema["type"] == "File":
            # A plain File output must resolve to exactly one match.
            # NOTE(review): `bg` is unbound here if the binding had no "glob"
            # key, and after an outputEval that returned a dict the
            # len(r)/r[0] handling below is at odds with the dict check
            # above -- presumably reworked in later revisions; verify.
            if not r:
                raise WorkflowException("No matches for output file with glob: '{}'".format(bg))
            if len(r) > 1:
                raise WorkflowException("Multiple matches for output item that is a single file.")
            r = r[0]
        if schema["type"] == "File" and "secondaryFiles" in binding:
            # Secondary files come from either an expression (dict form,
            # evaluated against the primary file's path) or a suffix pattern
            # applied with substitute().
            r["secondaryFiles"] = []
            for sf in aslist(binding["secondaryFiles"]):
                if isinstance(sf, dict):
                    sfpath = builder.do_eval(sf, context=r["path"])
                else:
                    sfpath = {"path": substitute(r["path"], sf), "class": "File"}
                if isinstance(sfpath, list):
                    r["secondaryFiles"].extend(sfpath)
                else:
                    r["secondaryFiles"].append(sfpath)
            # Every declared secondary file must actually exist on disk.
            for sf in r["secondaryFiles"]:
                if not builder.fs_access.exists(sf["path"]):
                    raise WorkflowException("Missing secondary file of '%s' of primary file '%s'" % (sf["path"], r["path"]))
    if not r and schema["type"] == "record":
        # Record outputs have no binding of their own: recurse into each
        # field and assemble the result dict.
        r = {}
        for f in schema["fields"]:
            r[f["name"]] = self.collect_output(f, builder, outdir)
    return r
def collect_output(self, schema, builder, outdir):
    # Collect the value of a single output parameter from the job's output
    # directory, driven by its "outputBinding".  Handles glob matching,
    # checksum/size/contents capture, "format" evaluation, outputEval,
    # optional and single-File typed outputs, secondaryFiles, and record
    # recursion.
    #
    # NOTE(review): this source arrived with all indentation collapsed; the
    # nesting below is a reconstruction from token order -- confirm against
    # upstream history before relying on edge-case behavior.
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        globpatterns = []
        if "glob" in binding:
            r = []
            # Each glob entry is evaluated independently (it may be an
            # expression); filesystem errors are logged, not fatal.
            for gb in aslist(binding["glob"]):
                try:
                    gb = builder.do_eval(gb)
                    globpatterns.append(gb)
                    if gb:
                        r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                except (OSError, IOError) as e:
                    _logger.warn(str(e))
            # Stream each matched file once: sha1 checksum and size, with
            # the first CONTENT_LIMIT bytes kept as "contents" when
            # loadContents is requested.
            for files in r:
                checksum = hashlib.sha1()
                with builder.fs_access.open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    # Python 2 idiom: binary read returns "" at EOF.
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024*1024)  # 1 MiB chunks
                files["checksum"] = "sha1$%s" % checksum.hexdigest()
                files["size"] = filesize
                if "format" in schema:
                    # File format may itself be an expression evaluated
                    # against the completed File object.
                    files["format"] = builder.do_eval(schema["format"], context=files)
        # Classify the declared type: a union containing "null" makes the
        # output optional; "File" (alone or in a union) marks it as a
        # single-file output.
        optional = False
        singlefile = False
        if isinstance(schema["type"], list):
            if "null" in schema["type"]:
                optional = True
            if "File" in schema["type"]:
                singlefile = True
        elif schema["type"] == "File":
            singlefile = True
        if "outputEval" in binding:
            # outputEval post-processes the glob result (passed as context).
            r = builder.do_eval(binding["outputEval"], context=r)
            if singlefile:
                # Handle single file outputs not wrapped in a list
                if r is not None and not isinstance(r, (list, tuple)):
                    r = [r]
                if optional and r is None:
                    pass
                elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
                    raise WorkflowException("Expression must return a file object for %s."
                                            % schema["id"])
        if singlefile:
            # A single-File output must match exactly one file, unless the
            # type is optional, in which case no match is allowed.
            if not r and not optional:
                raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
            elif not r and optional:
                pass
            elif isinstance(r, list):
                if len(r) > 1:
                    raise WorkflowException("Multiple matches for output item that is a single file.")
                else:
                    r = r[0]
        if "secondaryFiles" in schema:
            # Attach secondary files to each primary File object.  A dict
            # entry, or a string containing a parameter reference "$(" or
            # expression "${", is evaluated (against the whole result);
            # otherwise the entry is a suffix pattern applied with
            # substitute().  Only secondary files that actually exist are
            # attached -- missing ones are silently skipped.
            for primary in aslist(r):
                if isinstance(primary, dict):
                    primary["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = builder.do_eval(sf, context=r)
                            if isinstance(sfpath, basestring):
                                sfpath = {"path": sfpath, "class": "File"}
                        else:
                            sfpath = {"path": substitute(primary["path"], sf), "class": "File"}
                        for sfitem in aslist(sfpath):
                            if builder.fs_access.exists(sfitem["path"]):
                                primary["secondaryFiles"].append(sfitem)
        if not r and optional:
            # Normalize "no match" for an optional output to None.
            r = None
    if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
        # Record outputs have no binding of their own: recurse per field,
        # keyed by the field's short name.
        r = {}
        for f in schema["type"]["fields"]:
            r[shortname(f["name"])] = self.collect_output(f, builder, outdir)
    return r
def collect_output(self, schema, builder, outdir):
    """Collect the value of a single output parameter from ``outdir``.

    Driven by the parameter's ``outputBinding``: matches glob patterns,
    computes sha1 checksum and size for each matched file (optionally
    capturing contents and evaluating a ``format`` expression), applies
    ``outputEval``, enforces single-File / optional typing, remaps paths
    from the run environment back to the host via ``revmap_file``, attaches
    ``secondaryFiles``, and recurses into record fields.

    :param schema: output parameter schema (dict with "type" and optionally
        "outputBinding", "format", "secondaryFiles", "id").
    :param builder: Builder providing ``do_eval`` and ``fs_access``.
    :param outdir: designated output directory searched by glob.
    :return: the collected output value (File dict, list, record dict, or
        None for an unmatched optional output).
    :raises WorkflowException: on absolute glob patterns, bad outputEval
        results, or wrong match counts for single-File outputs.
    """
    # NOTE(review): the source arrived with indentation collapsed; nesting
    # below is reconstructed from token order.
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        globpatterns = []

        # Maps paths inside the (possibly containerized) run environment
        # back to host-side paths.
        revmap = functools.partial(revmap_file, builder, outdir)

        if "glob" in binding:
            r = []
            # Each glob entry may be an expression; evaluation can expand
            # one entry into a list of patterns.
            for gb in aslist(binding["glob"]):
                gb = builder.do_eval(gb)
                if gb:
                    globpatterns.extend(aslist(gb))

            for gb in globpatterns:
                if gb.startswith("/"):
                    # BUGFIX: previously raised `WorkflowError`, a name not
                    # defined anywhere in this module -- triggering it would
                    # itself fail with NameError.  Every other error path in
                    # this function uses WorkflowException.
                    raise WorkflowException("glob patterns must not start with '/'")
                try:
                    # "hostfs": True marks these paths as already host-side.
                    r.extend([{"path": g, "class": "File", "hostfs": True}
                              for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                except (OSError, IOError) as e:
                    # Filesystem errors during globbing are logged, not fatal.
                    _logger.warn(str(e))

            # Stream each matched file once: sha1 checksum and size, with the
            # first CONTENT_LIMIT bytes kept as "contents" when loadContents
            # is requested.
            for files in r:
                checksum = hashlib.sha1()
                with builder.fs_access.open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    # Python 2 idiom: binary read returns "" at EOF.
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024*1024)  # 1 MiB chunks
                files["checksum"] = "sha1$%s" % checksum.hexdigest()
                files["size"] = filesize
                if "format" in schema:
                    # File format may itself be an expression evaluated
                    # against the completed File object.
                    files["format"] = builder.do_eval(schema["format"], context=files)

        # Classify the declared type: a union containing "null" makes the
        # output optional; "File" (alone or in a union) marks a single-file
        # output.
        optional = False
        singlefile = False
        if isinstance(schema["type"], list):
            if "null" in schema["type"]:
                optional = True
            if "File" in schema["type"]:
                singlefile = True
        elif schema["type"] == "File":
            singlefile = True

        if "outputEval" in binding:
            # outputEval post-processes the glob result (passed as context).
            r = builder.do_eval(binding["outputEval"], context=r)
            if singlefile:
                # Handle single file outputs not wrapped in a list
                if r is not None and not isinstance(r, (list, tuple)):
                    r = [r]
                if optional and r is None:
                    pass
                elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
                    raise WorkflowException("Expression must return a file object for %s."
                                            % schema["id"])

        if singlefile:
            # A single-File output must match exactly one file, unless the
            # type is optional, in which case no match is allowed.
            if not r and not optional:
                raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
            elif not r and optional:
                pass
            elif isinstance(r, list):
                if len(r) > 1:
                    raise WorkflowException("Multiple matches for output item that is a single file.")
                else:
                    r = r[0]

        # Ensure files point to local references outside of the run environment
        adjustFileObjs(r, revmap)

        if "secondaryFiles" in schema:
            # Attach secondary files to each primary File object.  A dict
            # entry, or a string containing "$(" / "${", is evaluated as an
            # expression (its result remapped to host paths); otherwise the
            # entry is a suffix pattern applied with substitute().  Only
            # secondary files that actually exist are attached.
            for primary in aslist(r):
                if isinstance(primary, dict):
                    primary["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = builder.do_eval(sf, context=r)
                            if isinstance(sfpath, basestring):
                                sfpath = revmap({"path": sfpath, "class": "File"})
                        else:
                            sfpath = {"path": substitute(primary["path"], sf), "class": "File", "hostfs": True}
                        for sfitem in aslist(sfpath):
                            if builder.fs_access.exists(sfitem["path"]):
                                primary["secondaryFiles"].append(sfitem)

        if not r and optional:
            # Normalize "no match" for an optional output to None.
            r = None

    if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
        # Record outputs have no binding of their own: recurse per field,
        # keyed by the field's short name.
        r = {}
        for f in schema["type"]["fields"]:
            r[shortname(f["name"])] = self.collect_output(f, builder, outdir)
    return r