def test_prev_uploaded(self, upl):
    """Test pathmapper handling previously uploaded files."""
    upl.side_effect = upload_mock
    runner = arvados_cwl.ArvCwlRunner(self.api)
    keep_ref = 'keep:99999999999999999999999999999991+99/hw.py'
    # Pre-register the file as already uploaded; the mapper must reuse it.
    runner.add_uploaded('tests/hw.py',
                        MapperEnt(resolved=keep_ref, target='', type='File'))
    mapper = ArvPathMapper(runner,
                           [{"class": "File", "location": "tests/hw.py"}],
                           "", "/test/%s", "/test/%s/%s")
    expected = {
        'tests/hw.py': MapperEnt(
            resolved=keep_ref,
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File')
    }
    self.assertEqual(expected, mapper._pathmap)
def setup(self, referenced_files, basedir):
    """Build self._pathmap from *referenced_files*.

    Each entry maps the source location to a MapperEnt under BASE_MOUNT.
    ``fs://`` references are mapped in place; ``file://`` references are
    first copied into ``self.store_base`` (the shared store) and re-keyed
    as ``fs://`` resolutions.  Any other scheme is rejected.

    Raises:
        Exception: if a location uses an unrecognized scheme.
    """
    # Lazy %-style logging args: the pformat/str work is skipped when
    # DEBUG logging is disabled.
    log.debug("PATHMAPPER: %s", pformat(referenced_files))
    self._pathmap = {}
    for src in referenced_files:
        log.debug("SOURCE: %s", src)
        loc = src['location']
        if loc.startswith("fs://"):
            # Already on the shared filesystem; map straight under the mount.
            target_name = os.path.basename(loc)
            self._pathmap[loc] = MapperEnt(
                resolved=loc,
                target=os.path.join(BASE_MOUNT, target_name),
                type=src['class'])
        elif loc.startswith("file://"):
            # Local file: copy into the shared store, then record an
            # fs:// resolution pointing at the copy.
            src_path = loc[7:]
            log.debug("Copying %s to shared %s", loc, self.store_base)
            basename = os.path.basename(src_path)
            shutil.copy(src_path, os.path.join(self.store_base, basename))
            self._pathmap[loc] = MapperEnt(
                resolved="fs://%s" % os.path.basename(loc),
                target=os.path.join(BASE_MOUNT, os.path.basename(loc)),
                type=src['class'])
        else:
            raise Exception("Unknown file source: %s" % (loc))
    log.debug("PATHMAP: %s", pformat(self._pathmap))
def visit(self, obj, stagedir, basedir, copy=False, staged=False):
    # type: (Dict[unicode, Any], unicode, unicode, bool, bool) -> None
    """Record a staging entry for *obj*, de-duplicating target paths."""
    location = obj["location"]
    target = os.path.join(stagedir, obj["basename"])
    stem, ext = os.path.splitext(target)
    # Append "_2", "_3", ... before the extension until the target path is
    # unique among everything staged so far.
    suffix = 1
    while target in self.targets:
        suffix += 1
        target = "%s_%i%s" % (stem, suffix, ext)
    self.targets.add(target)
    kind = obj["class"]
    if kind == "Directory":
        self._pathmap[location] = MapperEnt(location, target, "Directory", staged)
        # Only descend into literal directories, or when configured to
        # follow real ones.
        if self._follow_dirs or location.startswith("_:"):
            self.visitlisting(obj.get("listing", []), target, basedir)
    elif kind == "File":
        if location in self._pathmap:
            return
        if "contents" in obj and location.startswith("_:"):
            # File literal: stage its contents, not a path.
            entry = MapperEnt(obj["contents"], target, "CreateFile", staged)
        else:
            entry = MapperEnt(location, target,
                              "WritableFile" if copy else "File", staged)
        self._pathmap[location] = entry
        self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir)
def visit(self, srcobj, uploadfiles):
    """Map one CWL File/Directory reference to a Keep-relative path.

    References that already use a portable-data-hash form are mapped
    directly.  Other file paths are probed with ``statfile``: paths on a
    keep mount are mapped immediately, local paths are queued in
    *uploadfiles* for a later bulk upload.  Recurses into secondaryFiles
    and directory listings.
    """
    src = srcobj["location"]
    if srcobj["class"] == "File":
        # Strip any #fragment before matching/uploading.
        if "#" in src:
            src = src[:src.index("#")]
        if isinstance(src, basestring) and ArvPathMapper.pdh_path.match(src):
            # src[5:] strips the "keep:" scheme prefix.
            self._pathmap[src] = MapperEnt(
                src, self.collection_pattern % src[5:], "File")
        if src not in self._pathmap:
            # Local FS ref, may need to be uploaded or may be on keep
            # mount.
            ab = abspath(src, self.input_basedir)
            # NOTE(review): "_:" literals also reach statfile here before the
            # literal check below — confirm statfile tolerates them.
            st = arvados.commands.run.statfile("", ab, fnPattern=self.file_pattern)
            if isinstance(st, arvados.commands.run.UploadFile):
                # Needs uploading; defer to the caller's batch upload.
                uploadfiles.add((src, ab, st))
            elif isinstance(st, arvados.commands.run.ArvFile):
                # Already on a keep mount: reference it directly.
                self._pathmap[src] = MapperEnt(ab, st.fn, "File")
            elif src.startswith("_:") and "contents" in srcobj:
                # File literal with inline contents: nothing to stage here.
                pass
            else:
                # NOTE(review): message interpolates the statfile result
                # `st`, not the path — confirm intended.
                raise WorkflowException("Input file path '%s' is invalid" % st)
        if "secondaryFiles" in srcobj:
            for l in srcobj["secondaryFiles"]:
                self.visit(l, uploadfiles)
    elif srcobj["class"] == "Directory":
        if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
            self._pathmap[src] = MapperEnt(
                src, self.collection_pattern % src[5:], "Directory")
        for l in srcobj["listing"]:
            self.visit(l, uploadfiles)
def setup(self, referenced_files, basedir):
    """Build self._pathmap from *referenced_files*.

    ``fs://`` references map straight under BASE_MOUNT; ``file://``
    references are copied into ``self.store_base`` and re-keyed as
    ``fs://`` resolutions.  Any other scheme is rejected.

    Raises:
        Exception: if a location uses an unrecognized scheme.
    """
    self._pathmap = {}
    for src in referenced_files:
        logging.debug(src)
        loc = src['location']
        if DEBUG:
            # Was a Python-2 `print` statement; routed through logging so
            # this module stays importable on Python 3 and the output is
            # configurable.
            logging.debug("pathing %s", src)
        if loc.startswith("fs://"):
            # Already on the shared filesystem: map in place.
            target_name = os.path.basename(loc)
            self._pathmap[loc] = MapperEnt(
                resolved=loc,
                target=os.path.join(BASE_MOUNT, target_name),
                type=src['class'])
        elif loc.startswith("file://"):
            # Local file: copy into the shared store, then record an
            # fs:// resolution pointing at the copy.
            src_path = loc[7:]
            logging.debug("Copying %s to shared %s", loc, self.store_base)
            dst = os.path.join(self.store_base, os.path.basename(src_path))
            # Was a bare `print src_path` debug statement.
            logging.debug(src_path)
            shutil.copy(src_path, dst)
            location = "fs://%s" % (os.path.basename(loc))
            self._pathmap[loc] = MapperEnt(
                resolved=location,
                target=os.path.join(BASE_MOUNT, os.path.basename(loc)),
                type=src['class'])
        else:
            raise Exception("Unknown file source: %s" % (loc))
def visit(self, obj, stagedir, basedir, copy=False, staged=False):
    # type: (Dict[Text, Any], Text, Text, bool, bool) -> None
    """Add a pathmap entry for *obj*, then recurse into its children."""
    loc = obj["location"]
    tgt = os.path.join(stagedir, obj["basename"])
    if loc in self._pathmap:
        return
    if obj["class"] == "Directory":
        if loc.startswith("file://"):
            resolved = schema_salad.ref_resolver.uri_file_path(loc)
        else:
            resolved = loc
        kind = "WritableDirectory" if copy else "Directory"
        self._pathmap[loc] = MapperEnt(resolved, tgt, kind, staged)
        # Children of an on-disk directory are not staged individually
        # unless stage_listing is set.
        if not self.stage_listing and loc.startswith("file://"):
            staged = False
        self.visitlisting(obj.get("listing", []), tgt, basedir,
                          copy=copy, staged=staged)
    elif obj["class"] == "File":
        if "contents" in obj and loc.startswith("_:"):
            # File literal: stage its inline contents.
            self._pathmap[loc] = MapperEnt(obj["contents"], tgt,
                                           "CreateFile", staged)
        else:
            resolved = self.get_file(loc) if self.get_file else loc
            if resolved.startswith("file:"):
                resolved = schema_salad.ref_resolver.uri_file_path(resolved)
            kind = "WritableFile" if copy else "File"
            self._pathmap[loc] = MapperEnt(resolved, tgt, kind, staged)
        self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir,
                          copy=copy, staged=staged)
def visit(self, srcobj, uploadfiles):
    """Map one CWL File/Directory reference into Keep.

    Portable-data-hash ("keep:...") references are mapped directly; local
    ``file:`` paths are probed with statfile and either mapped (already on
    a keep mount) or queued in *uploadfiles*; ``_:`` literals are
    validated; http(s) URLs are imported into Keep.  Recurses into
    secondaryFiles and directory listings.
    """
    src = srcobj["location"]
    # Strip any #fragment before matching/uploading.
    if "#" in src:
        src = src[:src.index("#")]
    if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
        # src[5:] strips the "keep:" scheme prefix.
        self._pathmap[src] = MapperEnt(
            src, self.collection_pattern % urllib.parse.unquote(src[5:]),
            srcobj["class"], True)
        if arvados_cwl.util.collectionUUID in srcobj:
            # Remember which collection UUID this PDH came from.
            self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
    debug = logger.isEnabledFor(logging.DEBUG)
    if src not in self._pathmap:
        if src.startswith("file:"):
            # Local FS ref, may need to be uploaded or may be on keep
            # mount.
            ab = abspath(src, self.input_basedir)
            st = arvados.commands.run.statfile("", ab,
                                               fnPattern="keep:%s/%s",
                                               dirPattern="keep:%s/%s",
                                               raiseOSError=True)
            with SourceLine(srcobj, "location", WorkflowException, debug):
                if isinstance(st, arvados.commands.run.UploadFile):
                    # Needs uploading; defer to the caller's batch upload.
                    uploadfiles.add((src, ab, st))
                elif isinstance(st, arvados.commands.run.ArvFile):
                    # Already on a keep mount: reference it directly.
                    self._pathmap[src] = MapperEnt(
                        st.fn,
                        self.collection_pattern % urllib.parse.unquote(st.fn[5:]),
                        "File", True)
                else:
                    raise WorkflowException(
                        "Input file path '%s' is invalid" % st)
        elif src.startswith("_:"):
            # Literals must carry their payload inline.
            if srcobj["class"] == "File" and "contents" not in srcobj:
                raise WorkflowException(
                    "File literal '%s' is missing `contents`" % src)
            if srcobj["class"] == "Directory" and "listing" not in srcobj:
                raise WorkflowException(
                    "Directory literal '%s' is missing `listing`" % src)
        elif src.startswith("http:") or src.startswith("https:"):
            # Import the remote object into Keep and map to the result.
            keepref = http_to_keep(self.arvrunner.api,
                                   self.arvrunner.project_uuid, src)
            logger.info("%s is %s", src, keepref)
            self._pathmap[src] = MapperEnt(keepref, keepref,
                                           srcobj["class"], True)
        else:
            # Anything else (e.g. already-keep refs) maps to itself.
            self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
    with SourceLine(srcobj, "secondaryFiles", WorkflowException, debug):
        for l in srcobj.get("secondaryFiles", []):
            self.visit(l, uploadfiles)
    with SourceLine(srcobj, "listing", WorkflowException, debug):
        for l in srcobj.get("listing", []):
            self.visit(l, uploadfiles)
def visit(self, obj, stagedir, basedir, copy=False, staged=False):
    """Add a pathmap entry for *obj*, fetching remote files if needed.

    http(s)/ftp file locations are downloaded to local paths; local paths
    have symbolic links dereferenced before mapping.  Recurses into
    directory listings and secondaryFiles.
    """
    tgt = convert_pathsep_to_unix(os.path.join(stagedir, obj["basename"]))
    if obj["location"] in self._pathmap:
        return
    if obj["class"] == "Directory":
        if obj["location"].startswith("file://"):
            # Directories are expected to arrive pre-resolved by this point.
            log.warning("a file:// based Directory slipped through: %s", obj)
            resolved = uri_file_path(obj["location"])
        else:
            resolved = obj["location"]
        self._pathmap[obj["location"]] = MapperEnt(
            resolved, tgt, "WritableDirectory" if copy else "Directory", staged)
        if obj["location"].startswith("file://"):
            # On-disk directory contents are not staged individually.
            staged = False
        self.visitlisting(obj.get("listing", []), tgt, basedir,
                          copy=copy, staged=staged)
    elif obj["class"] == "File":
        path = obj["location"]
        abpath = abspath(path, basedir)
        if "contents" in obj and obj["location"].startswith("_:"):
            # File literal: stage its inline contents.
            self._pathmap[obj["location"]] = MapperEnt(
                obj["contents"], tgt, "CreateFile", staged)
        else:
            with SourceLine(obj, "location", validate.ValidationException,
                            log.isEnabledFor(logging.DEBUG)):
                deref = abpath
                if urllib.parse.urlsplit(deref).scheme in ['http', 'https']:
                    deref = downloadHttpFile(path)
                elif urllib.parse.urlsplit(deref).scheme == 'ftp':
                    deref = self._download_ftp_file(path)
                else:
                    # NOTE(review): this warns for every ordinary local
                    # file — confirm the log level/message is intended.
                    log.warning("unprocessed File %s", obj)
                    # Dereference symbolic links
                    st = os.lstat(deref)
                    while stat.S_ISLNK(st.st_mode):
                        rl = os.readlink(deref)
                        # Relative link targets are resolved against the
                        # link's own directory.
                        deref = rl if os.path.isabs(rl) \
                            else os.path.join(os.path.dirname(deref), rl)
                        st = os.lstat(deref)
                self._pathmap[path] = MapperEnt(
                    deref, tgt, "WritableFile" if copy else "File", staged)
        self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir,
                          copy=copy, staged=staged)
def visit(self, obj, stagedir, basedir, copy=False):
    # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
    """Map a single File or Directory object into the staging area."""
    location = obj["location"]
    kind = obj["class"]
    if kind == "Directory":
        # Directories map onto the staging directory itself.
        self._pathmap[location] = MapperEnt(location, stagedir, "Directory")
    elif kind == "File":
        if location in self._pathmap:
            return  # already mapped
        target = os.path.join(stagedir, obj["basename"])
        self._pathmap[location] = MapperEnt(location, target, "File")
        self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir)
def test_upload(self, statfile, upl):
    """Test pathmapper uploading files."""
    runner = arvados_cwl.ArvCwlRunner(self.api)

    def fake_statfile(prefix, fn, fnPattern="$(file %s/%s)",
                      dirPattern="$(dir %s/%s/)", raiseOSError=False):
        # Pretend every probed path is a local file that needs uploading.
        return arvados.commands.run.UploadFile("", "tests/hw.py")

    statfile.side_effect = fake_statfile
    upl.side_effect = upload_mock
    mapper = ArvPathMapper(runner,
                           [{"class": "File", "location": "file:tests/hw.py"}],
                           "", "/test/%s", "/test/%s/%s")
    expected = {
        'file:tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File',
            staged=True)
    }
    self.assertEqual(expected, mapper._pathmap)
def test_statfile(self, statfile, upl):
    """Test pathmapper handling ArvFile references."""
    runner = arvados_cwl.ArvCwlRunner(self.api)

    # An ArvFile object returned from arvados.commands.run.statfile means
    # the file is located on a keep mount, so we can construct a direct
    # reference directly without upload.
    def fake_statfile(prefix, fn, fnPattern="$(file %s/%s)",
                      dirPattern="$(dir %s/%s/)"):
        return arvados.commands.run.ArvFile(
            "", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))

    statfile.side_effect = fake_statfile
    upl.side_effect = upload_mock
    mapper = ArvPathMapper(runner,
                           [{"class": "File", "location": "tests/hw.py"}],
                           "", "/test/%s", "/test/%s/%s")
    expected = {
        'tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File')
    }
    self.assertEqual(expected, mapper._pathmap)
def visit(self, srcobj, uploadfiles):
    """Map one CWL File/Directory reference into Keep.

    Portable-data-hash ("keep:...") references are mapped directly; local
    ``file:`` paths are probed with statfile and either mapped (already on
    a keep mount) or queued in *uploadfiles*; ``_:`` literals are
    validated.  Recurses into secondaryFiles and directory listings.
    """
    src = srcobj["location"]
    # Strip any #fragment before matching/uploading.
    if "#" in src:
        src = src[:src.index("#")]
    if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
        # src[5:] strips the "keep:" scheme prefix.
        self._pathmap[src] = MapperEnt(
            src, self.collection_pattern % urllib.unquote(src[5:]),
            srcobj["class"], True)
    if src not in self._pathmap:
        if src.startswith("file:"):
            # Local FS ref, may need to be uploaded or may be on keep
            # mount.
            ab = abspath(src, self.input_basedir)
            st = arvados.commands.run.statfile("", ab,
                                               fnPattern="keep:%s/%s",
                                               dirPattern="keep:%s/%s",
                                               raiseOSError=True)
            with SourceLine(srcobj, "location", WorkflowException):
                if isinstance(st, arvados.commands.run.UploadFile):
                    # Needs uploading; defer to the caller's batch upload.
                    uploadfiles.add((src, ab, st))
                elif isinstance(st, arvados.commands.run.ArvFile):
                    # Already on a keep mount: reference it directly.
                    self._pathmap[src] = MapperEnt(
                        st.fn,
                        self.collection_pattern % urllib.unquote(st.fn[5:]),
                        "File", True)
                else:
                    raise WorkflowException(
                        "Input file path '%s' is invalid" % st)
        elif src.startswith("_:"):
            # Literals must carry their payload inline.
            if srcobj["class"] == "File" and "contents" not in srcobj:
                raise WorkflowException(
                    "File literal '%s' is missing `contents`" % src)
            if srcobj["class"] == "Directory" and "listing" not in srcobj:
                raise WorkflowException(
                    "Directory literal '%s' is missing `listing`" % src)
        else:
            # Anything else (e.g. already-keep refs) maps to itself.
            self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
    with SourceLine(srcobj, "secondaryFiles", WorkflowException):
        for l in srcobj.get("secondaryFiles", []):
            self.visit(l, uploadfiles)
    with SourceLine(srcobj, "listing", WorkflowException):
        for l in srcobj.get("listing", []):
            self.visit(l, uploadfiles)
def setup(self, referenced_files, basedir):
    # type: (List[Any], unicode) -> None
    """Stage each referenced file (plus secondary files), then rewrite
    keep: resolutions to $(task.keep)-relative paths."""
    self.visitlisting(referenced_files, self.stagedir, basedir)
    for path, entry in viewitems(self._pathmap):
        resolved, target, kind, staged = entry
        if kind not in ("File", "Directory"):
            continue
        if resolved.startswith("keep:"):
            # resolved[5:] strips the "keep:" scheme prefix.
            self._pathmap[path] = MapperEnt(
                "$(task.keep)/%s" % resolved[5:], target, kind, staged)
def test_keepref(self):
    """Test direct keep references."""
    runner = arvados_cwl.ArvCwlRunner(self.api)
    keep_loc = "keep:99999999999999999999999999999991+99/hw.py"
    mapper = ArvPathMapper(runner,
                           [{"class": "File", "location": keep_loc}],
                           "", "/test/%s", "/test/%s/%s")
    # A keep: reference maps to itself; no upload should be involved.
    expected = {
        keep_loc: MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File',
            staged=True)
    }
    self.assertEqual(expected, mapper._pathmap)
def visit(self, obj, stagedir, basedir, copy=False):
    # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
    """Map a File or Directory object and recurse into its children."""
    location = obj["location"]
    if obj["class"] == "Directory":
        # Directories map onto the staging directory itself.
        self._pathmap[location] = MapperEnt(location, stagedir, "Directory")
        self.visitlisting(obj.get("listing", []), stagedir, basedir)
    elif obj["class"] == "File":
        if location in self._pathmap:
            return  # already mapped
        target = os.path.join(stagedir, obj["basename"])
        if "contents" in obj and location.startswith("_:"):
            # File literal: stage its inline contents.
            self._pathmap[location] = MapperEnt(obj["contents"], target,
                                                "CreateFile")
        else:
            kind = "WritableFile" if copy else "File"
            self._pathmap[location] = MapperEnt(obj["path"], target, kind)
        self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir)
def mapper(self, src):
    # type: (Text) -> MapperEnt
    """Return the MapperEnt for *src*, honoring #fragment suffixes.

    Overridden to maintain the use case of mapping by source (identifier)
    to target regardless of how the map is structured internally.
    """
    def getMapperEnt(src):
        # Match CreateFile entries by key (their resolved field holds file
        # contents, not an identifier); match everything else by resolution.
        for k, v in viewitems(self._pathmap):
            if (v.type != "CreateFile" and v.resolved == src) or (v.type == "CreateFile" and k == src):
                return v

    if u"#" in src:
        i = src.index(u"#")
        # BUGFIX: look up the base identifier (src[:i]); previously this
        # looked up the fragment part (src[i:]), which is never a key or
        # resolution in the map.  Matches upstream cwltool PathMapper.mapper.
        v = getMapperEnt(src[:i])
        return MapperEnt(v.resolved, v.target + src[i:], v.type, v.staged)
    return getMapperEnt(src)
def test_upload(self, upl):
    """Test pathmapper uploading files."""
    upl.side_effect = upload_mock
    runner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
    mapper = ArvPathMapper(runner,
                           [{"class": "File", "location": "tests/hw.py"}],
                           "", "/test/%s", "/test/%s/%s")
    expected = {
        'tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File')
    }
    self.assertEqual(expected, mapper._pathmap)
def setup(self, referenced_files, basedir):
    # type: (List[Any], unicode) -> None
    """Resolve every referenced file/directory to a Keep location.

    Reuses previously uploaded entries, batch-uploads anything local,
    and materializes new collections for directories and for files that
    carry secondaryFiles or inline contents.
    """
    uploadfiles = set()
    collection = None
    if self.single_collection:
        # All uploads go into one shared collection.
        collection = arvados.collection.Collection(
            api_client=self.arvrunner.api,
            keep_client=self.arvrunner.keep_client,
            num_retries=self.arvrunner.num_retries)
    already_uploaded = self.arvrunner.get_uploaded()
    copied_files = set()
    for k in referenced_files:
        loc = k["location"]
        if loc in already_uploaded:
            # Reuse the earlier upload's resolution.
            v = already_uploaded[loc]
            self._pathmap[loc] = MapperEnt(
                v.resolved,
                self.collection_pattern % urllib.unquote(v.resolved[5:]),
                v.type, True)
            if self.single_collection:
                basename = k["basename"]
                if basename not in collection:
                    # Copy the reused file into the shared collection too;
                    # its final PDH-based path is fixed up later.
                    self.addentry({"location": loc,
                                   "class": v.type,
                                   "basename": basename},
                                  collection, ".", [])
                    copied_files.add((loc, basename, v.type))
    for srcobj in referenced_files:
        self.visit(srcobj, uploadfiles)
    arvados.commands.run.uploadfiles(
        [u[2] for u in uploadfiles],
        self.arvrunner.api,
        dry_run=False,
        num_retries=self.arvrunner.num_retries,
        fnPattern="keep:%s/%s",
        name=self.name,
        project=self.arvrunner.project_uuid,
        collection=collection)
    for src, ab, st in uploadfiles:
        # st.fn[5:] strips the "keep:" scheme prefix.
        self._pathmap[src] = MapperEnt(
            urllib.quote(st.fn, "/:+@"),
            self.collection_pattern % st.fn[5:],
            "Directory" if os.path.isdir(ab) else "File", True)
        self.arvrunner.add_uploaded(src, self._pathmap[src])
    for loc, basename, cls in copied_files:
        # Rewrite reused entries copied into the shared collection.
        fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
        self._pathmap[loc] = MapperEnt(
            urllib.quote(fn, "/:+@"),
            self.collection_pattern % fn[5:], cls, True)
    for srcobj in referenced_files:
        subdirs = []
        if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
            # Materialize the directory listing as a new collection.
            c = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            for l in srcobj.get("listing", []):
                self.addentry(l, c, ".", subdirs)
            # Only save if no collection with this content already exists.
            check = self.arvrunner.api.collections().list(
                filters=[["portable_data_hash", "=",
                          c.portable_data_hash()]],
                limit=1).execute(num_retries=self.arvrunner.num_retries)
            if not check["items"]:
                c.save_new(owner_uuid=self.arvrunner.project_uuid)
            ab = self.collection_pattern % c.portable_data_hash()
            self._pathmap[srcobj["location"]] = MapperEnt(
                "keep:" + c.portable_data_hash(), ab, "Directory", True)
        elif srcobj["class"] == "File" and (
                srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:") and "contents" in srcobj)):
            # Files with secondary files or inline contents also get a
            # dedicated collection so everything stages together.
            c = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            self.addentry(srcobj, c, ".", subdirs)
            check = self.arvrunner.api.collections().list(
                filters=[["portable_data_hash", "=",
                          c.portable_data_hash()]],
                limit=1).execute(num_retries=self.arvrunner.num_retries)
            if not check["items"]:
                c.save_new(owner_uuid=self.arvrunner.project_uuid)
            ab = self.file_pattern % (c.portable_data_hash(),
                                      srcobj["basename"])
            self._pathmap[srcobj["location"]] = MapperEnt(
                "keep:%s/%s" % (c.portable_data_hash(), srcobj["basename"]),
                ab, "File", True)
            if srcobj.get("secondaryFiles"):
                # Synthetic directory entry so secondaries stage alongside.
                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt(
                    "keep:" + c.portable_data_hash(), ab, "Directory", True)
        if subdirs:
            # subdirs is only populated by the branches above, so `c` is
            # guaranteed to be bound here.
            for loc, sub in subdirs:
                # subdirs will all start with "./", strip it off
                ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                self._pathmap[loc] = MapperEnt(
                    "keep:%s/%s" % (c.portable_data_hash(), sub[2:]),
                    ab, "Directory", True)
    self.keepdir = None
def setup(self, referenced_files, basedir):
    # type: (List[Any], unicode) -> None
    """Resolve every referenced file/directory to a Keep location.

    Batch-uploads local files, then materializes intermediate collections
    for directories and for files carrying secondaryFiles or inline
    contents (unless their existing collection layout already matches).
    """
    uploadfiles = set()
    collection = None
    if self.single_collection:
        # All uploads go into one shared collection.
        collection = arvados.collection.Collection(
            api_client=self.arvrunner.api,
            keep_client=self.arvrunner.keep_client,
            num_retries=self.arvrunner.num_retries)
    for srcobj in referenced_files:
        self.visit(srcobj, uploadfiles)
    arvados.commands.run.uploadfiles(
        [u[2] for u in uploadfiles],
        self.arvrunner.api,
        dry_run=False,
        num_retries=self.arvrunner.num_retries,
        fnPattern="keep:%s/%s",
        name=self.name,
        project=self.arvrunner.project_uuid,
        collection=collection,
        packed=False)
    for src, ab, st in uploadfiles:
        # st.fn[5:] strips the "keep:" scheme prefix.
        self._pathmap[src] = MapperEnt(
            urllib.parse.quote(st.fn, "/:+@"),
            self.collection_pattern % st.fn[5:],
            "Directory" if os.path.isdir(ab) else "File", True)
    for srcobj in referenced_files:
        remap = []
        if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
            # Materialize the directory listing as an intermediate
            # collection with the configured TTL/trash time.
            c = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            for l in srcobj.get("listing", []):
                self.addentry(l, c, ".", remap)
            container = arvados_cwl.util.get_current_container(
                self.arvrunner.api, self.arvrunner.num_retries, logger)
            info = arvados_cwl.util.get_intermediate_collection_info(
                None, container, self.arvrunner.intermediate_output_ttl)
            c.save_new(name=info["name"],
                       owner_uuid=self.arvrunner.project_uuid,
                       ensure_unique_name=True,
                       trash_at=info["trash_at"],
                       properties=info["properties"])
            ab = self.collection_pattern % c.portable_data_hash()
            self._pathmap[srcobj["location"]] = MapperEnt(
                "keep:" + c.portable_data_hash(), ab, "Directory", True)
        elif srcobj["class"] == "File" and (
                srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:") and "contents" in srcobj)):
            # If all secondary files/directories are located in
            # the same collection as the primary file and the
            # paths and names that are consistent with staging,
            # don't create a new collection.
            if not self.needs_new_collection(srcobj):
                continue
            c = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            self.addentry(srcobj, c, ".", remap)
            container = arvados_cwl.util.get_current_container(
                self.arvrunner.api, self.arvrunner.num_retries, logger)
            info = arvados_cwl.util.get_intermediate_collection_info(
                None, container, self.arvrunner.intermediate_output_ttl)
            c.save_new(name=info["name"],
                       owner_uuid=self.arvrunner.project_uuid,
                       ensure_unique_name=True,
                       trash_at=info["trash_at"],
                       properties=info["properties"])
            ab = self.file_pattern % (c.portable_data_hash(),
                                      srcobj["basename"])
            self._pathmap[srcobj["location"]] = MapperEnt(
                "keep:%s/%s" % (c.portable_data_hash(), srcobj["basename"]),
                ab, "File", True)
            if srcobj.get("secondaryFiles"):
                # Synthetic directory entry so secondaries stage alongside.
                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap["_:" + str(uuid.uuid4())] = MapperEnt(
                    "keep:" + c.portable_data_hash(), ab, "Directory", True)
        if remap:
            # remap is only populated by the branches above, so `c` is
            # guaranteed to be bound here.
            for loc, sub in remap:
                # subdirs start with "./", strip it off
                if sub.startswith("./"):
                    ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                else:
                    ab = self.file_pattern % (c.portable_data_hash(), sub)
                # NOTE(review): the resolved path always strips two leading
                # characters (sub[2:]) even when sub does not start with
                # "./", unlike `ab` above — confirm this asymmetry is
                # intended.
                self._pathmap[loc] = MapperEnt(
                    "keep:%s/%s" % (c.portable_data_hash(), sub[2:]),
                    ab, "Directory", True)
    self.keepdir = None
def setup(self, referenced_files, basedir):
    # type: (List[Any], unicode) -> None
    """Resolve every referenced file/directory to a Keep location.

    Starts from the runner's record of previous uploads, batch-uploads
    anything new, and materializes collections for directories and for
    files carrying secondaryFiles or inline contents.
    """
    # Seed the map with everything uploaded in earlier runs.
    self._pathmap = self.arvrunner.get_uploaded()
    uploadfiles = set()
    for srcobj in referenced_files:
        self.visit(srcobj, uploadfiles)
    if uploadfiles:
        arvados.commands.run.uploadfiles(
            [u[2] for u in uploadfiles],
            self.arvrunner.api,
            dry_run=False,
            num_retries=self.arvrunner.num_retries,
            fnPattern=self.file_pattern,
            name=self.name,
            project=self.arvrunner.project_uuid)
    for src, ab, st in uploadfiles:
        self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
        # Record for reuse on subsequent runs.
        self.arvrunner.add_uploaded(src, self._pathmap[src])
    for srcobj in referenced_files:
        if srcobj["class"] == "Directory":
            if srcobj["location"] not in self._pathmap:
                # Materialize the directory listing as a new collection.
                c = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    num_retries=self.arvrunner.num_retries)
                subdirs = []
                for l in srcobj["listing"]:
                    self.addentry(l, c, ".", subdirs)
                # Only save if no collection with this content exists yet.
                check = self.arvrunner.api.collections().list(
                    filters=[["portable_data_hash", "=",
                              c.portable_data_hash()]],
                    limit=1).execute(
                        num_retries=self.arvrunner.num_retries)
                if not check["items"]:
                    c.save_new(owner_uuid=self.arvrunner.project_uuid)
                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap[srcobj["location"]] = MapperEnt(
                    ab, ab, "Directory")
                for loc, sub in subdirs:
                    # sub[2:] strips the leading "./" from subdir paths.
                    ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                    self._pathmap[loc] = MapperEnt(ab, ab, "Directory")
        elif srcobj["class"] == "File" and (
                srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:") and "contents" in srcobj)):
            # Files with secondary files or inline contents get a
            # dedicated collection so everything stages together.
            c = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                num_retries=self.arvrunner.num_retries)
            subdirs = []
            self.addentry(srcobj, c, ".", subdirs)
            check = self.arvrunner.api.collections().list(
                filters=[["portable_data_hash", "=",
                          c.portable_data_hash()]],
                limit=1).execute(num_retries=self.arvrunner.num_retries)
            if not check["items"]:
                c.save_new(owner_uuid=self.arvrunner.project_uuid)
            ab = self.file_pattern % (c.portable_data_hash(),
                                      srcobj["basename"])
            self._pathmap[srcobj["location"]] = MapperEnt(ab, ab, "File")
            if srcobj.get("secondaryFiles"):
                # Synthetic directory entry so secondaries stage alongside.
                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt(
                    ab, ab, "Directory")
            for loc, sub in subdirs:
                # sub[2:] strips the leading "./" from subdir paths.
                ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                self._pathmap[loc] = MapperEnt(ab, ab, "Directory")
    self.keepdir = None
def test_docker_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that DockerCommandLineJob respects temp directory directives."""
    (tmp_path / "3").mkdir()
    tmpdir_prefix = str(tmp_path / "3" / "ttmp")
    runtime_context = RuntimeContext({
        "tmpdir_prefix": tmpdir_prefix,
        "user_space_docker_cmd": None
    })
    # Builder takes its configuration positionally; the values here are the
    # minimal stand-ins needed to construct a job.
    builder = Builder(
        {},
        [],
        [],
        {},
        schema.Names(),
        [],
        [],
        {},
        None,
        None,
        StdFsAccess,
        StdFsAccess(""),
        None,
        0.1,
        False,
        False,
        False,
        "",
        runtime_context.get_outdir(),
        runtime_context.get_tmpdir(),
        runtime_context.get_stagedir(),
        INTERNAL_VERSION,
    )
    job = DockerCommandLineJob(builder, {}, PathMapper, [], [], "")
    runtime: List[str] = []
    # Writable file volume: the staged copy should land in a fresh temp
    # subdirectory under tmp_path/1.
    volume_writable_file = MapperEnt(resolved=get_data("tests/2.fastq"),
                                     target="foo",
                                     type=None,
                                     staged=None)
    (tmp_path / "1").mkdir()
    job.add_writable_file_volume(runtime, volume_writable_file, None,
                                 str(tmp_path / "1" / "writable_file"))
    children = sorted((tmp_path / "1").glob("*"))
    assert len(children) == 1
    # children[0] is already absolute, so joining with tmp_path is a no-op.
    subdir = tmp_path / children[0]
    assert subdir.name.startswith("writable_file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "2.fastq").exists()
    # Writable directory volume: the source directory should be copied into
    # a fresh temp subdirectory under tmp_path/2.
    resolved_writable_dir = tmp_path / "data_orig"
    resolved_writable_dir.mkdir(parents=True)
    volume_dir = MapperEnt(resolved=str(resolved_writable_dir),
                           target="bar",
                           type=None,
                           staged=None)
    (tmp_path / "2").mkdir()
    job.add_writable_directory_volume(runtime, volume_dir, None,
                                      str(tmp_path / "2" / "dir"))
    children = sorted((tmp_path / "2").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "2" / children[0]
    assert subdir.name.startswith("dir")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "data_orig").exists()
    # The container-id file must be created under the configured prefix.
    cidfile = job.create_runtime({}, runtime_context)[1]
    assert cidfile and cidfile.startswith(tmpdir_prefix)
    # Literal file volume: contents are written to a file named after the
    # target, inside a fresh temp subdirectory under tmp_path/4.
    volume_file = MapperEnt(resolved="Hoopla!",
                            target="baz",
                            type=None,
                            staged=None)
    (tmp_path / "4").mkdir()
    job.create_file_and_add_volume(runtime, volume_file, None, None,
                                   str(tmp_path / "4" / "file"))
    children = sorted((tmp_path / "4").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "4" / children[0]
    assert subdir.name.startswith("file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "baz").exists()