def test_pull(caplog, url, system):
    ps = test_pull.ps
    ps.system = system
    sp.call("docker rmi -f %s &> /dev/null" % (url), shell=True)
    ret = ps.pull(url)
    if 'latest' not in url:
        assert ret
        assert url in ps.valid and url not in ps.invalid
        assert ps.categories[url] == ['Read mapping']
        assert ps.keywords[url] == ['Mapping']
        assert ps.description[url] == "Fast, accurate, memory-efficient aligner for short and long sequencing reads"
        assert ps.homepage[url] == 'http://bio-bwa.sourceforge.net'
        if ps.system == 'docker':
            assert '0.7.3a--hed695b0_5' in translate(sp.check_output('docker images | grep "quay.io/biocontainers/bwa"', shell=True))
            assert ps.images[url] == url
            assert not os.path.exists(os.path.join(ps.containerDir, 'bwa'))
        else:
            assert '0.7.3a--hed695b0_5' not in translate(sp.check_output('docker images', shell=True)).rstrip('\n')
            assert ps.images[url] == os.path.join(ps.containerDir, 'bwa', 'bwa-0.7.3a--hed695b0_5.sif')
            assert os.path.exists(ps.images[url])
    else:
        assert not ret
        assert url not in ps.valid and url in ps.invalid
        assert "Not pulling" in caplog.text
        assert url not in ps.categories
        assert url not in ps.keywords
        assert url not in ps.homepage
        assert url not in ps.description
def _getMetadata(self, url):
    '''
    Assuming the image is a biocontainer,

    - `self.categories[url]`
    - `self.keywords[url]`
    - `self.description[url]`
    - `self.homepage[url]`

    are set after querying https://dev.bio.tools

    # Parameters
    url (str): Image url used to pull
    '''
    if url in self.categories and url in self.keywords and url in self.description:
        logger.debug("Metadata already set for %s" % (url))
        return
    if url not in self.name:
        self.parseURL(url)
    name = self.name[url]
    self.homepage[url] = False
    try:
        # Check dev.bio.tools
        md_url = "https://dev.bio.tools/api/tool/%s?format=json" % (name)
        resp_json = json.loads(translate(urllib2.urlopen(md_url).read()))
        topics = [topic['term'] for topic in resp_json['topic']]
        topics = [t for t in topics if t != 'N/A']
        functions = [o['term'] for f in resp_json['function'] for o in f['operation']]
        desc = resp_json['description']
        if 'homepage' in resp_json:
            self.homepage[url] = resp_json['homepage']
    except urllib2.HTTPError:
        try:
            # Check Launchpad
            md_url = "https://api.launchpad.net/devel/%s" % (name)
            resp_json = json.loads(translate(urllib2.urlopen(md_url).read()))
            desc = resp_json['description']
            self.homepage[url] = resp_json['homepage_url']
            topics = ["Container"]
            functions = ["Unknown"]
        except Exception:
            # Default values
            logger.debug("No record of %s on dev.bio.tools or launchpad" % (name))
            functions = ["Unknown"]
            topics = ["Container"]
            desc = "The %s package" % (name)
    self.categories[url] = functions
    self.keywords[url] = topics
    self.description[url] = desc
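# A minimal usage sketch (hedged: `cs` stands for an already-constructed
# instance of this class, and the url must be parseable by parseURL):
#
#   url = 'quay.io/biocontainers/bwa:0.7.3a--hed695b0_5'
#   cs._getMetadata(url)
#   print(cs.categories[url], cs.keywords[url])
#   print(cs.description[url], cs.homepage[url])
#
# If neither dev.bio.tools nor Launchpad has a record, the fallbacks are
# ["Unknown"], ["Container"], and "The bwa package".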
def _ccheck_output(self, url, cmd):
    '''
    Runs `cmd` inside the container image for `url` using the command
    template for the detected container system.

    # Parameters
    url (str): Image url used to pull
    cmd (str): Command to run inside the container

    # Returns
    list: Non-empty lines of the command's output
    '''
    if self.system not in self.cmd_templates:
        logger.error("%s system is unhandled" % (self.system))
        sys.exit(500)
    to_run = self.cmd_templates[self.system] % (self.images[url], cmd)
    logger.debug("Running: %s" % (to_run))
    output = sp.check_output(to_run, shell=True)
    # Split on newlines (with or without carriage returns) and drop empty strings
    return list(filter(lambda x: x, re.split(r'\r?\n', translate(output))))
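# Hedged usage sketch: assuming `cs` is an instance whose images[url] has
# already been populated by a pull, this would return the root directory
# listing of the container as a list of non-empty lines:
#
#   lines = cs._ccheck_output(url, 'ls /')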
def _getTags(self, url, remove_latest=False):
    '''
    Returns all tags for the image specified with URL

    # Parameters
    url (str): Image url used to pull
    remove_latest (bool): Removes the "latest" tag from the return set

    # Attributes
    self.tag_dict (dict): Temporary cache of tags, to prevent repeated requests: {(registry,org,name):set,}

    # Returns
    set: all tags associated with main image URL
    '''
    # {registry: (url, key),}
    tag_query = {'dockerhub': ('https://hub.docker.com/v2/repositories/%s/%s/tags/', 'results'),
                 'quay': ('https://quay.io/api/v1/repository/%s/%s/tag/', 'tags')}
    tag_tuple = self._getUrlTuple(url)
    if self.registry[url] not in tag_query:
        logger.error('Unable to query tags for %s' % (url))
        self.tag_dict[tag_tuple] = set()
    if tag_tuple not in self.tag_dict:
        query, key = tag_query[self.registry[url]]
        query = query % (self.org[url], self.name[url])
        try:
            resp = json.loads(translate(urllib2.urlopen(query).read()))
            results = resp[key]
            # Follow pagination until all results are collected
            while 'next' in resp and resp['next']:
                resp = json.loads(translate(urllib2.urlopen(resp['next']).read()))
                results += resp[key]
            all_tags = set([t['name'] for t in results])
            self.tag_dict[tag_tuple] = all_tags
        except urllib2.HTTPError:
            logger.warning("No response from %s" % (query))
            self.tag_dict[tag_tuple] = set()
    if not remove_latest:
        return self.tag_dict[tag_tuple]
    logger.debug("Removing the latest tag from %s" % (url))
    return self.tag_dict[tag_tuple] - set(['latest'])
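# Hedged usage sketch (assumes `cs` is an instance and parseURL(url) has
# already populated registry/org/name for the url; requires network access
# to quay.io):
#
#   tags = cs._getTags('quay.io/biocontainers/bwa:0.7.3a--hed695b0_5', remove_latest=True)
#   assert 'latest' not in tags
#
# Repeated calls for the same (registry, org, name) tuple are served from
# self.tag_dict instead of re-querying the registry.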
def test__pullImage_docker(caplog):
    ps = test__pullImage_docker.ps
    ps.system = 'docker'
    url = 'quay.io/biocontainers/bwa:0.7.3a--hed695b0_5'
    sp.call("docker rmi %s &> /dev/null" % (url), shell=True)
    ps.parseURL(url)
    ret = ps._pullImage(url)
    assert ret
    assert '0.7.3a--hed695b0_5' in translate(sp.check_output('docker images | grep "quay.io/biocontainers/bwa"', shell=True)).rstrip('\n')
    assert ps.images[url] == url
    assert not os.path.exists(os.path.join(ps.containerDir, 'bwa'))
    sp.call("docker rmi %s &> /dev/null" % (url), shell=True)
def test__pullImage_singularity(caplog):
    ps = test__pullImage_singularity.ps
    ps.system = 'singularity3'
    url = 'quay.io/biocontainers/bwa:0.7.3a--hed695b0_5'
    sp.call("docker rmi -f %s &> /dev/null" % (url), shell=True)
    ps.parseURL(url)
    ret = ps._pullImage(url)
    assert ret == True
    assert ps.system == 'singularity3'
    assert '0.7.3a--hed695b0_5' not in translate(sp.check_output('docker images', shell=True)).rstrip('\n')
    assert ps.images[url] == os.path.join(ps.containerDir, 'bwa', 'bwa-0.7.3a--hed695b0_5.sif')
    assert os.path.exists(ps.images[url])
def test__pullDocker(caplog):
    ps = test__pullDocker.ps
    ps.system = 'docker'
    url = 'quay.io/biocontainers/bwa:0.7.3a--hed695b0_5'
    sp.call("docker rmi %s &> /dev/null" % (url), shell=True)
    img_out, img_dir, simg = tmp_file(split=True)
    ps.parseURL(url)
    ret = ps._pullDocker(url, img_dir, simg)
    assert '0.7.3a--hed695b0_5' in translate(sp.check_output('docker images | grep "quay.io/biocontainers/bwa"', shell=True)).rstrip('\n')
    assert ret == url
    assert not os.path.exists(img_out)
    sp.call("docker rmi %s &> /dev/null" % (url), shell=True)
def _detectSingularity(self):
    if not sp.call('singularity help &>/dev/null', shell=True):
        logger.debug("Detected singularity for container management")
        # Version output is either "singularity version 3.3.0-1.fc29"
        # or just "2.6.0-dist"
        sing_version = translate(sp.check_output('singularity --version', shell=True)).rstrip('\n').split()
        if len(sing_version) > 1:
            sing_version = sing_version[2]
        else:
            sing_version = sing_version[0]
        split_version = sing_version.split('.')
        version = split_version[0]
        self.point_version = split_version[1]
        logger.debug("Detected singularity %s.%s" % (split_version[0], split_version[1]))
        return 'singularity%s' % (version)
    logger.debug("Did not detect singularity")
    return False
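# Hedged usage sketch: on a host with singularity 3.x on the PATH this is
# expected to return the string 'singularity3' (and set self.point_version);
# on a host without singularity it returns False:
#
#   system = cs._detectSingularity()  # e.g. 'singularity3' or False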
def validateURL(self, url, include_libs=False):
    '''
    Adds url to the self.invalid set when a URL is invalid and
    self.valid when a URL works.

    URLs to the following registries will be considered invalid because
    they require authentication:

    - Singularity Hub (shub://)
    - GitHub packages (docker.pkg.github.com)
    - GitHub container registry (ghcr.io)

    By default, containers designated as libraries on bio.tools are excluded.

    # Parameters
    url (str): Image url used to pull
    include_libs (bool): Include containers of libraries

    # Attributes
    self.valid (set): Where valid URLs are stored
    self.invalid (set): Where invalid URLs are stored
    self.registry_exclude_re (re): Compiled regular expression of registry urls to exclude
    '''
    # Exclude registries that require authentication
    if self.registry_exclude_re.match(url):
        logger.debug("The registry for %s requires authentication and is not supported by rgc." % (url))
        self.invalid.add(url)
        return
    # Sanitize docker prefix if included
    if url not in self.sanitized_url:
        self.parseURL(url)
    name = self.name[url]
    tag = self.tag[url]
    if not tag:
        logger.warning("Excluding - No tag included in %s" % (url))
        self.invalid.add(url)
        return
    if not include_libs:
        # See if it is a bio lib
        md_url = "https://dev.bio.tools/api/tool/%s?format=json" % (name)
        try:
            resp_json = json.loads(translate(urllib2.urlopen(md_url).read()))
            types = [v for v in resp_json['toolType']]
            if types == ['Library']:
                self.invalid.add(url)
                logger.debug("Excluding %s, which is a library" % (url))
                return
        except urllib2.HTTPError:
            pass
        ## Check for pypi lib
        #if name not in set(('ubuntu','singularity','bowtie','centos')):
        #    try:
        #        code = urllib2.urlopen('https://pypi.org/pypi/%s/json'%(name)).getcode()
        #        if int(code) == 200:
        #            self.invalid.add(url)
        #            logger.debug("Excluding %s, which is a pypi package"%(url))
        #            return
        #    except urllib2.HTTPError:
        #        pass
    if tag not in self._getTags(url):
        logger.warning("%s not found in %s" % (tag, self._getTags(url)))
        self.invalid.add(url)
        logger.warning("%s is an invalid URL" % (url))
    else:
        logger.debug("%s is valid" % (url))
        self.valid.add(url)
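# Hedged usage sketch (assumes `cs` is an instance with registry_exclude_re
# compiled, plus network access to the registries and dev.bio.tools):
#
#   url = 'quay.io/biocontainers/bwa:0.7.3a--hed695b0_5'
#   cs.validateURL(url)
#   ok = url in cs.valid  # False would mean url landed in cs.invalid
#
# A url without an explicit tag, or with a tag absent from _getTags(url),
# ends up in cs.invalid rather than raising.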