Пример #1
0
 def info(self, version='latest'):
     """
     Return build metadata and release notes for the given version.

     The metadata is loaded from "<version>.json", resolved against
     self.base_url. When it lists "changes", the "url" of each entry is
     downloaded and stored under "release_note", keyed by format: the
     "json" format is decoded, any other format is kept as raw text.

     Returned structure:
     {
         "info": ...          (build metadata)
         "release_note": ...  (only when the metadata lists changes)
     }

     Raises DumperException when the metadata can't be loaded, or when a
     release note download doesn't answer with status code 200.
     """
     build_meta = self.load_remote_json(
         urljoin(self.base_url, "%s.json" % version))
     if not build_meta:
         raise DumperException("Can't find version '%s'" % version)
     result = {"info": build_meta}
     if not build_meta.get("changes"):
         # nothing to download, metadata is all we have
         return result
     changes = build_meta["changes"]
     notes = {}
     result["release_note"] = notes
     for filtyp in changes:
         res = self.client.get(changes[filtyp]["url"])
         if res.status_code != 200:
             raise DumperException(
                 "Error while downloading release note '%s (%s)': %s" %
                 (version, res, res.text))
         notes[filtyp] = res.json() if filtyp == "json" else res.text
     return result
Пример #2
0
 def auth_get(url, *args, **kwargs):
     """
     GET `url` from S3, signing the request with AWS4Auth.

     The bucket name and the region are parsed from the host name of the
     URL. When the host carries no region ("<bucket>.s3.amazonaws.com"),
     the region is looked up with a GetBucketLocation call.

     `self` is not a parameter: it is taken from the enclosing scope and
     provides the AWS credentials (class attributes) and the HTTP client.
     Extra positional/keyword arguments are forwarded to the client's get().

     Raises DumperException for s3 static website URLs, or when the URL
     doesn't match the expected amazonaws.com pattern.
     """
     if ".s3-website-" in url:
         raise DumperException(
             "Can't access s3 static website using authentication")
     # extract region from URL (reliable ?)
     m = re.match(r"https?://(.*)\.(.*)\.amazonaws.com.*", url)
     if not m:
         raise DumperException(
             "Couldn't determine s3 region from url '%s'" % url)
     bucket_name, frag = m.groups()
     # frag looks like "s3-us-west-2", or just "s3" when no region is given
     region = frag.replace("s3-", "")
     if region == "s3":  # url doesn't contain a region, we need to query the bucket
         s3client = boto3.client(
             "s3",
             aws_access_key_id=self.__class__.AWS_ACCESS_KEY_ID,
             aws_secret_access_key=self.__class__.AWS_SECRET_ACCESS_KEY)
         bucket_info = s3client.get_bucket_location(Bucket=bucket_name)
         # GetBucketLocation reports a null LocationConstraint for buckets
         # located in us-east-1; signing needs an actual region name
         region = bucket_info["LocationConstraint"] or "us-east-1"
     auth = AWS4Auth(self.__class__.AWS_ACCESS_KEY_ID,
                     self.__class__.AWS_SECRET_ACCESS_KEY,
                     region, 's3')
     return self._client.get(url, auth=auth, *args, **kwargs)
Пример #3
0
    def check_compat(self, build_meta):
        """
        Check that the versions required by `build_meta` match the ones
        declared in btconfig.

        For each of "app_version", "standalone_version" and
        "biothings_version", the value found in `build_meta` (a string, a
        list, or a dict holding a "branch" key) is compared with the "branch"
        of the upper-cased btconfig attribute of the same name. A trailing
        " [...]" suffix is removed from both sides before comparing.

        Nothing is checked when btconfig.SKIP_CHECK_COMPAT is set and true.

        Raises AssertionError when a field is missing from `build_meta` or
        when no version matches, DumperException when the remote version
        resolves to None only.
        """
        if getattr(btconfig, "SKIP_CHECK_COMPAT", False):
            return

        hash_suffix = r"( \[.*\])"
        msg = []
        for version_field in ("app_version", "standalone_version",
                              "biothings_version"):
            conf_attr = version_field.upper()
            remote = build_meta.get(version_field)
            assert remote is not None, "Version field '%s' is None" % conf_attr
            # most recent releases use a dict, older ones a plain string
            if isinstance(remote, dict):
                remote = remote["branch"]
            if type(remote) is not list:
                remote = [remote]
            # drop the hash part (only there when the version was a string)
            remote = {re.sub(hash_suffix, "", v) for v in remote}
            if remote == {None}:
                raise DumperException(
                    "Remote data is too old and can't be handled with current app (%s not defined)"
                    % version_field)
            local = {
                re.sub(hash_suffix, "",
                       getattr(btconfig, conf_attr).get("branch"))
            }
            assert local.intersection(remote), "Remote data requires %s to be %s, but current app is %s" % (
                version_field, remote, local)
            msg.append("%s=%s:OK" % (version_field, remote))
Пример #4
0
    def post_dump(self, *args, **kwargs):
        """
        Post-process the files of the release that was just dumped.

        Reads "<release>.json" from self.new_data_folder and then, depending
        on its "type":
        - "incremental": checks the md5sum of every file listed under
          "diff" -> "files" in "metadata.json". The first mismatch is logged,
          registered as a "failed" status and raised as DumperException.
        - "full": when the repository type is "fs", uncompresses the content
          of self.new_data_folder.

        Does nothing when self.release isn't set. Positional and keyword
        arguments are accepted but not used.
        """
        if not self.release:
            # wasn't set before, means no need to post-process (ie. up-to-date, already done)
            return
        meta_path = os.path.join(self.new_data_folder,
                                 "%s.json" % self.release)
        # context managers make sure the JSON files are closed once parsed
        with open(meta_path) as meta_file:
            build_meta = json.load(meta_file)
        if build_meta["type"] == "incremental":
            self.logger.info("Checking md5sum for files in '%s'" %
                             self.new_data_folder)
            with open(os.path.join(self.new_data_folder,
                                   "metadata.json")) as metadata_file:
                metadata = json.load(metadata_file)
            for md5_fname in metadata["diff"]["files"]:
                spec_md5 = md5_fname["md5sum"]
                fname = md5_fname["name"]
                compute_md5 = md5sum(os.path.join(self.new_data_folder, fname))
                if compute_md5 != spec_md5:
                    self.logger.error(
                        "md5 check failed for file '%s', it may be corrupted" %
                        fname)
                    e = DumperException("Bad md5sum for file '%s'" % fname)
                    self.register_status("failed", download={"err": repr(e)})
                    raise e
                else:
                    self.logger.debug("md5 check success for file '%s'" %
                                      fname)
        elif build_meta["type"] == "full":
            # if type=fs, check if archive must be uncompressed
            # TODO
            if build_meta["metadata"]["repository"]["type"] == "fs":
                uncompressall(self.new_data_folder)
Пример #5
0
 def get_release(self):
     """
     Set self.release to the last entry, in sorted order, of the remote
     directory CWD_DIR (class attribute).

     Raises DumperException when that directory listing is empty.
     """
     cwd_dir = self.__class__.CWD_DIR
     self.client.cwd(cwd_dir)
     releases = sorted(self.client.nlst())
     if not releases:
         # report the directory which was actually listed
         raise DumperException(
             "Can't find any release information in '%s'" % cwd_dir)
     self.release = releases[-1]
Пример #6
0
 def post_download(self,remote,local):
     """
     Sanity-check the downloaded archive `local` against self.release.

     The release string must appear in the archive's file name and in the
     name of the first archive member containing "readme"; the release must
     also end with "a" (academic version). `remote` is not used.

     Raises DumperException when the file name or the readme don't match,
     AssertionError when the release isn't an academic one.
     """
     filename = os.path.basename(local)
     if self.release not in filename:
         raise DumperException("Weird, filename is wrong ('%s')" % filename)
     # make sure we downloaded to correct one, and that it's the academic version
     # (only the member list is needed, so the archive is closed right away)
     with zipfile.ZipFile(local) as zf:
         readme = next((f for f in zf.filelist if "readme" in f.filename), None)
     if readme is None:
         raise DumperException("Can't find a readme in the archive (I was checking version/license)")
     if self.release not in readme.filename:
         raise DumperException("Version in readme filename ('%s') doesn't match expected version %s" % (readme.filename, self.release))
     assert self.release.endswith("a"), "Release '%s' isn't academic version (how possible ?)" % self.release
Пример #7
0
 def info(self,version=LATEST):
     """
     Return a text report for `version`: the current local version followed
     by the remote release note (its "txt" flavor), or a "no information"
     line when the build metadata doesn't reference one.

     Raises DumperException when the build metadata can't be loaded or when
     the release note download doesn't answer with status code 200.
     """
     header = ">>> Current local version: '%s'\n" % self.target_backend.version
     header = header + ">>> Release note for remote version '%s':\n" % version
     cls = self.__class__
     build_meta = self.load_remote_json(cls.SRC_URL % (cls.BIOTHINGS_S3_FOLDER,version))
     if not build_meta:
         raise DumperException("Can't find version '%s'" % version)
     changes = build_meta.get("changes")
     if not (changes and changes.get("txt")):
         return header + "No information found for release '%s'" % version
     res = self.client.get(changes["txt"]["url"])
     if res.status_code != 200:
         raise DumperException("Error while downloading release note '%s': %s" % (version,res))
     return header + res.text
Пример #8
0
 def download(self, remotefile, localfile):  # pylint: disable=arguments-differ
     """
     Produce `localfile` by calling the method of this object named by
     `remotefile`, then make sure the resulting file is at least 1MiB.

     Raises DumperException when the file is smaller than that.
     """
     self.prepare_local_folders(localfile)
     self.logger.debug("Downloading '%s'", os.path.basename(localfile))
     # `remotefile` isn't a location here, it names the method doing the work
     getattr(self, remotefile)(localfile)
     # rough sanity check against "empty" files w/ just headers
     min_size = 1024*1024  # 1MiB
     if os.path.getsize(localfile) < min_size:
         raise DumperException("'%s' is too small, no data ?" % localfile)
Пример #9
0
 def get_release(self):
     """
     Set self.release to the last entry, in sorted order, among the remote
     entries whose name starts with a YYYY-MM-DD like pattern.

     Raises DumperException when no entry matches.
     """
     # only dir with dates (raw string: "\d" isn't a valid str escape)
     date_pat = re.compile(r"\d{4}-\d{2}-\d{2}")
     releases = sorted(d for d in self.client.nlst() if date_pat.match(d))
     if not releases:
         raise DumperException("Can't find any release information in '%s'" %
                               self.__class__.VERSION_DIR)
     self.release = releases[-1]
Пример #10
0
 def versions(self):
     """
     Return the "versions" entry of the JSON document found at VERSION_URL
     (class attribute).

     Raises DumperException when that document can't be loaded (or is
     empty), AssertionError when its "format" isn't "1.0".
     """
     payload = self.load_remote_json(self.__class__.VERSION_URL)
     if not payload:
         raise DumperException("Can't find any versions available...")
     fmt = payload["format"]
     assert fmt == "1.0", "versions.json format has changed: %s" % fmt
     return payload["versions"]
Пример #11
0
 def get_drive_url(self,ftpname):
     """
     Return the href of the element following the FTP link for `ftpname`
     on the dbNSFP main page.

     `ftpname` is used as a regular expression matched against the "href"
     attribute of the page's elements; the last match is kept.

     Raises DumperException when no FTP link matches, or when there's no
     following sibling element carrying a href.
     """
     # ok, so let's get the main page data. in this page there are links for both
     # FTP and Google Drive. We're assuming here that just after FTP link, there's
     # the corresponding one for Drive (parse will ensure we downloaded the correct
     # version, and also the correct licensed one - academic only)
     res = requests.get("https://sites.google.com/site/jpopgen/dbNSFP")
     html = BeautifulSoup(res.text,"html.parser")
     ftplinks = html.findAll(attrs={"href":re.compile(ftpname)})
     if not ftplinks:
         raise DumperException("Can't find a FTP link for '%s'" % ftpname)
     ftplink = ftplinks.pop()
     # let's cross fingers here... there may be no sibling at all (None),
     # which is reported the same way as a sibling without href
     drivelink = ftplink.findNextSibling()
     href = drivelink.get("href") if drivelink is not None else None
     if not href:
         raise DumperException("Can't find a href in drive link element: %s" % drivelink)
     return href
Пример #12
0
 def get_latest_release(self):
     """
     Return the latest release date, formatted as YYYY-MM-DD, parsed from
     the "UNII List download (updated ...)" link of the page at
     HOMEPAGE_URL (class attribute).

     Raises DumperException when the link is missing, or when its text
     can't be matched or parsed as a date.
     """
     res = self.client.get(self.__class__.HOMEPAGE_URL)
     html = bs4.BeautifulSoup(res.text,"lxml")
     # link containing the latest date version
     link = html.find(attrs={"href":"/srs/jsp/srs/uniiListDownload.jsp"})
     try:
         # a missing link (None) and a text which doesn't match both raise
         # AttributeError here, and are reported the same way below
         m = re.match(r"UNII List download \(updated (.*)\)",link.text)
         latest = datetime.date.strftime(dtparser.parse(m.groups()[0]),"%Y-%m-%d")
         return latest
     except Exception as e:
         raise DumperException("Can't find or parse date from URL '%s': %s" % (self.__class__.HOMEPAGE_URL,e))
Пример #13
0
 def versions(self):
     """
     Return all available versions as text, one line per version showing
     its build version, release date and type in fixed-width columns.

     Raises DumperException when the versions document can't be loaded (or
     is empty), AssertionError when its "format" isn't "1.0".
     """
     cls = self.__class__
     avail_versions = self.load_remote_json(cls.SRC_URL % (cls.BIOTHINGS_S3_FOLDER,VERSIONS))
     if not avail_versions:
         raise DumperException("Can't find any versions available...'")
     assert avail_versions["format"] == "1.0", "versions.json format has changed: %s" % avail_versions["format"]
     # left-aligned, space-padded columns
     col20 = '{0: <20}'.format
     col16 = '{0: <16}'.format
     return "\n".join(
         "version=%s date=%s type=%s" % (col20(ver["build_version"]),col20(ver["release_date"]),col16(ver["type"]))
         for ver in avail_versions["versions"])
Пример #14
0
    def check_compat(self,build_meta):
        """
        Check that the versions required by `build_meta` match the ones
        declared in btconfig.

        For each of "app_version", "standalone_version" and
        "biothings_version", the value found in `build_meta` (a single value
        or a list of accepted values) must contain the value of the
        upper-cased btconfig attribute of the same name. The outcome is
        logged at debug level.

        Nothing is checked when btconfig.SKIP_CHECK_COMPAT is set and true.

        Raises DumperException when `build_meta` doesn't define a field,
        AssertionError when no version matches.
        """
        if getattr(btconfig,"SKIP_CHECK_COMPAT",False):
            return

        msg = []
        for version_field in ("app_version","standalone_version","biothings_version"):
            required = build_meta.get(version_field)
            if type(required) is not list:
                required = [required]
            required = set(required)
            if required == {None}:
                raise DumperException("Remote data is too old and can't be handled with current app (%s not defined)" % version_field)
            current = {getattr(btconfig,version_field.upper())}
            assert current.intersection(required), "Remote data requires %s to be %s, but current app is %s" % (version_field,required,current)
            msg.append("%s=%s:OK" % (version_field,required))
        self.logger.debug("Compat: %s" % ", ".join(msg))
Пример #15
0
 def choose_best_version(self,versions):
     """
     Out of all compatible versions, choose the best:
     1. choose incremental vs. full according to preferences
     2. version must be the highest (most up-to-date)

     `versions` is a list of dicts, each holding a "build_version" string.
     Returns the selected dict. Raises DumperException when `versions` is
     empty.
     """
     # 1st pass
     # TODO: implemente inc/full preferences, for now prefer incremental
     if not versions:
         raise DumperException("No compatible version found")
     # incremental builds are named "old_version.new_version": the dot has
     # to be searched in the build version string, not in the dict's keys
     preferreds = [v for v in versions if "." in v["build_version"]]
     if preferreds:
         self.logger.info("Preferred versions (according to preferences): %s" % preferreds)
         versions = preferreds
     # we can directly take the max because:
     # - version is a string
     # - format if YYYYMMDD
     # - when incremental, it's always old_version.new_version
     return max(versions,key=lambda e: e["build_version"])
Пример #16
0
    def download(self, remoteurl, localfile, headers=None):
        """
        Download `remoteurl` to `localfile`.

        When both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set on the
        class, the bucket name and the key are extracted from the URL and
        the file is fetched with auth_download(); otherwise
        anonymous_download() is used. The result of the selected method is
        returned.

        `headers` is passed along to the selected method, and defaults to a
        new empty dict on each call.

        Raises DumperException when authentication is configured but the
        bucket name can't be extracted from the URL.
        """
        if headers is None:
            # no shared mutable default between calls
            headers = {}
        self.prepare_local_folders(localfile)
        parsed = urlparse(remoteurl)
        if self.__class__.AWS_ACCESS_KEY_ID and self.__class__.AWS_SECRET_ACCESS_KEY:
            # accessing diffs controled by auth
            key = parsed.path.strip(
                "/")  # s3 key are relative, not / at beginning
            # extract bucket name from URL (reliable?)
            m = re.match(r"^(.*?)\..*\.amazonaws.com", parsed.netloc)
            if not m:
                raise DumperException(
                    "Can't extract bucket name from URL '%s'" % remoteurl)
            bucket_name = m.groups()[0]
            return self.auth_download(bucket_name, key, localfile, headers)
        else:
            return self.anonymous_download(remoteurl, localfile, headers)
Пример #17
0
 def versions(self):
     """
     Return the list of all available versions, as found under "versions"
     in the JSON document located at VERSION_URL (class attribute).

     Example of returned value:
     [{
         'build_version': '20171003',
         'url': 'https://biothings-releases.s3.amazonaws.com:443/mygene.info/20171003.json',
         'release_date': '2017-10-06T11:58:39.749357',
         'require_version': None,
         'target_version': '20171003',
         'type': 'full'
     }, ...]

     Raises DumperException when the document can't be loaded (or is
     empty), AssertionError when its "format" isn't "1.0".
     """
     listing = self.load_remote_json(self.__class__.VERSION_URL)
     if not listing:
         raise DumperException("Can't find any versions available...")
     listing_format = listing["format"]
     assert listing_format == "1.0", "versions.json format has changed: %s" % listing_format
     return listing["versions"]