Example #1
 def load_data(self, data_folder):
     # there's one zip there, let's get the zipped filename
     zgrasp = glob.glob(os.path.join(data_folder, "*.zip"))
     if len(zgrasp) != 1:
         raise uploader.ResourceError("Expecting one zip only, got: %s" % repr(zgrasp))
     zgrasp = zgrasp.pop()
     zf = zipfile.ZipFile(zgrasp)
     content = [e.filename for e in zf.filelist]
     if len(content) != 1:
         raise uploader.ResourceError("Expecting only one file in the archive, got: %s" % content)
     input_file = content.pop()
     input_file = os.path.join(data_folder, input_file)
     self.logger.info("Load data from file '%s'" % input_file)
     res = load_data(input_file)
     return res
Example #2
 def jobs(self):
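     # exactly one input file matching the class-level GLOB_PATTERN, fanned out into one job per chromosome (1-22, X, Y, MT)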
     files = glob.glob(
         os.path.join(self.data_folder, self.__class__.GLOB_PATTERN))
     if len(files) != 1:
         raise uploader.ResourceError("Expected 1 files, got: %s" % files)
     chrom_list = [str(i) for i in range(1, 23)] + ['X', 'Y', 'MT']
     return list(itertools.product(files, chrom_list))
Example #3
 def load_data(self, data_folder):
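     # expect exactly one ExAC_nonTCGA release VCF in the data folder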
     content = glob.glob(os.path.join(data_folder, "ExAC_nonTCGA.r*.vcf"))
     if len(content) != 1:
         raise uploader.ResourceError(
             "Expecting one single vcf file, got: %s" % repr(content))
     input_file = content.pop()
     self.logger.info("Load data from file '%s'" % input_file)
     return load_data(self.__class__.name, input_file)
Example #4
 def load_data(self, data_folder):
     # there's one vcf file there, let's get it
     input_file = glob.glob(os.path.join(data_folder, "*.vcf"))
     if len(input_file) != 1:
         raise uploader.ResourceError("Expecting only one VCF file, got: %s" % input_file)
     input_file = input_file.pop()
     self.logger.info("Load data from file '%s'" % input_file)
     return load_data(input_file)
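Examples #1-#4 all follow the same shape: glob for exactly one input file under data_folder, raise uploader.ResourceError when the count is anything else, then hand the single path to a parser. Below is a minimal sketch of that pattern as a standalone helper; the single_input_file name and the error_cls parameter are hypothetical, not part of BioThings, and the uploaders above raise uploader.ResourceError directly:

    import glob
    import os

    def single_input_file(data_folder, pattern, error_cls=RuntimeError):
        # Expect exactly one file matching `pattern` under data_folder,
        # otherwise fail loudly, mirroring the checks in the examples above.
        matches = glob.glob(os.path.join(data_folder, pattern))
        if len(matches) != 1:
            raise error_cls("Expecting one file matching %r, got: %s" % (pattern, matches))
        return matches.pop()

In an uploader this could be called as single_input_file(data_folder, "*.vcf", uploader.ResourceError), with the returned path passed on to the parsing function.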
Example #5
    def restore_snapshot(self, build_meta, job_manager, **kwargs):
        idxr = self.target_backend.target_esidxer
        # first check if snapshot repo exists
        repo_name, repo_settings = list(
            build_meta["metadata"]["repository"].items())[0]
        try:
            repo = idxr.get_repository(repo_name)
            # ok it exists, check if settings are the same
            if repo[repo_name] != repo_settings:
                # different, raise exception so it's handled in the except below
                self.logger.info(
                    "Repository '%s' was found but settings are different, it needs to be created again"
                    % repo_name)
                raise IndexerException
        except IndexerException:
            # ok, it doesn't exist, let's try to create it
            try:
                repo = idxr.create_repository(repo_name, repo_settings)
            except IndexerException as e:
                raise uploader.ResourceError("Could not create snapshot repository. Check elasticsearch.yml configuration " + \
                        "file, you should have a line like this: " + \
                        'repositories.url.allowed_urls: "%s*" ' % repo_settings["settings"]["url"] + \
                        "allowing snapshot to be restored from this URL. Error was: %s" % e)

        # repository is now ready, let's trigger the restore
        snapshot_name = build_meta["metadata"]["snapshot_name"]
        pinfo = self.get_pinfo()
        pinfo["step"] = "restore"
        pinfo["description"] = snapshot_name

        def get_status_info():
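            # query the recovery status of the target index; report any error as a failure string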
            try:
                res = idxr.get_restore_status(idxr._index)
                return res
            except Exception as e:
                # something went wrong, report as failure
                return "FAILED %s" % e

        def restore_launched(f):
            try:
                self.logger.info("Restore launched: %s" % f.result())
            except Exception as e:
                self.logger.error("Error while lauching restore: %s" % e)
                raise e

        self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                         (snapshot_name, idxr._index, idxr.es_host))
        job = yield from job_manager.defer_to_thread(
            pinfo,
            partial(idxr.restore,
                    repo_name,
                    snapshot_name,
                    idxr._index,
                    purge=self.__class__.AUTO_PURGE_INDEX))
        job.add_done_callback(restore_launched)
        yield from job
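        # poll the restore status until it leaves INIT/IN_PROGRESS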
        while True:
            status_info = get_status_info()
            status = status_info["status"]
            self.logger.info("Recovery status for index '%s': %s" %
                             (idxr._index, status_info))
            if status in ["INIT", "IN_PROGRESS"]:
                yield from asyncio.sleep(
                    getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
            else:
                if status == "DONE":
                    self.logger.info("Snapshot '%s' successfully restored to index '%s' (host: '%s')" % \
                            (snapshot_name,idxr._index,idxr.es_host),extra={"notify":True})
                else:
                    e = uploader.ResourceError("Failed to restore snapshot '%s' on index '%s', status: %s" % \
                            (snapshot_name,idxr._index,status))
                    self.logger.error(e)
                    raise e
                break