def load_data(self, data_folder):
    """Locate the single ``*.vcf`` file in *data_folder* and parse it.

    Delegates the actual parsing to the module-level ``load_data``
    function of the same name.

    :param data_folder: directory expected to contain exactly one VCF file
    :raises uploader.ResourceError: if zero or more than one VCF is found
    :return: whatever the module-level ``load_data`` parser yields/returns
    """
    matches = glob.glob(os.path.join(data_folder, "*.vcf"))
    # exactly one VCF file is expected in the dump folder
    if len(matches) != 1:
        raise uploader.ResourceError(
            "Expecting only one VCF file, got: %s" % matches)
    vcf_path = matches.pop()
    self.logger.info("Load data from file '%s'" % vcf_path)
    # note: this calls the module-level parser, not this method recursively
    return load_data(vcf_path)
def load_data(self, data_folder):
    """Locate the single exome VCF matching ``GLOB_PATTERN`` under
    ``data_folder/exomes``, check its tabix index exists, and parse it.

    :param data_folder: dump root; VCF is searched under its ``exomes``
        sub-directory using the class-level ``GLOB_PATTERN``
    :raises uploader.ResourceError: if not exactly one VCF matches, or if
        the companion tabix index (``tbi_suffix``) is missing
    :return: result of the module-level ``load_data_exomes`` parser
    """
    pattern = os.path.join(data_folder, "exomes", self.__class__.GLOB_PATTERN)
    # was a leftover debug message ("papapap %s"); make it meaningful
    self.logger.info("Searching for VCF file matching '%s'" % pattern)
    files = glob.glob(pattern)
    if len(files) != 1:
        raise uploader.ResourceError(
            "Expecting only one VCF file, got: %s" % files)
    input_file = files.pop()
    tbi_file = "%s%s" % (input_file, self.__class__.tbi_suffix)
    # the tabix index must ship alongside the VCF; fail early otherwise.
    # Raise instead of `assert` so the check survives `python -O`.
    if not os.path.exists(tbi_file):
        raise uploader.ResourceError(
            "Missing tabix index file: %s" % tbi_file)
    self.logger.info("Load data from file '%s'" % input_file)
    return load_data_exomes(input_file)
def restore_snapshot(self, build_meta, job_manager, **kwargs):
    """Restore an Elasticsearch snapshot described in *build_meta*.

    Ensures the snapshot repository exists (creating or recreating it,
    enriching S3 repos with AWS credentials from config), triggers the
    restore through *job_manager*, then polls recovery status until it
    reaches a terminal state.

    :param build_meta: build metadata; must contain
        ``metadata.repository`` (single repo name -> settings mapping)
        and ``metadata.snapshot_name``
    :param job_manager: provides ``defer_to_thread`` (coroutine-based)
    :raises uploader.ResourceError: if the repository can't be created
        or the restore ends in a non-DONE status
    :return: current number of documents in the restored index
    """
    idxr = self.target_backend.target_esidxer
    # first check if snapshot repo exists
    repo_name, repo_settings = list(
        build_meta["metadata"]["repository"].items())[0]
    # enrich S3 repos with credentials when available (credentials are
    # part of the repo-creation JSON settings)
    if repo_settings.get("type") == "s3" and \
            btconfig.STANDALONE_AWS_CREDENTIALS.get("AWS_ACCESS_KEY_ID"):
        repo_settings["settings"]["access_key"] = \
            btconfig.STANDALONE_AWS_CREDENTIALS["AWS_ACCESS_KEY_ID"]
        repo_settings["settings"]["secret_key"] = \
            btconfig.STANDALONE_AWS_CREDENTIALS["AWS_SECRET_ACCESS_KEY"]
    # all restore repos should be read-only
    repo_settings["settings"]["readonly"] = True
    try:
        repo = idxr.get_repository(repo_name)
        # repo exists; if its settings differ from what we need, raise so
        # the except clause below recreates it
        if repo[repo_name] != repo_settings:
            self.logger.info(
                "Repository '%s' was found but settings are different, it needs to be created again" % repo_name)
            self.logger.debug("Existing setting: %s" % repo[repo_name])
            self.logger.debug("Required (new) setting: %s" % repo_settings)
            raise IndexerException
    except IndexerException:
        # repo missing (or settings mismatch above): try to create it
        try:
            repo = idxr.create_repository(repo_name, repo_settings)
        except IndexerException as e:
            if repo_settings["settings"].get("url"):
                # URL repos need explicit whitelisting in elasticsearch.yml
                raise uploader.ResourceError(
                    "Could not create snapshot repository. Check elasticsearch.yml configuration " +
                    "file, you should have a line like this: " +
                    'repositories.url.allowed_urls: "%s*" ' % repo_settings["settings"]["url"] +
                    "allowing snapshot to be restored from this URL. Error was: %s" % e)
            else:
                # retry without explicit credentials, assuming they are
                # already configured in the ES keystore
                if repo_settings["settings"].get("access_key"):
                    repo_settings["settings"].pop("access_key")
                    repo_settings["settings"].pop("secret_key")
                    try:
                        repo = idxr.create_repository(repo_name, repo_settings)
                    except IndexerException as e:
                        raise uploader.ResourceError(
                            "Could not create snapshot repository, even assuming " +
                            "credentials configured in keystore: %s" % e)
                else:
                    raise uploader.ResourceError(
                        "Could not create snapshot repository: %s" % e)
    # repository is now ready, let's trigger the restore
    snapshot_name = build_meta["metadata"]["snapshot_name"]
    pinfo = self.get_pinfo()
    pinfo["step"] = "restore"
    pinfo["description"] = snapshot_name

    def get_status_info():
        # poll ES for the recovery status of the target index
        try:
            return idxr.get_restore_status(idxr._index)
        except Exception as e:
            # BUGFIX: return a dict (not a bare string) so the polling
            # loop below can always read status_info["status"] — the old
            # string return made that subscript raise TypeError
            return {"status": "FAILED %s" % e}

    def restore_launched(f):
        # done-callback: surface any error raised while launching restore
        try:
            self.logger.info("Restore launched: %s" % f.result())
        except Exception as e:
            self.logger.error("Error while launching restore: %s" % e)
            raise e

    self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                     (snapshot_name, idxr._index, idxr.es_host))
    job = yield from job_manager.defer_to_thread(
        pinfo,
        partial(idxr.restore, repo_name, snapshot_name, idxr._index,
                purge=self.__class__.AUTO_PURGE_INDEX))
    job.add_done_callback(restore_launched)
    yield from job
    # poll recovery status until it leaves INIT/IN_PROGRESS
    while True:
        status_info = get_status_info()
        status = status_info["status"]
        self.logger.info("Recovery status for index '%s': %s" %
                         (idxr._index, status_info))
        if status in ["INIT", "IN_PROGRESS"]:
            yield from asyncio.sleep(
                getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
        else:
            if status == "DONE":
                self.logger.info(
                    "Snapshot '%s' successfully restored to index '%s' (host: '%s')" %
                    (snapshot_name, idxr._index, idxr.es_host),
                    extra={"notify": True})
            else:
                e = uploader.ResourceError(
                    "Failed to restore snapshot '%s' on index '%s', status: %s" %
                    (snapshot_name, idxr._index, status))
                self.logger.error(e)
                raise e
            break
    # return current number of docs in index
    return self.target_backend.count()
def restore_snapshot(self, build_meta, job_manager, **kwargs):
    """Restore an Elasticsearch snapshot described in *build_meta*.

    Resolves repository name/settings via ``get_snapshot_repository_config``,
    layers per-source then default auth and repo settings from
    ``btconfig.STANDALONE_CONFIG``, ensures the (read-only) repository
    exists, triggers the restore through *job_manager*, then polls
    recovery status until a terminal state is reached.

    :param build_meta: build metadata; must contain
        ``metadata.snapshot_name`` (repository config is derived from it
        by ``get_snapshot_repository_config``)
    :param job_manager: provides ``defer_to_thread`` (coroutine-based)
    :raises uploader.ResourceError: if the repository can't be created
        or the restore ends in a non-DONE status
    :return: current number of documents in the restored index
    """
    self.logger.debug("Restoring snapshot...")
    idxr = self.target_backend.target_esidxer
    es_host = idxr.es_host
    self.logger.debug("Got ES Host: %s", es_host)
    repo_name, repo_settings = self.get_snapshot_repository_config(
        build_meta)
    self.logger.debug("Got repo name: %s", repo_name)
    self.logger.debug("With settings: %s", repo_settings)
    # pull authentication settings from config
    # (per-source entry first, falling back to the '_default' entry)
    auth = btconfig.STANDALONE_CONFIG.get(self.name, {}).get(
        'auth', btconfig.STANDALONE_CONFIG['_default'].get('auth')
    )
    if auth:
        self.logger.debug("Obtained Auth settings, using them.")
    else:
        self.logger.debug("No Auth settings found")
    # all restore repos should be r/o
    repo_settings["settings"]["readonly"] = True
    # populate additional settings (same per-source/default lookup)
    additional_settings = btconfig.STANDALONE_CONFIG.get(self.name, {}).get(
        'repo_settings',
        btconfig.STANDALONE_CONFIG['_default'].get('repo_settings')
    )
    if additional_settings:
        self.logger.debug("Adding additional settings: %s",
                          additional_settings)
        repo_settings['settings'].update(additional_settings)
    if 'client' not in repo_settings['settings']:
        # without an explicit client, ES falls back to its 'default'
        # keystore client; warn so misconfigured credentials are traceable
        self.logger.warning(
            "\"client\" not set in repository settings. The 'default' "
            "client will be used."
        )
        self.logger.warning(
            "Make sure keys are in the Elasticsearch keystore. "
            "If you are trying to work with EOL versions of "
            "Elasticsearch, or if you intentionally enabled "
            "allow_insecure_settings, set \"access_key\", \"secret_key\","
            " and potentially \"region\" in additional 'repo_settings'."
        )
    # first check if snapshot repo exists
    self.logger.info("Getting current repository settings")
    existing_repo_settings = self._get_repository(es_host, repo_name, auth)
    if existing_repo_settings:
        if existing_repo_settings[repo_name] != repo_settings:
            # TODO update comparison logic
            # NOTE(review): unlike the older implementation, a settings
            # mismatch is only logged here — the repo is NOT recreated
            self.logger.info(
                f"Repository '{repo_name}' was found but settings are different, "
                "it may need to be created again"
            )
            self.logger.debug("Existing setting: %s",
                              existing_repo_settings[repo_name])
            self.logger.debug("Required (new) setting: %s" % repo_settings)
        else:
            self.logger.info("Repo exists with correct settings")
    else:
        # ok, it doesn't exist let's try to create it
        self.logger.info("Repo does not exist")
        try:
            self.logger.info("Creating repo...")
            self._create_repository(es_host, repo_name, repo_settings, auth)
        except Exception as e:
            self.logger.info("Creation failed: %s", e)
            if 'url' in repo_settings["settings"]:
                # URL repos need explicit whitelisting in elasticsearch.yml
                raise uploader.ResourceError(
                    "Could not create snapshot repository. Check elasticsearch.yml configuration " +
                    "file, you should have a line like this: " +
                    'repositories.url.allowed_urls: "%s*" ' % repo_settings["settings"]["url"] +
                    "allowing snapshot to be restored from this URL. Error was: %s" % e)
            else:
                raise uploader.ResourceError(
                    "Could not create snapshot repository: %s" % e)
    # repository is now ready, let's trigger the restore
    snapshot_name = build_meta["metadata"]["snapshot_name"]
    pinfo = self.get_pinfo()
    pinfo["step"] = "restore"
    pinfo["description"] = snapshot_name

    def get_status_info():
        # poll ES for the recovery status of the target index
        try:
            res = idxr.get_restore_status(idxr._index)
            return res
        except Exception as e:
            # somethng went wrong, report as failure
            # (dict shape keeps the ["status"] subscript below valid)
            return {"status": "FAILED %s" % e}

    def restore_launched(f):
        # done-callback: surface any error raised while launching restore
        try:
            self.logger.info("Restore launched: %s" % f.result())
        except Exception as e:
            self.logger.error("Error while lauching restore: %s" % e)
            raise e

    self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                     (snapshot_name, idxr._index, idxr.es_host))
    job = yield from job_manager.defer_to_thread(
        pinfo,
        partial(idxr.restore, repo_name, snapshot_name, idxr._index,
                purge=self.__class__.AUTO_PURGE_INDEX))
    job.add_done_callback(restore_launched)
    yield from job
    # poll recovery status until it leaves INIT/IN_PROGRESS
    while True:
        status_info = get_status_info()
        status = status_info["status"]
        self.logger.info("Recovery status for index '%s': %s" %
                         (idxr._index, status_info))
        if status in ["INIT", "IN_PROGRESS"]:
            yield from asyncio.sleep(
                getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
        else:
            if status == "DONE":
                self.logger.info(
                    "Snapshot '%s' successfully restored to index '%s' (host: '%s')" %
                    (snapshot_name, idxr._index, idxr.es_host),
                    extra={"notify": True})
            else:
                e = uploader.ResourceError(
                    "Failed to restore snapshot '%s' on index '%s', status: %s" %
                    (snapshot_name, idxr._index, status))
                self.logger.error(e)
                raise e
            break
    # return current number of docs in index
    return self.target_backend.count()