def _retry_request(self, request, timeout=2, attempts=3):
    """Execute a Google Cloud request, retrying on BrokenPipeError.

    The Google Python API client frequently has BrokenPipe errors. This
    function takes a request and executes it up to ``attempts`` times,
    doubling the sleep timeout on each retry (exponential backoff).

    Parameters
    ==========
    request: the Google Cloud request that needs to be executed
    timeout: time to sleep (in seconds) before trying again
    attempts: remaining attempts, throw error when hit 0
    """
    # Import the submodule explicitly: a bare "import googleapiclient"
    # does not guarantee that googleapiclient.errors is resolvable.
    import googleapiclient.errors

    try:
        return request.execute()
    except BrokenPipeError as ex:
        if attempts > 0:
            time.sleep(timeout)
            # Recurse with doubled timeout and one fewer attempt.
            return self._retry_request(request, timeout * 2, attempts - 1)
        # Out of attempts: surface the original error.
        raise ex
    except googleapiclient.errors.HttpError as ex:
        # API-level error: not transient, log and re-raise immediately.
        log_verbose_traceback(ex)
        raise ex
    except Exception as ex:
        log_verbose_traceback(ex)
        raise ex
def _get_bucket(self):
    """Get a connection to the storage bucket (self.bucket).

    Derives the bucket name and subdirectory from
    self.workflow.default_remote_prefix, creates the bucket if it does
    not exist, and raises if the name is taken or otherwise invalid.
    """
    # Import the exceptions submodule explicitly; "import google" alone
    # does not make google.cloud.exceptions resolvable.
    import google.cloud.exceptions

    # Hold path to requested subdirectory and main bucket
    bucket_name = self.workflow.default_remote_prefix.split("/")[0]
    self.gs_subdir = re.sub(
        "^{}/".format(bucket_name), "", self.workflow.default_remote_prefix
    )
    self.gs_logs = os.path.join(self.gs_subdir, "google-lifesciences-logs")

    # Case 1: The bucket already exists
    try:
        self.bucket = self._bucket_service.get_bucket(bucket_name)

    # Case 2: The bucket needs to be created
    except google.cloud.exceptions.NotFound:
        self.bucket = self._bucket_service.create_bucket(bucket_name)

    # Case 3: The bucket name is already taken, or another error occurred.
    except Exception as ex:
        # Generic exceptions have no returncode/output attributes, so
        # report the exception itself rather than crashing the handler.
        logger.error("Cannot get or create {}:\n{}".format(bucket_name, ex))
        log_verbose_traceback(ex)
        raise ex

    logger.debug("bucket=%s" % self.bucket.name)
    logger.debug("subdir=%s" % self.gs_subdir)
    logger.debug("logs=%s" % self.gs_logs)
def _get_services(self):
    """Build API clients for Life Sciences via the Google Discovery
    Build, and create a google storage python client for storage.
    """
    from googleapiclient.discovery import build as discovery_build
    from oauth2client.client import (
        GoogleCredentials,
        ApplicationDefaultCredentialsError,
    )
    from google.cloud import storage

    # Credentials must be exported to environment
    try:
        creds = GoogleCredentials.get_application_default()
    except ApplicationDefaultCredentialsError as ex:
        log_verbose_traceback(ex)
        raise ex

    # Discovery clients for Google Cloud Storage and Life Sciences API
    def _service(name, version):
        # All services share the same credentials and skip disk caching.
        return discovery_build(
            name, version, credentials=creds, cache_discovery=False
        )

    self._storage_cli = _service("storage", "v1")
    self._compute_cli = _service("compute", "v1")
    self._api = _service("lifesciences", "v2beta")
    self._bucket_service = storage.Client()
def _get_services(self):
    """
    Use the Google Discovery Build to generate API clients
    for Life Sciences, and use the google storage python client
    for storage.
    """
    from googleapiclient.discovery import build as discovery_build
    from google.cloud import storage
    import google.auth
    import google_auth_httplib2
    import httplib2
    import googleapiclient

    # Credentials must be exported to environment
    try:
        # oauth2client is deprecated, see:
        # https://google-auth.readthedocs.io/en/master/oauth2client-deprecation.html
        # google.auth is the replacement.
        # The cloud-platform scope covers all cloud services.
        creds, _ = google.auth.default(
            scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )
    # DefaultCredentialsError lives in google.auth.exceptions; it is not
    # re-exported at the google.auth package top level.
    except google.auth.exceptions.DefaultCredentialsError as ex:
        log_verbose_traceback(ex)
        raise ex

    def build_request(http, *args, **kwargs):
        """
        See https://googleapis.github.io/google-api-python-client/docs/thread_safety.html
        """
        # Each request gets its own AuthorizedHttp, since httplib2.Http
        # instances are not thread-safe.
        new_http = google_auth_httplib2.AuthorizedHttp(
            creds, http=httplib2.Http()
        )
        return googleapiclient.http.HttpRequest(new_http, *args, **kwargs)

    # Discovery clients for Google Cloud Storage and Life Sciences API.
    # Create an authorized http object used while building the services.
    authorized_http = google_auth_httplib2.AuthorizedHttp(
        creds, http=httplib2.Http()
    )

    self._storage_cli = discovery_build(
        "storage",
        "v1",
        cache_discovery=False,
        requestBuilder=build_request,
        http=authorized_http,
    )
    self._compute_cli = discovery_build(
        "compute",
        "v1",
        cache_discovery=False,
        requestBuilder=build_request,
        http=authorized_http,
    )
    self._api = discovery_build(
        "lifesciences",
        "v2beta",
        cache_discovery=False,
        requestBuilder=build_request,
        http=authorized_http,
    )
    self._bucket_service = storage.Client()
def run_wrapper(run, input, output, params, wildcards, threads, resources,
                log, version, benchmark, benchmark_repeats, rule, conda_env,
                linemaps, debug=False, shadow_dir=None):
    """
    Wrapper around the run method that handles exceptions and benchmarking.

    Arguments
    run -- the run method
    input -- list of input files
    output -- list of output files
    params -- rule parameters
    wildcards -- so far processed wildcards
    threads -- usable threads
    log -- list of log files
    benchmark -- path of the benchmark file, or None to skip benchmarking
    benchmark_repeats -- number of timed repetitions when benchmarking
    rule (str) -- rule name
    conda_env -- conda environment to activate for the run
    linemaps -- map from compiled to original source lines (for errors)
    shadow_dir -- optional shadow directory root
    """
    if os.name == "posix" and debug:
        # Reattach stdin so interactive debuggers work inside the job.
        sys.stdin = open('/dev/stdin')
    try:
        runs = 1 if benchmark is None else benchmark_repeats
        wallclock = []
        for i in range(runs):
            w = time.time()
            # execute the actual run method.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources,
                    log, version, rule, conda_env)
            w = time.time() - w
            wallclock.append(w)
    except (KeyboardInterrupt, SystemExit) as e:
        # re-raise the keyboard interrupt in order to record an error in
        # the scheduler but ignore it
        raise e
    except BaseException as ex:
        # (Exception, BaseException) was redundant: BaseException covers both.
        log_verbose_traceback(ex)
        # this ensures that exception can be re-raised in the parent thread
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(ex, lineno, linemaps=linemaps,
                                         snakefile=file,
                                         show_traceback=True))

    if benchmark is not None:
        try:
            with open(benchmark, "w") as f:
                print("s", "h:m:s", sep="\t", file=f)
                for t in wallclock:
                    print(t, str(datetime.timedelta(seconds=t)), sep="\t",
                          file=f)
        except BaseException as ex:
            # Benchmark-file failures are reported as workflow errors.
            raise WorkflowError(ex)
def run_wrapper(run, input, output, params, wildcards, threads, resources,
                log, version, benchmark, benchmark_repeats, linemaps,
                debug=False, shadow_dir=None):
    """
    Wrapper around the run method that handles exceptions and benchmarking.

    Arguments
    run -- the run method
    input -- list of input files
    output -- list of output files
    wildcards -- so far processed wildcards
    threads -- usable threads
    log -- list of log files
    shadow_dir -- optional shadow directory root
    """
    if os.name == "posix" and debug:
        sys.stdin = open('/dev/stdin')
    try:
        n_runs = benchmark_repeats if benchmark is not None else 1
        wallclock = []
        for _ in range(n_runs):
            started = time.time()
            # Run the actual job code inside the (optional) shadow directory.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources,
                    log, version)
            wallclock.append(time.time() - started)
    except (KeyboardInterrupt, SystemExit) as e:
        # Re-raise so the scheduler records the interrupt, but otherwise
        # ignore it here.
        raise e
    except (Exception, BaseException) as ex:
        log_verbose_traceback(ex)
        # Re-raise in a form that can cross back into the parent thread.
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(ex, lineno, linemaps=linemaps,
                                         snakefile=file,
                                         show_traceback=True))

    if benchmark is not None:
        try:
            with open(benchmark, "w") as f:
                print("s", "h:m:s", sep="\t", file=f)
                for seconds in wallclock:
                    print(seconds, str(datetime.timedelta(seconds=seconds)),
                          sep="\t", file=f)
        except (Exception, BaseException) as ex:
            raise WorkflowError(ex)