def create_project_batch(  # pylint: disable=too-many-arguments
    project_id,
    batch_type,
    batch_name,
    sample_ids,
    host,
    email,
    password,
    api_key,
):
    """Create a batch in a project."""
    # Comma-separated ids from the CLI become a clean list; empty/None
    # input means "no explicit sample selection".
    parsed_sample_ids = []
    if sample_ids:
        parsed_sample_ids = [part.strip() for part in sample_ids.split(",")]
        echo_debug("Sample ids translation: {}".format(parsed_sample_ids))
    CreateBatch(
        project_id,
        batch_type,
        batch_name,
        parsed_sample_ids,
        Credentials(email=email, password=password, api_key=api_key),
        Optionals(host=host),
    ).run()
def download_file(destination, download_url, no_progress=False):
    """Download a file to file system.

    Args:
        destination (File): file object
        download_url (str): url of the file to download
        no_progress (bool): don't show progress bar

    Returns:
        str : file path location of the downloaded file
    """
    with requests.get(
        download_url,
        stream=True,
        allow_redirects=False,
        headers=dict(),
        timeout=30,
    ) as response:
        response.raise_for_status()
        echo_debug("Starting download")
        # ``progress`` doubles as the "show progress bar" flag.
        progress = None
        if not no_progress:
            progress = get_progress_bar(
                int(response.headers["content-length"]), "Downloading: "
            )
            progress.start()
        for piece in response.iter_content(chunk_size=CHUNK_SIZE):
            destination.write(piece)
            if progress is not None:
                progress.update(progress.value + len(piece))
        if progress is not None:
            progress.finish()
        echo_debug("Finished downloading")
def validate_credentials(credentials):
    """Validate user credentials.

    Args:
        credentials: object with ``email``, ``password`` and ``api_key``
            attributes.

    Returns:
        bool: False when both username/password AND an API key were
        supplied (ambiguous), True otherwise.
    """
    if credentials.email and credentials.password and credentials.api_key:
        echo_debug("User provided 2 sets of credentials.")
        # Bug fix: the two implicitly-concatenated literals were missing a
        # separating space, rendering as "provided.Please provide...".
        echo_warning("Multiple sets of credentials provided. "
                     "Please provide either username/password or API key.")
        return False
    return True
def seek_files_to_upload(path, path_root=""):
    """Generate a list of valid fastq files.

    Args:
        path (str): directory to traverse.
        path_root (str): optional prefix joined onto each yielded path.

    Yields:
        str: path of each file ending with a fastq extension.
    """
    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place makes os.walk descend into
        # subdirectories in deterministic (alphabetical) order; os.walk
        # handles the recursion itself. The previous trailing call
        # ``seek_files_to_upload(folder, root)`` created a generator that
        # was never consumed — dead code — and has been removed.
        dirs.sort()
        files.sort()
        for file in files:
            file_path = os.path.join(path_root, root, file)
            if file_path.lower().endswith(FASTQ_EXTENSIONS):
                echo_debug("Found file to upload: {}".format(file_path))
                yield file_path
def build_file_path(deliverable, file_with_prefix, download_to, filename=None):
    """Create and return file system path where the file will be downloaded to.

    Args:
        deliverable (dict or SampleFile): used to get download url and
            file type
        file_with_prefix (str): used as a template for download path
        download_to (str): general location where to download the file to
        filename (str, optional): explicit source filename; when omitted
            it is deduced from the download url.

    Returns:
        str : file path on current file system
    """
    prefix = _get_prefix_parts(file_with_prefix)
    # Removed the dead "" pre-initialization: both branches below assign
    # unconditionally. SampleFile exposes attributes; plain dicts use
    # .get() (missing keys yield None).
    if isinstance(deliverable, SampleFile):
        download_url = deliverable.download_url
        file_type = deliverable.file_type
    else:
        download_url = deliverable.get("download_url")
        file_type = deliverable.get("file_type")
    source_filename = (filename if filename
                       else get_filename_from_download_url(download_url))
    destination_filename = prefix.filename
    if prefix.file_extension:
        destination_filename = "{}.{}".format(prefix.filename,
                                              prefix.file_extension)
    # turning off formatting for improved code readability
    # fmt: off
    destination_filename = destination_filename.format(
        **{
            DownloadTemplateParts.FILE_TYPE.value: FILE_TYPES_MAPPER.get(file_type) or file_type,  # noqa: E501 # pylint: disable=line-too-long
            DownloadTemplateParts.FILE_EXTENSION.value: deliverable_type_from_filename(source_filename),  # noqa: E501 # pylint: disable=line-too-long
            DownloadTemplateParts.DEFAULT_FILENAME.value: source_filename,
        })
    # fmt: on
    echo_debug(
        "Calculated destination filename: {}".format(destination_filename))
    return _create_filepath(download_to, prefix.dirs, destination_filename)
def delete_project_samples(  # pylint: disable=too-many-arguments
    project_id,
    sample_ids,
    host,
    email,
    password,
    api_key,
):
    """Delete samples in a project."""
    # Translate the comma-separated CLI argument into a list of ids.
    if sample_ids:
        parsed_ids = [part.strip() for part in sample_ids.split(",")]
    else:
        parsed_ids = []
    echo_debug("Sample ids translation: {}".format(parsed_ids))
    DeleteSamples(
        project_id,
        parsed_ids,
        Credentials(email=email, password=password, api_key=api_key),
        Optionals(host=host),
    ).run()
def get_filename_from_download_url(url):
    """Deduce filename from url.

    Args:
        url (str): URL string

    Returns:
        str: filename
    """
    parsed = urlparse(url)
    try:
        # Prefer the filename advertised in the content-disposition
        # query argument, when the URL carries one.
        disposition = parse_qs(parsed.query)["response-content-disposition"][0]
        return re.findall(FILENAME_RE, disposition)[0]
    except (KeyError, IndexError):
        echo_debug("URL didn't contain filename query argument. "
                   "Assume filename from url")
        return parsed.path.split("/")[-1]
def login(api_client, credentials):
    """Login user into Gencove's system."""
    # An API key short-circuits the interactive flow entirely.
    if credentials.api_key:
        echo_debug("User authorized via api key")
        api_client.set_api_key(credentials.api_key)
        return True

    # Prompt for whichever of email/password was not supplied.
    needs_email = not credentials.email
    needs_password = not credentials.password
    if needs_email or needs_password:
        echo_info("Login required")
    if needs_email:
        credentials.email = click.prompt("Email", type=str, err=True)
    if needs_password:
        credentials.password = click.prompt("Password", type=str,
                                            hide_input=True, err=True)

    try:
        api_client.login(credentials.email, credentials.password,
                         credentials.otp_token)
        echo_debug("User logged in successfully")
        return True
    except APIClientError as err:
        # A 2FA-enabled account rejects the first attempt with an
        # otp_token error; prompt for the code and retry recursively.
        if "otp_token" in err.message:
            echo_info("One time password required")
            credentials.otp_token = click.prompt("One time password",
                                                 type=str, err=True)
            return login(api_client, credentials)
        echo_debug("Failed to login: {}".format(err))
        echo_error(
            "Failed to login. Please verify your credentials and try again")
        return False
def basespace_import(  # pylint: disable=too-many-arguments
    basespace_project_ids,
    project_id,
    metadata_json,
    host,
    email,
    password,
    api_key,
):  # pylint: disable=line-too-long
    """Import all Biosamples from BaseSpace projects to a project.
    Optionally add metadata to the samples.

    Examples:
        Import Biosamples to a project:

            gencove basespace projects import 12345678 06a5d04b-526a-4471-83ba-fb54e0941758

        Import Biosamples from multiple BaseSpace projects to a project:

            gencove basespace projects import 12345678,87654321 06a5d04b-526a-4471-83ba-fb54e0941758

        Import Biosamples to a project with metadata:

            gencove basespace projects import 12345678 06a5d04b-526a-4471-83ba-fb54e0941758 --metadata-json='{"batch": "batch1"}'
    """  # noqa: E501
    # Split the comma-separated CLI argument into individual ids.
    parsed_ids = [part.strip() for part in basespace_project_ids.split(",")]
    echo_debug(
        "BaseSpace project ids translation: {}".format(parsed_ids))
    BaseSpaceImport(
        parsed_ids,
        project_id,
        Credentials(email=email, password=password, api_key=api_key),
        BaseSpaceImportOptionals(host=host, metadata_json=metadata_json),
    ).run()
def _create_filepath(download_to, prefix_dirs, filename):
    """Build full file path and ensure that directory structure exists.

    Args:
        download_to (str): top level directory path
        prefix_dirs (str): subdirectories structure to create
            under download_to.
        filename (str): name of the file inside download_to/file_prefix
            structure.

    Returns:
        str: full path to the (future) file location.
    """
    echo_debug("_create_filepath Downloading to: {}".format(download_to))
    echo_debug("_create_filepath file prefix is: {}".format(prefix_dirs))

    path = os.path.join(download_to, prefix_dirs)

    # exist_ok=True avoids the TOCTOU race of the previous
    # exists()-then-makedirs() pattern (another process could create the
    # directory between the two calls and makedirs would raise).
    if not os.path.exists(path):
        echo_debug("creating path: {}".format(path))
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, filename)
    echo_debug("Deduced full file path is {}".format(file_path))
    return file_path
def echo_debug(msg, **kwargs):
    """Output debug message.

    Args:
        msg (str): message to print.
        **kwargs: forwarded to ``click.echo``.
    """
    # Bug fix: the previous body called echo_debug() itself, which
    # recursed unconditionally and raised RecursionError on first use.
    # NOTE(review): now delegates to click.echo on stderr; confirm whether
    # this should additionally be gated on a DEBUG/verbosity flag, as
    # other echo_* helpers in the project may be.
    click.echo(msg, err=True, **kwargs)
def deliverable_type_from_filename(filename):
    """Deduce deliverable type based on dot notation."""
    # Everything after the first dot, e.g. "sample.fastq.gz" -> "fastq.gz";
    # a name with no dot yields the empty string.
    filetype = filename.partition(".")[2]
    echo_debug("Deduced filetype to be: {} "
               "from filename: {}".format(filetype, filename))
    return filetype
def download_file(file_path, download_url, skip_existing=True,
                  no_progress=False):
    """Download a file to file system.

    Resumes a partial download when a ``<file_path>.tmp`` file exists, by
    requesting only the remaining bytes via an HTTP Range header.

    Args:
        file_path (str): full file path, according to destination
            and download template
        download_url (str): url of the file to download
        skip_existing (bool): skip already downloaded files
        no_progress (bool): don't show progress bar

    Returns:
        str : file path location of the downloaded file
    """
    file_path_tmp = "{}.tmp".format(file_path)
    if os.path.exists(file_path_tmp):
        # Resume: append to the partial file, asking only for the
        # bytes we do not have yet.
        file_mode = "ab"
        headers = dict(
            Range="bytes={}-".format(os.path.getsize(file_path_tmp)))
        echo_info("Resuming previous download: {}".format(file_path))
    else:
        file_mode = "wb"
        headers = dict()
        echo_info("Downloading file to {}".format(file_path))

    stream_params = dict(stream=True, allow_redirects=False,
                         headers=headers, timeout=30)

    with requests.get(download_url, **stream_params) as req:
        req.raise_for_status()
        total = int(req.headers["content-length"])
        # NOTE(review): on a resumed (Range) request the server reports the
        # size of the remaining bytes, not the full file, so the
        # skip_existing comparison below is only meaningful for fresh
        # downloads — confirm intended semantics.
        # pylint: disable=E0012,C0330
        if (skip_existing and os.path.isfile(file_path)
                and os.path.getsize(file_path) == total):
            echo_info("Skipping existing file: {}".format(file_path))
            return file_path

        echo_debug("Starting to download file to: {}".format(file_path))

        with open(file_path_tmp, file_mode) as downloaded_file:
            if not no_progress:
                # Reuse the already-parsed ``total`` instead of parsing the
                # content-length header a second time.
                pbar = get_progress_bar(total, "Downloading: ")
                pbar.start()
            for chunk in req.iter_content(chunk_size=CHUNK_SIZE):
                downloaded_file.write(chunk)
                if not no_progress:
                    pbar.update(pbar.value + len(chunk))
            if not no_progress:
                pbar.finish()

    # Cross-platform cross-python-version file overwriting
    if os.path.exists(file_path):
        echo_debug("Found old file under same name: {}. "
                   "Removing it.".format(file_path))
        os.remove(file_path)
    os.rename(file_path_tmp, file_path)
    echo_info("Finished downloading a file: {}".format(file_path))
    return file_path
def _refresh_authentication(self):
    """Obtain a fresh access token using the stored refresh token."""
    echo_debug("Refreshing authentication")
    refreshed = self.refresh_token(self._jwt_refresh_token)
    self._set_jwt(refreshed.access)
def _request(
    self,
    endpoint="",
    params=None,
    method="get",
    custom_headers=None,
    timeout=60,
    sensitive=False,
):
    """Send an HTTP request to the API host and return the decoded body.

    Args:
        endpoint (str): path joined onto ``self.host``.
        params (dict, optional): query params for GET; serialized payload
            for DELETE/POST.
        method (str): "get" or "delete"; anything else is sent as POST.
        custom_headers (dict, optional): extra headers merged over the
            defaults.
        timeout (int): per-request timeout in seconds.
        sensitive (bool): when True, the payload and response body are
            redacted in debug logs.

    Returns:
        dict: parsed JSON response, or {} for an empty 2xx body.

    Raises:
        APIClientTooManyRequestsError: on HTTP 429.
        APIClientTimeout: on connect or read timeouts.
        APIClientError: on any other non-2xx response.
    """
    url = urljoin(text(self.host), text(endpoint))
    headers = {
        "content-type": "application/json",
        "date": None,
        "Gencove-cli-version": cli_version(),
    }
    if custom_headers:
        headers.update(custom_headers)
    if not params:
        params = {}
    # Redact the payload in logs when the request carries secrets.
    echo_debug(
        "Contacting url: {} with payload: {}".format(
            url, "[SENSITIVE CONTENT]" if sensitive else params
        )
    )
    start = time.time()
    try:
        if method == "get":
            response = get(
                url=url, params=params, headers=headers, timeout=timeout
            )
        elif method == "delete":
            post_payload = APIClient._serialize_post_payload(params)
            response = delete(
                url=url,
                data=post_payload,
                headers=headers,
                timeout=timeout,
            )
        else:
            # Any other method value falls through to POST.
            post_payload = APIClient._serialize_post_payload(params)
            response = post(
                url=url,
                data=post_payload,
                headers=headers,
                timeout=timeout,
            )
        if response.status_code == 429:
            raise APIClientTooManyRequestsError("Too Many Requests")
    except (ConnectTimeout, ConnectionError):
        # If request timed out,
        # let upper level handle it the way it sees fit.
        # one place might want to retry another might not.
        raise APIClientTimeout(  # pylint: disable=W0707
            "Could not connect to the api server"
        )
    except ReadTimeout:
        raise APIClientTimeout(  # pylint: disable=W0707
            "API server did not respond in timely manner"
        )
    echo_debug(
        "API response is {} status is {} in {}ms".format(
            "[SENSITIVE CONTENT]" if sensitive else response.content,
            response.status_code,
            (time.time() - start) * 1000,
        )
    )
    # pylint: disable=no-member
    if response.status_code >= 200 and response.status_code < 300:
        # An empty 2xx body decodes to an empty dict.
        return response.json() if response.text else {}

    http_error_msg = ""
    if 400 <= response.status_code < 500:
        http_error_msg = "API Client Error: {}".format(response.reason)
        if response.text:
            response_json = response.json()
            if "detail" in response_json:
                http_error_msg += ": {}".format(response_json["detail"])
            else:
                try:
                    error_msg = "\n".join(
                        [
                            # create-batch can return error details that
                            # is a dict, not a list
                            " {}: {}".format(
                                key,
                                value[0]
                                if isinstance(value, list)
                                else str(value),
                            )
                            for key, value in response_json.items()
                        ]
                    )
                except AttributeError:
                    # Body was a plain list of error strings, not a mapping.
                    error_msg = "\n".join(response_json)
                http_error_msg += ":\n{}".format(error_msg)
    elif 500 <= response.status_code < 600:
        http_error_msg = "Server Error: {}".format(response.reason)
    raise APIClientError(http_error_msg, response.status_code)
def download(  # pylint: disable=E0012,C0330,R0913
    destination,
    project_id,
    sample_ids,
    file_types,
    skip_existing,
    download_urls,
    download_template,
    host,
    email,
    password,
    api_key,
    no_progress,
):  # noqa: D413,D301,D412 # pylint: disable=C0301
    """Download deliverables of a project.

    Must specify either project id or sample ids.

    Examples:
        Download all samples results:

            gencove download ./results --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db

        Download some samples:

            gencove download ./results --sample-ids 59f5c1fd-cce0-4c4c-90e2-0b6c6c525d71,7edee497-12b5-4a1d-951f-34dc8dce1c1d

        Download specific deliverables:

            gencove download ./results --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db --file-types alignment-bam,impute-vcf,fastq-r1,fastq-r2

        Skip download entirely and print out the deliverables as a JSON:

            gencove download - --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db --download-urls

    \f

    Args:
        destination (str): path/to/save/deliverables/to.
        project_id (str): project id in Gencove's system.
        sample_ids (list(str), optional): specific samples for which
            to download the results. if not specified, download deliverables
            for all samples.
        file_types (list(str), optional): specific deliverables to download
            results for. if not specified, all file types will be downloaded.
        skip_existing (bool, optional, default True): skip downloading existing
            files.
        download_urls (bool, optional): output the files available for a
            download. if the destination parameter is "-", it goes to the
            stdout.
        no_progress (bool, optional, default False): do not show progress
            bar.
    """  # noqa: E501
    # Translate comma-separated CLI filters into tuples.
    sample_id_filter = tuple()
    if sample_ids:
        sample_id_filter = tuple(
            part.strip() for part in sample_ids.split(","))
        echo_debug("Sample ids translation: {}".format(sample_id_filter))

    file_type_filter = tuple()
    if file_types:
        file_type_filter = tuple(
            part.strip() for part in file_types.split(","))
        echo_debug("File types translation: {}".format(file_type_filter))

    Download(
        destination,
        DownloadFilters(
            project_id=project_id,
            sample_ids=sample_id_filter,
            file_types=file_type_filter,
        ),
        Credentials(email=email, password=password, api_key=api_key),
        DownloadOptions(
            host=host,
            skip_existing=skip_existing,
            download_template=download_template,
        ),
        download_urls,
        no_progress,
    ).run()