def main():
    """Export sample and file metadata for the requested BaseSpace projects.

    Command-line options:
        -p/--profile  the .basespacepy.cfg profile to load (default "DEFAULT")
        -j/--project  one or more projects to export (required)

    The requested names are matched against the current user's projects via
    ``stringsToBSObj``; every requested name that does not appear (by
    ``str()``) among the matched objects is reported with ``warning``.
    The metadata returned by ``downloadProjectMetadata`` for each matched
    project is combined and written to two tab-separated files in the
    current directory: ``fullSampleMetadata.<timestamp>.txt`` and
    ``fullFileMetadata.<timestamp>.txt``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--profile', default="DEFAULT",
                        help="the .basespacepy.cfg profile to load")
    parser.add_argument('-j', '--project', required=True, nargs="+",
                        help="project to download; can accept multiple values")
    args = parser.parse_args()

    myAPI = BaseSpaceAPI(profile=args.profile, timeout=500)
    user = myAPI.getUserById('current')
    qp = QueryParameters.QueryParameters({'Limit': 1024})
    projects = user.getProjects(myAPI, qp)
    userProjs = stringsToBSObj(projects, args.project)

    foundNames = {str(x) for x in userProjs}
    for lostProj in set(args.project) - foundNames:
        warning("cannot find " + str(lostProj))

    # Collect the per-project frames and concatenate once.  DataFrame.append
    # was removed in pandas 2.0, and appending in a loop re-copies the
    # accumulated frame on every iteration.
    # NOTE(review): assumes downloadProjectMetadata returns two DataFrames.
    sampleFrames = []
    fileFrames = []
    for project in userProjs:
        smout, fmout = downloadProjectMetadata(project, myAPI)
        sampleFrames.append(smout)
        fileFrames.append(fmout)

    # pd.concat raises on an empty list, so fall back to an empty frame to
    # keep writing (empty) output files when no project matched.
    fullSampleMetadata = pd.concat(sampleFrames) if sampleFrames else pd.DataFrame()
    fullFileMetadata = pd.concat(fileFrames) if fileFrames else pd.DataFrame()

    # The timestamp is part of the output file names; the space between date
    # and time is replaced by ';'.
    thisInstant = str(datetime.datetime.today()).replace(' ', ';')
    fullSampleMetadata.to_csv('fullSampleMetadata.' + thisInstant + '.txt',
                              sep='\t', header=True, index=False)
    fullFileMetadata.to_csv('fullFileMetadata.' + thisInstant + '.txt',
                            sep='\t', header=True, index=False)
import sys
import os
import glob
import logging

from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI
from BaseSpacePy.model.QueryParameters import QueryParameters as qp

# Query options shared by every listing call below.
listOptions = qp({'Limit': 1024})

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S',
)

myAPI = BaseSpaceAPI()

# Name of the project to work on: first command-line argument.
p = sys.argv[1]

user = myAPI.getUserById('current')
logging.info("User Name: %s", str(user))

projects = myAPI.getProjectByUser(listOptions)
project_list = [candidate.Name for candidate in projects]

# Locate the requested project by name; list.index raises ValueError when
# the name is unknown, in which case the available names are logged and the
# script exits with status 1.
try:
    idx = project_list.index(p)
    project = projects[idx]
except ValueError:
    message = '"%s" is not in your projects. Available projects are:\n%s' % (
        p,
        '\n'.join(project_list),
    )
    logging.error(message)
    sys.exit(1)

# FASTQ archives already present in the current directory.
downloaded = glob.glob('*fastq.gz')

logging.info("Retrieving samples from project %s", p)
samples = project.getSamples(myAPI, listOptions)
exception message: 'Forbidden: App credentials do not match AppSession application'. """ """ NOTE: You will need to provide the credentials for your app (available in the developer portal). You can do this with a master config file (preferred), or by filling in values below. """ # If you're not using a config file, fill in your app's credentials here: clientKey = "" clientSecret = "" appSessionId = "" apiServer = 'https://api.basespace.illumina.com/' # or 'https://api.cloud-hoth.illumina.com/' apiVersion = 'v1pre3' # First we will initialize a BaseSpace API object using our app information and the appSessionId if clientKey: myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId) else: myAPI = BaseSpaceAPI(profile='DEFAULT') # Using the basespaceApi we can request the appSession object corresponding to the AppSession id supplied myAppSession = myAPI.getAppSession() print myAppSession # An app session contains a reference to one or more appLaunchObjects which reference the data module # the user launched the app on. This can be a list of projects, samples, or a mixture of objects print "\nType of data the app was triggered on can be seen in 'references'" print myAppSession.References # We can also get a handle to the user who started the AppSession print "\nWe can get a handle for the user who triggered the app\n" + str( myAppSession.UserCreatedBy)
from __future__ import print_function import sys, os, glob, logging from argparse import ArgumentParser from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI from BaseSpacePy.model.QueryParameters import QueryParameters as qp list_options = qp({'Limit': 1024}) logging.basicConfig( level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%d/%m/%Y %H:%M:%S', ) bs = BaseSpaceAPI() user = bs.getUserById('current') logging.info("User Name: %s", user) projects = bs.getProjectByUser(list_options) project_list = [project.Name for project in projects] cli = ArgumentParser() cli.add_argument( 'project', nargs='?', help= 'Which project to download files from. When not specified, list projects instead.' ) cli.add_argument( '--dry-run',
def _filter_numeric_ids(id_list, obj_type):
    """Return the entries of *id_list* that consist only of digits.

    Every other entry is reported through ``print_stderr`` and dropped.
    *obj_type* ("project" or "sample") is only used to word the message.
    """
    digit_pattern = re.compile(r'^\d+$')
    valid_ids = []
    for candidate in id_list:
        if digit_pattern.match(candidate):
            valid_ids.append(candidate)
        else:
            print_stderr(
                'Error: Invalid format for user-specified {0} id '
                '"{1}": {0} ids are strictly numeric. Did you mean '
                'to pass this as a {0} name?'.format(obj_type, candidate))
    return valid_ids


def download_basespace_files(config_file_path=None,
                             client_key=None,
                             client_secret=None,
                             access_token=None,
                             project_id_list=None,
                             project_name_list=None,
                             sample_id_list=None,
                             sample_name_list=None,
                             dry_run=False,
                             output_directory=None,
                             recreate_basespace_dir_tree=True):
    """Download the files of the selected BaseSpace projects and samples.

    Credentials come from the explicit arguments; any that are missing are
    read from the defaults of the config file at *config_file_path*
    (options ``clientkey``, ``clientsecret``, ``accesstoken``).  The process
    exits with status 1 if one of the three is still missing.

    Projects are selected by name and/or id; with neither given, all of the
    user's projects are used.  Samples are then selected the same way from
    the samples of those projects.  All files of the selected samples are
    downloaded into *output_directory* (default: the current directory).

    :param config_file_path: optional path to a config file
    :param client_key: app client key
    :param client_secret: app client secret
    :param access_token: app access token
    :param project_id_list: project ids (digit-only strings) to select
    :param project_name_list: project names to select
    :param sample_id_list: sample ids (digit-only strings) to select
    :param sample_name_list: sample names to select
    :param dry_run: if true, list what would be downloaded without creating
        the output directory or downloading anything
    :param output_directory: target directory
    :param recreate_basespace_dir_tree: passed to ``downloadFile`` as
        ``createBsDir``
    """
    # Check input parameters / load from config file / defaults
    project_id_list = project_id_list or []
    project_name_list = project_name_list or []
    sample_id_list = sample_id_list or []
    sample_name_list = sample_name_list or []

    if not output_directory:
        output_directory = os.getcwd()
        print_stderr(
            "Output directory not specified; using current directory ({})".
            format(output_directory))
    else:
        output_directory = os.path.abspath(output_directory)

    if not dry_run:
        safe_makedir(output_directory)

    config_dict = {}
    if config_file_path:
        config_parser = ConfigParser()
        config_parser.read(config_file_path)
        # Public accessor for the defaults mapping (was the private
        # ``_defaults`` attribute).
        config_dict = config_parser.defaults()
        if not client_key:
            client_key = config_dict.get('clientkey')
        if not client_secret:
            client_secret = config_dict.get('clientsecret')
        if not access_token:
            access_token = config_dict.get('accesstoken')

    missing_params = [
        name for name, value in (("client_key", client_key),
                                 ("client_secret", client_secret),
                                 ("access_token", access_token))
        if not value
    ]
    if missing_params:
        # The original message had no placeholder for the list of missing
        # parameters, so it was silently dropped.
        print_stderr(
            'Error: Required parameters not supplied either in config '
            'file ({}) or via arguments: {}'.format(
                config_file_path, ', '.join(missing_params)))
        sys.exit(1)

    app_session_id = config_dict.get("appsessionid") or ""
    api_server = config_dict.get(
        "apiserver") or "https://api.basespace.illumina.com"
    api_version = config_dict.get("apiversion") or "v1pre3"

    # Get the API connection object
    myAPI = BaseSpaceAPI(clientKey=client_key,
                         clientSecret=client_secret,
                         apiServer=api_server,
                         version=api_version,
                         appSessionId=app_session_id,
                         AccessToken=access_token)
    basespace_projects = myAPI.getProjectByUser(qp({'Limit': 1024}))
    user = myAPI.getUserById('current')

    # If user specified projects, get them by name or id
    project_objects = []
    if project_name_list:
        project_objects.extend(
            _select_from_object(filter_list=project_name_list,
                                search_list=basespace_projects,
                                key_attr="Name",
                                obj_type="project",
                                user=user))
    if project_id_list:
        project_objects.extend(
            _select_from_object(
                filter_list=_filter_numeric_ids(project_id_list, "project"),
                search_list=basespace_projects,
                key_attr="Id",
                obj_type="project",
                user=user))
    if not (project_name_list or project_id_list):
        # Get all projects if none are specified by user
        project_objects = basespace_projects

    basespace_samples = []
    for project_obj in project_objects:
        basespace_samples.extend(project_obj.getSamples(myAPI))

    sample_objects = []
    if sample_name_list:
        sample_objects.extend(
            _select_from_object(filter_list=sample_name_list,
                                search_list=basespace_samples,
                                key_attr="Name",
                                obj_type="sample",
                                user=user))
    if sample_id_list:
        sample_objects.extend(
            _select_from_object(
                filter_list=_filter_numeric_ids(sample_id_list, "sample"),
                search_list=basespace_samples,
                key_attr="Id",
                obj_type="sample",
                user=user))
    if not (sample_name_list or sample_id_list):
        # Get all samples if none are specified by user
        sample_objects = basespace_samples

    files_to_download = []
    for sample_obj in sample_objects:
        files_to_download.extend(sample_obj.getFiles(myAPI))

    if not files_to_download:
        print_stderr("Error: no files found to download.")
        return

    total = len(files_to_download)
    print_stderr("Found {} files to download: ".format(total))
    for file_obj in files_to_download:
        print_stderr("\t- {}".format(file_obj))
    print_stderr('Downloading files to output directory {}'.format(
        output_directory))
    if recreate_basespace_dir_tree:
        print_stderr(
            "Recreating BaseSpace project directory tree for file.")
    if dry_run:
        print_stderr("-> Dry run: not downloading any data.")

    for i, file_obj in enumerate(files_to_download):
        print_stderr('[{}/{}] Downloading file "{}"'.format(
            i + 1, total, file_obj))
        if not dry_run:
            file_obj.downloadFile(api=myAPI,
                                  localDir=output_directory,
                                  createBsDir=recreate_basespace_dir_tree)

    # Nothing was written during a dry run, so do not claim completion.
    if not dry_run:
        print_stderr('Download completed; files are located in "{}"'.format(
            output_directory))
def download(clientKey=None, clientSecret=None, accessToken=None, sampleId=None, projectId=None, sampleName=None, projectName=None, outputDirectory='\\.', createBsDir=True):
    '''
    Downloads sample-level files.

    Project Id and project name should
    not be specified together; similarly sample Id and sample name should not be
    specified together.

    1. If only a project Id or only a project name is given, all files for all
    samples will be downloaded within that project.  If additionally a sample Id or
    sample name is given, then only the first matching sample within the project
    will be downloaded.
    2. If only a sample Id is given, then all files for that sample will be downloaded.
    3. If only a sample name is given, then all files within the first project
    containing a sample with matching name will be downloaded.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param sampleId the BaseSpace sample identifier
    :param projectId the BaseSpace project identifier
    :param sampleName the BaseSpace sample name
    :param projectName the BaseSpace project name
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    '''
    # NOTE(review): the default outputDirectory is the two characters
    # backslash + dot; the value is kept for compatibility but spelled with an
    # explicit escape (the original '\.' is an invalid escape sequence).
    # It looks like '.' was intended - confirm.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    projectLimit = 100
    sampleLimit = 1024
    sampleFileLimit = 1024

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (the result is not used further in this function)
    user = myAPI.getUserById('current')

    sampleToFiles = {}
    if projectId is not None:
        sampleToFiles = Samples.__get_files_to_download(myAPI, projectId, sampleId, sampleName, sampleLimit, sampleFileLimit)
    else:
        # No project id: scan the user's projects (optionally restricted to
        # projectName) and stop at the first one that yields any files.
        myProjects = myAPI.getProjectByUser(qp({'Limit': projectLimit}))
        for project in myProjects:
            if projectName is not None and project.Name != projectName:
                continue
            sampleToFiles = Samples.__get_files_to_download(myAPI, project.Id, sampleId, sampleName, sampleLimit, sampleFileLimit)
            if len(sampleToFiles) > 0:
                break

    # Flatten to (sample id, file) pairs so the progress counter is a plain
    # enumerate and the loop variable no longer shadows the sampleId parameter.
    pending = [(sid, sampleFile)
               for sid in sampleToFiles
               for sampleFile in sampleToFiles[sid]]
    numFiles = len(pending)
    print("Will download %d files." % numFiles)

    for i, (sid, sampleFile) in enumerate(pending):
        print('Downloading (%d/%d): %s' % ((i + 1), numFiles, str(sampleFile)))
        print("BaseSpace File Path: %s" % sampleFile.Path)
        print("Sample Id: %s" % sid)
        # NOTE(review): `options` is not defined in this function; it must be
        # a module-level object (presumably parsed CLI options) - confirm.
        if not options.dryRun:
            if createBsDir:
                sampleOutputDirectory = os.path.join(outputDirectory, sid)
            else:
                sampleOutputDirectory = outputDirectory
            sampleFile.downloadFile(myAPI, sampleOutputDirectory, createBsDir=createBsDir)
    print("FASTQ file downloading complete.")
def main(uargs):
    """Print the projects, samples and runs visible to a BaseSpace user.

    :param uargs: mapping of command-line options; reads ``--config`` (path
        to the config file, rewritten in place to an absolute path),
        ``--profile`` (section of the config file to use) and ``--user``
        (user id, or None for the current user).
    :raises KeyError: if the requested profile is not in the config file.

    Output is two tab-separated tables on stdout: one row per
    (project, sample) pair, then one row per run of the user.
    """
    uargs['--config'] = os.path.abspath(os.path.expanduser(uargs['--config']))
    conf = ConfigObj(uargs['--config'], configspec=get_configspec())
    try:
        conf_args = conf[uargs['--profile']]
    except KeyError:
        msg = 'Profile "{}" not found in config file'
        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(msg.format(uargs['--profile']))

    # selecting config args, in the positional order BaseSpaceAPI is called with
    keys = ['clientKey', 'clientSecret', 'apiServer',
            'apiVersion', 'appSessionId', 'accessToken']
    conf_args = [conf_args[x] for x in keys]
    myAPI = BaseSpaceAPI(*conf_args)

    # setting user
    if uargs['--user'] is not None:
        user = myAPI.getUserById(uargs['--user'])
    else:
        user = myAPI.getUserById('current')

    # user projects
    projects = myAPI.getProjectByUser()
    print("## The projects for this user are:")
    project_headers = ['ProjectID', 'Name', 'UserOwnedBy', 'DateCreated']
    sample_headers = ['SampleID', 'NumReadsRaw', 'NumReadsPF', 'IsPairedEnd', 'Status']
    print('\t'.join(project_headers + sample_headers))
    for project in projects:
        project_data = [project.Id, project.Name,
                        project.UserOwnedBy, project.DateCreated]
        samples = myAPI.getSamplesByProject(project.Id)
        for sample in samples:
            sample_data = [sample.Id, sample.NumReadsRaw, sample.NumReadsPF,
                           sample.IsPairedEnd, sample.Status]
            print('\t'.join([str(x) for x in project_data + sample_data]))

    # user runs
    print("## The runs for this user are:")
    print('\t'.join(['RunID', 'ExperimentName', 'UserOwnedBy',
                     'DateCreated', 'Status']))
    runs = user.getRuns(myAPI)
    for run in runs:
        data = [run.Id, run.ExperimentName, run.UserOwnedBy,
                run.DateCreated, run.Status]
        print('\t'.join([str(x) for x in data]))
def main():
    """Download the files of one BaseSpace run and verify each download.

    Credentials (key, secret, token) are taken from the command line.  The
    user's runs are listed on stderr; when ``--run`` names one of them, its
    files are downloaded into ``--directory`` (recreating the BaseSpace
    directory layout), skipping any file whose path contains one of the
    comma-separated ``--excluded_path`` strings.  ``--num_items`` limits both
    the number of runs listed and the number of files fetched; ``--offset``
    is the starting offset into the run's file list.

    Each downloaded file is checked against its S3 etag when the etag is 32
    characters long (compared with ``md5_hash`` of the local file), and
    against the reported file size otherwise.  A failure on one file is
    reported and does not stop the remaining downloads.

    Exits with status 1 if ``--run`` does not match any listed run.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-k', '--key', dest='key', required=True, type=str,
                        help='specify client key')
    parser.add_argument('-s', '--secret', dest='secret', required=True,
                        type=str, help='specify client secret')
    parser.add_argument('-t', '--token', dest='token', required=True,
                        type=str, help='specify access token')
    parser.add_argument('-r', '--run', dest='run', required=False, type=str,
                        help='specify the run name to download')
    parser.add_argument('-d', '--directory', dest='directory', required=False,
                        type=str, default='./',
                        help='specify download directory. '
                             'The default is the current directory')
    parser.add_argument('--offset', dest='offset', required=False, type=int,
                        default=0,
                        help='specify the starting offset to read. '
                             'The default is 0')
    parser.add_argument('-n', '--num_items', dest='num_items', required=False,
                        type=int, default=10,
                        help='specify the maximum number of items to return '
                             '(max 1024). The default is 10')
    parser.add_argument('-e', '--excluded_path', dest='excluded_path',
                        required=False, type=str, default=None,
                        help='specify files to skip (comma separated). '
                             'If file paths contain '
                             'this(ese) strings, it will be skipped.) '
                             'The default is None')
    args = parser.parse_args()

    client_key = args.key
    client_secret = args.secret
    client_token = args.token
    run_name = args.run
    download_directory = args.directory
    if args.excluded_path:
        excluded_file_path_strings = args.excluded_path.split(',')
    else:
        excluded_file_path_strings = []
    num_items = args.num_items
    offset = args.offset

    base_space_url = 'https://api.basespace.illumina.com/'
    my_bs_api = BaseSpaceAPI(client_key, client_secret, base_space_url,
                             'v1pre3', '', client_token)
    user = my_bs_api.getUserById('current')
    print('User: {}'.format(str(user)), file=sys.stderr)
    runs = user.getRuns(my_bs_api, queryPars=qp({'Limit': num_items}))
    print('Run(s): {}'.format(runs), file=sys.stderr)

    if not run_name:
        # Without --run the tool only lists the available runs.
        return

    # First run with a matching name; previously an unknown name ended in a
    # bare IndexError.
    run = next((candidate for candidate in runs
                if candidate.Name == run_name), None)
    if run is None:
        print('Error: run "{}" not found in the run(s) listed above'
              .format(run_name), file=sys.stderr)
        sys.exit(1)

    print('Total size ({}): {} GB'.format(run.Name,
                                          run.TotalSize / 1000000000),
          file=sys.stderr)
    print('Offset: {}'.format(offset), file=sys.stderr)
    print('Number of items to return: {}'.format(num_items), file=sys.stderr)

    run_files = run.getFiles(my_bs_api,
                             queryPars=qp({'Limit': num_items,
                                           'Offset': offset}))
    for f in run_files:
        remote_path = f.Path
        if any(fragment in remote_path
               for fragment in excluded_file_path_strings):
            print('Skipping file: {}'.format(remote_path), file=sys.stderr)
            continue

        print('Downloading file: {}'.format(remote_path), '...',
              sep=' ', end='', file=sys.stderr)
        try:
            f.downloadFile(my_bs_api, download_directory, createBsDir=True)
            etag = f.getFileS3metadata(my_bs_api)['etag']
            local_path = os.path.join(download_directory, remote_path)
            if len(etag) == 32:
                # A 32-character etag is compared against the file's md5.
                f_md5 = md5_hash(local_path)
                if f_md5 == etag:
                    print(' done (md5 correct)!', file=sys.stderr)
                else:
                    print(' error (md5 incorrect)!', f.Id, etag, f_md5,
                          file=sys.stderr)
            elif f.Size == os.path.getsize(local_path):
                print(' done (file size correct)!', file=sys.stderr)
            else:
                print(' error (file size incorrect)!', f.Id, etag,
                      file=sys.stderr)
        except Exception as e:
            # Deliberate best effort: report the failure and continue with
            # the next file.
            print(' error ({})!!'.format(e), file=sys.stderr)
import json import os import unittest from BaseSpacePy.api.AppLaunchHelpers import AppSessionMetaDataRaw, AppSessionMetaDataSDK, LaunchSpecification, \ LaunchPayload from BaseSpacePy.api.BaseSpaceAPI import BaseSpaceAPI from BaseSpacePy.model.AppSessionResponse import AppSessionResponse api = BaseSpaceAPI() mydir = os.path.dirname(os.path.abspath(__file__)) app_session_path = os.path.join(mydir, "appsession.json") app_name = "BWA Whole Genome Sequencing v1.0" app_id = "279279" app_properties = [ {'Type': 'string', 'Name': 'Input.AnnotationSource'}, {'Type': 'string[]', 'Name': 'Input.FlagPCRDuplicates-id'}, {'Type': 'string', 'Name': 'Input.genome-id'}, {'Type': 'string', 'Name': 'Input.GQX-id'}, {'Type': 'project', 'Name': 'Input.project-id'}, {'Type': 'sample', 'Name': 'Input.sample-id'}, {'Type': 'string', 'Name': 'Input.StrandBias-id'} ] app_property_names = [ 'AnnotationSource', 'FlagPCRDuplicates-id', 'genome-id', 'GQX-id', 'project-id', 'sample-id',
def download(clientKey=None, clientSecret=None, accessToken=None, appResultId=None, fileNameRegexesInclude=None, fileNameRegexesOmit=None, outputDirectory='\\.', createBsDir=True, force=False, numRetries=3):
    '''
    Downloads App Result files.

    Provide an App Result identifier, and optionally regexes to include or omit files
    based on their names (path not included). Omission takes precedence over inclusion.
    Patterns are applied with ``re.match`` (anchored at the start) to ``str(file)``.

    Nothing is downloaded when the module-level ``options.dryRun`` is true.

    :param clientKey the Illumina developer app client key
    :param clientSecret the Illumina developer app client secret
    :param accessToken the Illumina developer app access token
    :param appResultId the BaseSpace App Result identifier
    :param fileNameRegexesInclude a list of regexes on which to include files based on name (None or empty: include all)
    :param fileNameRegexesOmit a list of regexes on which to omit files based on name (takes precedence over include)
    :param outputDirectory the root output directory
    :param createBsDir true to recreate the path structure within BaseSpace, false otherwise
    :param force use the force: overwrite existing files if true, false otherwise
    :param numRetries the number of attempts for a single download API call (at least one attempt is always made)
    :raises BaseSpaceException.ServerResponseException if every attempt for a file fails
    '''
    # NOTE(review): the default outputDirectory is the two characters
    # backslash + dot; the value is kept for compatibility but spelled with an
    # explicit escape (the original '\.' is an invalid escape sequence).
    # It looks like '.' was intended - confirm.
    appSessionId = ''
    apiServer = 'https://api.basespace.illumina.com/'  # or 'https://api.cloud-hoth.illumina.com/'
    apiVersion = 'v1pre3'
    fileLimit = 10000
    sleepTime = 1.0

    # init the API
    if clientKey is not None:
        myAPI = BaseSpaceAPI(clientKey, clientSecret, apiServer, apiVersion, appSessionId, accessToken)
    else:
        myAPI = BaseSpaceAPI(profile='DEFAULT')

    # get the current user (the result is not used further in this function)
    user = myAPI.getUserById('current')

    appResult = myAPI.getAppResultById(Id=appResultId)
    print("Retrieving files from the App Result: " + str(appResult))

    # Get all the files from the AppResult
    filesToDownload = appResult.getFiles(myAPI, queryPars=qp({'Limit': fileLimit}))

    # Filter file names based on the include or omit regexes
    includePatterns = [re.compile(pattern) for pattern in fileNameRegexesInclude or ()]
    omitPatterns = [re.compile(pattern) for pattern in fileNameRegexesOmit or ()]

    def keepFile(name):
        # No include patterns means "include everything"; omit always wins.
        included = not includePatterns or any(p.match(name) for p in includePatterns)
        omitted = any(p.match(name) for p in omitPatterns)
        return included and not omitted

    filesToDownload = [f for f in filesToDownload if keepFile(str(f))]

    print("Will download %d files." % len(filesToDownload))

    # Always try at least once; with numRetries=0 the original loop never ran
    # and then executed `raise None`.
    attempts = max(1, numRetries)

    for i, appResultFile in enumerate(filesToDownload):
        print('Downloading (%d/%d): %s' % ((i + 1), len(filesToDownload), str(appResultFile)))
        print("File Path: %s" % appResultFile.Path)
        # NOTE(review): `options` is not defined in this function; it must be
        # a module-level object (presumably parsed CLI options) - confirm.
        if options.dryRun:
            continue

        relativePath = str(appResultFile.Path)
        if not createBsDir:
            relativePath = os.path.basename(relativePath)
        # The file is written below outputDirectory, so that is where an
        # existing copy has to be looked for (the original tested the path
        # relative to the current working directory).
        outputPath = os.path.join(outputDirectory, relativePath)

        if os.path.exists(outputPath):
            if force:
                print("Overwritting: %s" % outputPath)
            else:
                print("Skipping existing file: %s" % outputPath)
                continue
        else:
            print("Downloading to: %s" % outputPath)

        for attempt in range(attempts):
            try:
                appResultFile.downloadFile(myAPI, outputDirectory, createBsDir=createBsDir)
            except BaseSpaceException.ServerResponseException:
                if attempt + 1 == attempts:
                    # Out of attempts: propagate the last server error.
                    raise
                time.sleep(sleepTime)
            else:
                break
    print("Download complete.")