def get_library_contents(gi, path):
    """
    List the folders and datasets held in a data library

    Wraps every item reported for the library in either a
    LibraryFolder (items whose 'type' is 'folder') or a
    LibraryDataset (everything else).

    FIXME: should filter on full path (currently lists everything
    in the library; the folder part of 'path' is split off but
    not used)

    Arguments:
      gi (GalaxyInstance): bioblend GalaxyInstance
      path (str): path of the data library to fetch the
        contents of

    Returns:
      list: list of folders and datasets, or None (after printing
        a message) if no library id is found for the library
        name.

    """
    logging.debug("Path '%s'" % path)
    client = galaxy.libraries.LibraryClient(gi)
    library_name, _folder_path = split_library_path(path)
    logging.debug("library_name '%s'" % library_name)
    library_id = library_id_from_name(gi, library_name)
    if library_id is None:
        print("No library '%s'" % library_name)
        return None
    # Wrap each raw item according to its reported type
    return [
        LibraryFolder(entry) if entry['type'] == 'folder'
        else LibraryDataset(entry)
        for entry in client.show_library(library_id, contents=True)
    ]
def get_library_contents(gi, path):
    """
    Fetch the contents of a data library as wrapper objects

    Each item reported for the library is wrapped in a
    LibraryFolder when its 'type' is 'folder', and in a
    LibraryDataset otherwise.

    FIXME: should filter on full path (currently lists everything
    in the library; the folder part of 'path' is split off but
    not used)

    Arguments:
      gi (GalaxyInstance): bioblend GalaxyInstance
      path (str): path of the data library to fetch the
        contents of

    Returns:
      list: list of folders and datasets, or None (after printing
        a message) if no library id is found for the library
        name.

    """
    logging.debug("Path '%s'" % path)
    client = galaxy.libraries.LibraryClient(gi)
    library_name, _folder_path = split_library_path(path)
    logging.debug("library_name '%s'" % library_name)
    library_id = library_id_from_name(gi, library_name)
    if library_id is None:
        print("No library '%s'" % library_name)
        return None
    # Pick the wrapper class per item, then build the result list
    wrapped = []
    for entry in client.show_library(library_id, contents=True):
        wrapper = (LibraryFolder if entry['type'] == 'folder'
                   else LibraryDataset)
        wrapped.append(wrapper(entry))
    return wrapped
def create_data_library(galaxy_url, library_name, analysis_dir, dest,
                        projects=None, no_verify=False,
                        wait_interval=5.0):
    """
    Create and populate data library in Galaxy

    Makes a folder for the run inside an existing data library,
    then a subfolder for each project, and adds each project's
    Fastq files (named without any '.gz' extension) to its
    subfolder. Files whose basename already appears in the
    library listing are reported and skipped.

    Arguments:
      galaxy_url (str): URL or alias of Galaxy server
      library_name (str): name of the library on the server to
        export the files to
      analysis_dir (AnalysisDir): analysis directory to export
        the files from
      dest (str): location of top-level data library directory
      projects (list): list of projects to export (default is
        to export all projects)
      no_verify (boolean): True to disable SSL certificate
        checking when connecting to Galaxy server (default is
        to verify certificate)
      wait_interval (float): number of seconds to pause after
        adding each file

    Raises:
      GalaxyUploadException: if the Galaxy instance can't be
        obtained, the library doesn't exist, or a folder can't
        be created.

    """
    # Only the directory part of the destination is used here
    _user, _server, dirn = split_user_host_dir(dest)
    # Turn off SSL certificate verification?
    if no_verify:
        logging.warning("SSL certificate verification disabled")
        turn_off_urllib3_warnings()
    # Create data library structure in Galaxy
    print("Fetching Galaxy instance for %s" % galaxy_url)
    # NOTE(review): this file holds two copies of this function that
    # pass different keyword names here (verify_ssl vs verify);
    # confirm which one get_galaxy_instance accepts
    gi = get_galaxy_instance(galaxy_url, verify_ssl=(not no_verify))
    if gi is None:
        msg = "%s: failed to connect to Galaxy instance" % galaxy_url
        logging.critical(msg)
        raise GalaxyUploadException(msg)
    # Create the folder for the run
    print("Creating folder for run in Galaxy")
    run_path = '/'.join((library_name, analysis_dir.run_name))
    library = library_id_from_name(gi, library_name)
    if library is None:
        msg = "%s: library not found" % library_name
        logging.critical(msg)
        raise GalaxyUploadException(msg)
    run_folder = folder_id_from_name(gi, library, analysis_dir.run_name)
    if run_folder is None:
        description = ("Data for %s run #%s datestamped %s"
                       % (analysis_dir.metadata.platform.upper(),
                          analysis_dir.metadata.run_number,
                          analysis_dir.date_stamp))
        run_folder = create_folder(gi, run_path, description)
        if run_folder is None:
            msg = "%s: failed to create folder" % run_path
            logging.critical(msg)
            raise GalaxyUploadException(msg)
        print("Created run folder: '%s' '%s'" % (run_path, description))
    else:
        logging.warning("%s: run folder already exists"
                        % analysis_dir.run_name)
    for project in analysis_dir.get_projects(include_undetermined=False):
        if projects is not None and project.name not in projects:
            print("Ignoring project '%s'" % project.name)
            continue
        # Build the label once, checking for a missing organism
        # before calling any string methods on it
        organism = project.info.organism
        if organism is not None:
            description = "%s: %s" % (project.name,
                                      organism.replace('/', ','))
        else:
            description = "%s" % project.name
        project_name = "Fastqs (%s)" % description
        project_path = '/'.join((run_path, project_name))
        # Galaxy folder paths are '/'-separated on every platform
        project_folder = folder_id_from_name(
            gi, library,
            '/'.join((analysis_dir.run_name, project_name)))
        if project_folder is not None:
            print("Found existing subfolder for %s" % project.name)
        else:
            print("Creating subfolder for project '%s'" % project.name)
            print("-- name : %s" % project_path)
            print("-- description: %s" % description)
            project_folder = create_folder(gi, project_path, description)
            if project_folder is None:
                msg = "%s: failed to create folder" % project_path
                logging.critical(msg)
                raise GalaxyUploadException(msg)
        print("Populating project folder:")
        for sample in project.samples:
            # Nothing is uploaded while a sample's files are being
            # checked, so a single listing per sample is sufficient;
            # a missing listing (None) is treated as empty
            existing = set(
                os.path.basename(item.name)
                for item in (get_library_contents(gi, project_path) or []))
            fastqs = []
            for fq in sample.fastq:
                fqcp = os.path.join(dirn,
                                    analysis_dir.run_name,
                                    project.name,
                                    os.path.basename(fq))
                # Files are referenced without the '.gz' extension
                if fqcp.endswith('.gz'):
                    fqcp = fqcp[0:-3]
                if os.path.basename(fqcp) in existing:
                    print("-- found: %s" % fqcp)
                else:
                    print("-- %s" % fqcp)
                    fastqs.append(fqcp)
            for fq in fastqs:
                # Add one at a time and pause to try
                # and prevent overloading the server
                add_library_datasets(gi, project_path, [fq, ],
                                     from_server=True,
                                     link_only=True,
                                     file_type='fastqsanger')
                time.sleep(wait_interval)
def create_data_library(galaxy_url, library_name, analysis_dir, dest,
                        projects=None, no_verify=False,
                        wait_interval=5.0):
    """
    Create and populate data library in Galaxy

    Makes a folder for the run inside an existing data library,
    then a subfolder for each project, and adds each project's
    Fastq files (named without any '.gz' extension) to its
    subfolder. Files whose basename already appears in the
    library listing are reported and skipped.

    Arguments:
      galaxy_url (str): URL or alias of Galaxy server
      library_name (str): name of the library on the server to
        export the files to
      analysis_dir (AnalysisDir): analysis directory to export
        the files from
      dest (str): location of top-level data library directory
      projects (list): list of projects to export (default is
        to export all projects)
      no_verify (boolean): True to disable SSL certificate
        checking when connecting to Galaxy server (default is
        to verify certificate)
      wait_interval (float): number of seconds to pause after
        adding each file

    Raises:
      GalaxyUploadException: if the Galaxy instance can't be
        obtained, the library doesn't exist, or a folder can't
        be created.

    """
    # Only the directory part of the destination is used here
    _user, _server, dirn = split_user_host_dir(dest)
    # Turn off SSL certificate verification?
    if no_verify:
        logging.warning("SSL certificate verification disabled")
        turn_off_urllib3_warnings()
    # Create data library structure in Galaxy
    print("Fetching Galaxy instance for %s" % galaxy_url)
    # NOTE(review): this file holds two copies of this function that
    # pass different keyword names here (verify vs verify_ssl);
    # confirm which one get_galaxy_instance accepts
    gi = get_galaxy_instance(galaxy_url, verify=(not no_verify))
    if gi is None:
        msg = "%s: failed to connect to Galaxy instance" % galaxy_url
        logging.critical(msg)
        raise GalaxyUploadException(msg)
    # Create the folder for the run
    print("Creating folder for run in Galaxy")
    run_path = '/'.join((library_name, analysis_dir.run_name))
    library = library_id_from_name(gi, library_name)
    if library is None:
        msg = "%s: library not found" % library_name
        logging.critical(msg)
        raise GalaxyUploadException(msg)
    run_folder = folder_id_from_name(gi, library, analysis_dir.run_name)
    if run_folder is None:
        description = ("Data for %s run #%s datestamped %s"
                       % (analysis_dir.metadata.platform.upper(),
                          analysis_dir.metadata.run_number,
                          analysis_dir.date_stamp))
        run_folder = create_folder(gi, run_path, description)
        if run_folder is None:
            msg = "%s: failed to create folder" % run_path
            logging.critical(msg)
            raise GalaxyUploadException(msg)
        print("Created run folder: '%s' '%s'" % (run_path, description))
    else:
        logging.warning("%s: run folder already exists"
                        % analysis_dir.run_name)
    for project in analysis_dir.get_projects(include_undetermined=False):
        if projects is not None and project.name not in projects:
            print("Ignoring project '%s'" % project.name)
            continue
        # Build the label once, checking for a missing organism
        # before calling any string methods on it
        organism = project.info.organism
        if organism is not None:
            description = "%s: %s" % (project.name,
                                      organism.replace('/', ','))
        else:
            description = "%s" % project.name
        project_name = "Fastqs (%s)" % description
        project_path = '/'.join((run_path, project_name))
        # Galaxy folder paths are '/'-separated on every platform
        project_folder = folder_id_from_name(
            gi, library,
            '/'.join((analysis_dir.run_name, project_name)))
        if project_folder is not None:
            print("Found existing subfolder for %s" % project.name)
        else:
            print("Creating subfolder for project '%s'" % project.name)
            print("-- name : %s" % project_path)
            print("-- description: %s" % description)
            project_folder = create_folder(gi, project_path, description)
            if project_folder is None:
                msg = "%s: failed to create folder" % project_path
                logging.critical(msg)
                raise GalaxyUploadException(msg)
        print("Populating project folder:")
        for sample in project.samples:
            # Nothing is uploaded while a sample's files are being
            # checked, so a single listing per sample is sufficient;
            # a missing listing (None) is treated as empty
            existing = set(
                os.path.basename(item.name)
                for item in (get_library_contents(gi, project_path) or []))
            fastqs = []
            for fq in sample.fastq:
                fqcp = os.path.join(dirn,
                                    analysis_dir.run_name,
                                    project.name,
                                    os.path.basename(fq))
                # Files are referenced without the '.gz' extension
                if fqcp.endswith('.gz'):
                    fqcp = fqcp[0:-3]
                if os.path.basename(fqcp) in existing:
                    print("-- found: %s" % fqcp)
                else:
                    print("-- %s" % fqcp)
                    fastqs.append(fqcp)
            for fq in fastqs:
                # Add one at a time and pause to try
                # and prevent overloading the server
                add_library_datasets(gi, project_path, [fq, ],
                                     from_server=True,
                                     link_only=True,
                                     file_type='fastqsanger')
                time.sleep(wait_interval)