def installed_tool_list(self):
    """
    Gets a tool list from the tool client.
    :return: the list of tools installed on the Galaxy instance
    """
    tool_client = ToolClient(self.gi)
    return tool_client.get_tools()
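A minimal usage sketch for ToolClient.get_tools(), as used in the snippet above; GALAXY_URL and API_KEY are placeholders, and the name filter is only an illustration.

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.tools import ToolClient

# Placeholders -- point these at a real Galaxy server and API key.
gi = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
tool_client = ToolClient(gi)

# get_tools() returns a list of dicts; each has keys such as 'id', 'name' and 'version'.
for tool in tool_client.get_tools(name='Cut'):
    print(tool['id'], tool.get('version'))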
def main():
    try:
        input_path = sys.argv[1]
        output_path = sys.argv[2]
        galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        historyClient = HistoryClient(galaxyInstance)
        toolClient = ToolClient(galaxyInstance)
        workflowClient = WorkflowClient(galaxyInstance)
        datasetClient = DatasetClient(galaxyInstance)
        history = historyClient.create_history('tmp')
        uploadedFile = toolClient.upload_file(input_path, history['id'])
        workflow = workflowClient.show_workflow(WORKFLOW_ID)
        dataset_map = {list(workflow['inputs'].keys())[0]: {'id': uploadedFile['outputs'][0]['id'], 'src': 'hda'}}
        params = {TOOL_ID_IN_GALAXY: {'param': 'reference_genome', 'value': 'hg19'}}
        output = workflowClient.run_workflow(WORKFLOW_ID, dataset_map, params, history['id'])
        downloadDataset(datasetClient, findDatasedIdByExtention(datasetClient, output, 'bed'), output_path)
        # delete history
        historyClient.delete_history(history['id'])
        # if the Galaxy instance supports dataset purging:
        # historyClient.delete_history(history['id'], True)
    except IndexError:
        print('usage: %s key url workflow_id history step=src=dataset_id' % os.path.basename(sys.argv[0]))
        sys.exit(1)
def put(filename, file_type='auto'):
    """
    Given a filename of any file accessible to the docker instance, this
    function will upload that file to Galaxy using the current history.
    Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    tc = ToolClient(gi)
    tc.upload_file(filename, conf['history_id'], file_type=file_type)
def put(filename, file_type='auto', history_id=None):
    """
    Given a filename of any file accessible to the docker instance, this
    function will upload that file to Galaxy using the current history.
    Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    tc = ToolClient(gi)
    history_id = history_id or _get_history_id()
    tc.upload_file(filename, history_id, file_type=file_type)
def transfer_filelist_from_ftp(gi, filelist, history_name):
    tc = ToolClient(gi)
    hc = HistoryClient(gi)
    st = get_time_stamp()
    hist = hc.create_history('{}-{}'.format(history_name, st))
    uploaded_files = []
    for f in filelist:
        upf = tc.upload_from_ftp(path=os.path.basename(f), history_id=hist['id'])['outputs'][0]
        print(upf)
        uploaded_files.append(upf)
    return uploaded_files, hist
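A hedged usage sketch for transfer_filelist_from_ftp above; the URL, API key and file names are placeholders, and the files are assumed to already sit in the user's Galaxy FTP upload directory.

from bioblend.galaxy import GalaxyInstance

# Placeholder server, key and FTP file names for illustration only.
gi = GalaxyInstance(url='https://galaxy.example.org', key='YOUR_API_KEY')
uploaded, history = transfer_filelist_from_ftp(
    gi, ['sample_R1.fastq.gz', 'sample_R2.fastq.gz'], history_name='ftp-import')
print(history['id'], [d['name'] for d in uploaded])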
def create_clients(self):
    '''
    Create bioblend clients for the Galaxy instance.
    '''
    # Create first client and check if the API works
    self.config_client = ConfigClient(self.instance)
    try:
        self.config_client.get_version()
        self.config_client.get_config()
    except Exception:
        logger.error("Provided API-key does not work.")
        return False
    try:
        self.user_client = UserClient(self.instance)
        self.workflow_client = WorkflowClient(self.instance)
        self.tool_client = ToolClient(self.instance)
        self.toolshed_client = ToolShedClient(self.instance)
        self.library_client = LibraryClient(self.instance)
        self.roles_client = RolesClient(self.instance)
        self.history_client = HistoryClient(self.instance)
        self.dataset_client = DatasetClient(self.instance)
    except Exception:
        logger.error("Error initializing other bioblend clients.")
        return False
    return True
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    #print(files)
    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        # initial set
        #if itemp == 31:
        #    break
        #print("Name " + f['name'])
        replicate = f['name'].split('_')[-1].split('.')[0]
        #print(replicate)
        if replicate == '1':
            itemp = itemp + 1
            if not (itemp >= 71 and itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            #print(base)
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            files2 = libraryClient.show_library(brassica_library[0]['id'], contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = f['name'].split('_')[7] + "_" + f['name'].split('_')[-3] + "_" + f['name'].split('_')[-2]
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(hist['id'], reverse_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            datamap['1'] = {'src': 'hda', 'id': dataset_R['id']}
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap, history_id=hist['id'])
            except Exception:
                print('Next')
def main():
    """
    This script uses bioblend to trigger dependencies installations for the provided tools
    """
    args = _parser().parse_args()
    gi = get_galaxy_connection(args)
    tool_client = ToolClient(gi)
    if args.verbose:
        log.basicConfig(level=log.DEBUG)
    if args.tool:
        for tool_conf_path in args.tool:  # type: str
            _, ext = os.path.splitext(tool_conf_path)
            if ext == '.xml':
                log.info("tool_conf xml found, parsing..")
                # install all
                root = ET.ElementTree(file=tool_conf_path).getroot()
                if root.tag == "toolbox":
                    # Install all from tool_conf
                    tool_path = root.get('tool_path', '')
                    tool_path = tool_path.replace('${tool_conf_dir}', os.path.abspath(os.path.dirname(tool_conf_path)))
                    if tool_path:
                        log.info("Searching for tools relative to " + tool_path)
                    tools = root.findall(".//tool[@file]")
                    if len(tools) == 0:
                        log.warning("No tools found in tool_conf")
                        continue
                    for tool in tools:
                        tool_id = ET.ElementTree(file=os.path.join(tool_path, tool.get('file'))).getroot().get('id')
                        if tool_id:
                            log.info("Installing tool dependencies for " + tool_id + " from: " + tool.get('file'))
                            _install(tool_client, tool_id)
                elif root.tag == "tool" and root.get('id'):
                    # Install from single tool file
                    log.info("Tool xml found. Installing " + root.get('id') + " dependencies..")
                    _install(tool_client, root.get('id'))
            else:
                log.info("YAML tool list found, parsing..")
                with open(tool_conf_path) as fh:
                    tool_ids = yaml.safe_load(fh)
                for tool_id in tool_ids:
                    # Install from yaml file
                    log.info("Installing " + tool_id + " dependencies..")
                    _install(tool_client, tool_id)
    if args.id:
        for tool_id in args.id:  # type: str
            log.info("Installing " + tool_id + " dependencies..")
            _install(tool_client, tool_id.strip())
def put(filename, file_type='auto', history_id=None, use_objects=DEFAULT_USE_OBJECTS):
    """
    Given a filename of any file accessible to the docker instance, this
    function will upload that file to Galaxy using the current history.
    Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection(use_objects)
    history_id = history_id or _get_history_id()
    if use_objects:
        history = gi.histories.get(history_id)
        history.upload_dataset(filename, file_type=file_type)
    else:
        tc = ToolClient(gi)
        tc.upload_file(filename, history_id, file_type=file_type)
def __init__(self, galaxy_instance, configuration):
    """
    :param galaxy_instance: A GalaxyInstance object (import from bioblend.galaxy)
    :param configuration: A dictionary. Examples in the ephemeris documentation.
    """
    self.gi = galaxy_instance
    self.config = configuration
    self.tool_data_client = ToolDataClient(self.gi)
    self.tool_client = ToolClient(self.gi)
    self.possible_name_keys = ['name', 'sequence_name']  # In order of importance!
    self.possible_value_keys = ['value', 'sequence_id', 'dbkey']  # In order of importance!
    self.data_managers = self.config.get('data_managers')
    self.genomes = self.config.get('genomes', '')
    self.source_tables = DEFAULT_SOURCE_TABLES
    self.fetch_jobs = []
    self.skipped_fetch_jobs = []
    self.index_jobs = []
    self.skipped_index_jobs = []
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    #print(files)
    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        # initial set
        #if itemp == 31:
        #    break
        #print("Name " + f['name'])
        replicate = f['name'].split('/')[-1][0]
        #print(replicate)
        if replicate == 'X':
            base = f['name'].split('/')[-1].split('.')[0]
            #print(base)
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)
            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            workflows = workflowsClient.get_workflows(name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap, history_id=hist['id'])
            except Exception:
                print('Next')
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)
    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(hist_id, name_filter=name_filter)
        if len(countSecondary) != 0:
            #print(countSecondary)
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            #print(file_path)
            #print(countSecondary[0]['dataset_id'])
            datasetClient.download_dataset(countSecondary[0]['id'], file_path=file_path, use_default_filename=False)
    sys.exit()
def runWorkflow(argDictionary, comparisons, samples):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    import time
    api_key = ''
    galaxy_host = ''
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    history = history_client.create_history(argDictionary['accessionNumber'])
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')
    if argDictionary['site'] == "ENA":
        # fastqs available on ENA
        tool_inputs = {
            "accessionNumber": argDictionary["ENA"],
            "sampleTable": {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
        }
        # run the tool to get the data from ENA
        tool_client.run_tool(history['id'], 'getRNASeqExpressionData', tool_inputs)
        # we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
        # sleep until all the fastq files are findable
        time.sleep(120)
        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history, history_client)[0]
        fileList = history_client.download_dataset(history["id"], fileList["id"], dirpath)
        num_lines = sum(1 for line in open(fileList)) - 1
        datasets = list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else:
        # for SRA
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print(sample)
                    fileNames = str.split(sample["File"], "|")
                    for fileName in fileNames:
                        tool_inputs = {
                            "input|input_select": "accession_number",
                            "outputformat": "fastqsanger.gz",
                            "input|accession": fileName
                        }
                        # run the tool to get the single-end data from SRA
                        tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3', tool_inputs)
        else:
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    tool_inputs = {
                        "accession_number": sample["File"]
                    }
                    # run the tool to get the paired data from SRA
                    tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4', tool_inputs)
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
    datasets = getDatasetsByApproxName("fastq", history, history_client)
    # run the fastQC tool on each fastq dataset
    for fastq in datasets:
        try:
            tool_inputs = {'input_file': {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69', tool_inputs)
        except Exception:
            pass
    # wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    # make dataset collections for quantification using the fastq files
    collections = list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets = list()
            fileNames = str.split(row["File"], "|")
            for fileName in fileNames:
                datasets = datasets + getDatasetsByApproxName(fileName, history, history_client)
            # make list of datasets
            collections.append(makeDataSetCollection(datasets, row["Sample"], history, history_client))
    # get the correct kallisto index
    species = argDictionary['species'].lower()
    index = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name=species + "IndexFile")
    index = {'id': index, 'src': 'hda'}
    # run kallisto for every dataset collection
    for collection in collections:
        # set up the tool_inputs
        tool_inputs = {'index': index, 'inputs': {'id': collection['id'], 'src': 'hdca'}, "single": argDictionary["single"], "stranded": argDictionary["stranded"]}
        # often encounter connection broken error - possible problem with Certus server?
        # bypass by ignoring the exception
        tool_client.run_tool(history['id'], 'kallistoQuant', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    # Run multiqc on kallisto logs and fastqc files
    datasets = getDatasetsByApproxName("RawData", history, history_client)
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)
    tool_inputs = {}
    for i, dataset in enumerate(datasets + kallistoLogs):
        if not dataset["deleted"]:
            if dataset in datasets:
                software = 'fastqc'
            else:
                software = 'kallisto'
            params = {'id': dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software, 'results_%s|input_file' % i: params})
    # summarise with the multiQC tool
    tool_client.run_tool(history['id'], 'multiqc', tool_inputs)
    multiQc = getDatasetsByApproxName("multiqc", history, history_client)[0]
    # get all the abundance files to convert to gene level counts matrix
    datasets = getDatasetsByApproxName(".abundance", history, history_client)
    # make a dataset collection to build a countsMatrix
    collection = makeDataSetCollection(datasets, "abundances", history, history_client)
    # set up the tool_inputs
    tool_inputs = {'inputs': {'id': collection['id'], 'src': 'hdca'}, "species": argDictionary['species']}
    # convert abundances to gene level counts matrix
    tool_client.run_tool(history['id'], 'KallistoAbundancestoGeneCountMatrix', tool_inputs)
    # A dirty hack, we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    txi = getDatasetsByApproxName("txi", history, history_client)
    # set up the tool_inputs for PCA
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'}, 'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}, "species": argDictionary['species'], 'technicalReplicates': argDictionary['technicalReplicates'], 'batchCorrect': argDictionary['batchCorrect']}
    # run PCA
    tool_client.run_tool(history['id'], 'PCARNASeq', tool_inputs)
    pca = getDatasetsByApproxName("PCA", history, history_client)[0]
    # set up the tool_inputs for DESeq2
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'}, 'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}, 'comparisonsTable': {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'}, "foldChangeOnly": argDictionary['foldChangeOnly'], "species": argDictionary['species'], 'technicalReplicates': argDictionary['technicalReplicates'], 'batchCorrect': argDictionary['batchCorrect']}
    # run deseq2
    tool_client.run_tool(history['id'], 'DESeq2FoldChange', tool_inputs)
    # run chrdir
    tool_client.run_tool(history['id'], 'characteristicDirectionRNASeq', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    # get the foldchange data, cut and run pathway workflow
    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(pca["id"]),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Cow":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Horse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigStringNetwork.txt")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigGeneLengths.tabular")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs=pathwayDatamap, history_id=history['id'], params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)
        # Hard code keys to define the order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable', 'comparisonNum', 'comparisonDenom', 'foldChange',
                'interactome', 'exonLength', 'moduleNodes', 'modulePlots',
                'slimEnrichPathways', 'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic', 'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        #tool_client.upload_file(outFileName, history['id'], file_type='tsv')
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Mouse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="mouseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="MouseGeneLengths.tab")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-mouse.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'}, '3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        else:
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="humanStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="geneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'}, '3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            #pathwayDatamap['1'] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs=pathwayDatamap, history_id=history['id'], params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)
        # Hard code keys to define the order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable', 'comparisonNum', 'comparisonDenom', 'foldChange',
                'interactome', 'exonLength', 'moduleNodes', 'modulePlots', 'pathways', 'enrichPlot', 'enrichmentTable',
                'slimEnrichPathways', 'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic', 'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
#!/usr/bin/python
# version 1.0.0 18-Apr-2018
"""
This tool imports a data set into Galaxy.
"""
import sys

###REPLACE### sys.path.append('/PATH/TO/lib/python2.7/site-packages/bioblend-0.8.0-py2.7.egg')
###REPLACE### sys.path.append('/PATH/TOlib/python2.7/site-packages/requests_toolbelt-0.7.0-py2.7.egg')

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.tools import ToolClient

###REPLACE### url = "GALAXY URL"
###REPLACE### key = "API KEY of an ADMIN USER"

history_id = sys.argv[1]
file_path = sys.argv[2]

#print("Initiating Galaxy connection")
gi = GalaxyInstance(url=url, key=key)
toolClient = ToolClient(gi)
uploadedFile = toolClient.upload_file(file_path, history_id)
class DataManagers:
    def __init__(self, galaxy_instance, configuration):
        """
        :param galaxy_instance: A GalaxyInstance object (import from bioblend.galaxy)
        :param configuration: A dictionary. Examples in the ephemeris documentation.
        """
        self.gi = galaxy_instance
        self.config = configuration
        self.tool_data_client = ToolDataClient(self.gi)
        self.tool_client = ToolClient(self.gi)
        self.possible_name_keys = ['name', 'sequence_name']  # In order of importance!
        self.possible_value_keys = ['value', 'sequence_id', 'dbkey']  # In order of importance!
        self.data_managers = self.config.get('data_managers')
        self.genomes = self.config.get('genomes', '')
        self.source_tables = DEFAULT_SOURCE_TABLES
        self.fetch_jobs = []
        self.skipped_fetch_jobs = []
        self.index_jobs = []
        self.skipped_index_jobs = []

    def initiate_job_lists(self):
        """
        Determines which data managers should be run to populate the data tables.
        Distinguishes between fetch jobs (download files) and index jobs.
        :return: populates self.fetch_jobs, self.skipped_fetch_jobs, self.index_jobs and self.skipped_index_jobs
        """
        self.fetch_jobs = []
        self.skipped_fetch_jobs = []
        self.index_jobs = []
        self.skipped_index_jobs = []
        for dm in self.data_managers:
            jobs, skipped_jobs = self.get_dm_jobs(dm)
            if self.dm_is_fetcher(dm):
                self.fetch_jobs.extend(jobs)
                self.skipped_fetch_jobs.extend(skipped_jobs)
            else:
                self.index_jobs.extend(jobs)
                self.skipped_index_jobs.extend(skipped_jobs)

    def get_dm_jobs(self, dm):
        """Gets the job entries for a single dm.
        Puts entries that are already present in skipped_job_list.
        :returns job_list, skipped_job_list"""
        job_list = []
        skipped_job_list = []
        items = self.parse_items(dm.get('items', ['']))
        for item in items:
            dm_id = dm['id']
            params = dm['params']
            inputs = dict()
            # Iterate over all parameters, replace occurrences of {{item}} with the current processing item
            # and create the tool_inputs dict for running the data manager job
            for param in params:
                key, value = list(param.items())[0]
                value_template = Template(value)
                value = value_template.render(item=item)
                inputs.update({key: value})
            job = dict(tool_id=dm_id, inputs=inputs)
            data_tables = dm.get('data_table_reload', [])
            if self.input_entries_exist_in_data_tables(data_tables, inputs):
                skipped_job_list.append(job)
            else:
                job_list.append(job)
        return job_list, skipped_job_list

    def dm_is_fetcher(self, dm):
        """Checks whether the data manager fetches a sequence instead of indexing.
        This is based on the source table.
        :returns True if dm is a fetcher. False if it is not."""
        data_tables = dm.get('data_table_reload', [])
        for data_table in data_tables:
            if data_table in self.source_tables:
                return True
        return False

    def data_table_entry_exists(self, data_table_name, entry, column='value'):
        """Checks whether an entry exists in the specified column of the data table."""
        try:
            data_table_content = self.tool_data_client.show_data_table(data_table_name)
        except Exception:
            raise Exception('Table "%s" does not exist' % data_table_name)
        try:
            column_index = data_table_content.get('columns').index(column)
        except IndexError:
            raise IndexError('Column "%s" does not exist in %s' % (column, data_table_name))
        for field in data_table_content.get('fields'):
            if field[column_index] == entry:
                return True
        return False

    def input_entries_exist_in_data_tables(self, data_tables, input_dict):
        """Checks whether name and value entries from the input are already present in the data tables.
        If an entry is missing in one of the tables, this function returns False."""
        value_entry = get_first_valid_entry(input_dict, self.possible_value_keys)
        name_entry = get_first_valid_entry(input_dict, self.possible_name_keys)
        # Return False if name and value entries are both None
        if not value_entry and not name_entry:
            return False
        # Check every data table for existence of name and value
        # Return False as soon as entry is not present
        for data_table in data_tables:
            if value_entry:
                if not self.data_table_entry_exists(data_table, value_entry, column='value'):
                    return False
            if name_entry:
                if not self.data_table_entry_exists(data_table, name_entry, column='name'):
                    return False
        # If all checks are passed the entries are present in the database tables.
        return True

    def parse_items(self, items):
        """
        Parses items with jinja2.
        :param items: the items to be parsed
        :return: the parsed items
        """
        if bool(self.genomes):
            items_template = Template(json.dumps(items))
            rendered_items = items_template.render(genomes=json.dumps(self.genomes))
            # Remove trailing " if present
            rendered_items = rendered_items.strip('"')
            items = json.loads(rendered_items)
        return items

    def run(self, log=None, ignore_errors=False, overwrite=False):
        """
        Runs the data managers.
        :param log: The log to be used.
        :param ignore_errors: Ignore erroring data_managers. Continue regardless.
        :param overwrite: Overwrite existing entries in data tables
        """
        self.initiate_job_lists()
        all_succesful_jobs = []
        all_failed_jobs = []
        all_skipped_jobs = []
        if not log:
            log = logging.getLogger()

        def run_jobs(jobs, skipped_jobs):
            job_list = []
            for skipped_job in skipped_jobs:
                if overwrite:
                    log.info('%s already run for %s. Entry will be overwritten.' % (skipped_job["tool_id"], skipped_job["inputs"]))
                    jobs.append(skipped_job)
                else:
                    log.info('%s already run for %s. Skipping.' % (skipped_job["tool_id"], skipped_job["inputs"]))
                    all_skipped_jobs.append(skipped_job)
            for job in jobs:
                started_job = self.tool_client.run_tool(history_id=None, tool_id=job["tool_id"], tool_inputs=job["inputs"])
                log.info('Dispatched job %i. Running DM: "%s" with parameters: %s' % (started_job['outputs'][0]['hid'], job["tool_id"], job["inputs"]))
                job_list.append(started_job)
            successful_jobs, failed_jobs = wait(self.gi, job_list, log)
            if failed_jobs:
                if not ignore_errors:
                    log.error('Not all jobs successful! aborting...')
                    raise RuntimeError('Not all jobs successful! aborting...')
                else:
                    log.warning('Not all jobs successful! ignoring...')
            all_succesful_jobs.extend(successful_jobs)
            all_failed_jobs.extend(failed_jobs)

        log.info("Running data managers that populate the following source data tables: %s" % self.source_tables)
        run_jobs(self.fetch_jobs, self.skipped_fetch_jobs)
        log.info("Running data managers that index sequences.")
        run_jobs(self.index_jobs, self.skipped_index_jobs)
        log.info('Finished running data managers. Results:')
        log.info('Successful jobs: %i ' % len(all_succesful_jobs))
        log.info('Skipped jobs: %i ' % len(all_skipped_jobs))
        log.info('Failed jobs: %i ' % len(all_failed_jobs))
        InstallResults = namedtuple("InstallResults", ["successful_jobs", "failed_jobs", "skipped_jobs"])
        return InstallResults(successful_jobs=all_succesful_jobs, failed_jobs=all_failed_jobs, skipped_jobs=all_skipped_jobs)
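A hypothetical configuration sketch for the DataManagers class above, inferred from how get_dm_jobs and parse_items read the dictionary; the data manager id, parameter names, genome values and data table names are illustrative only, and galaxy_instance is assumed to be an existing GalaxyInstance.

configuration = {
    # 'genomes' is substituted into 'items' by parse_items via jinja2.
    'genomes': [{'id': 'hg38'}, {'id': 'mm10'}],
    'data_managers': [
        {
            # 'id' is the data manager tool id; 'params' is a list of single-entry dicts
            # whose values may reference {{ item }} (rendered once per entry of 'items').
            'id': 'data_manager_fetch_genome_dbkeys_all_fasta',
            'params': [{'dbkey_source|dbkey': '{{ item.id }}'}],
            'items': '{{ genomes }}',
            'data_table_reload': ['all_fasta', '__dbkeys__'],
        },
    ],
}
dms = DataManagers(galaxy_instance, configuration)
dms.run()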
def get_tool_panel(gi):
    tool_client = ToolClient(gi)
    return tool_client.get_tool_panel()
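A brief, hedged sketch of consuming get_tool_panel() from the helper above; gi is assumed to be an existing GalaxyInstance, and the panel is treated as a list in which section entries nest their tools under 'elems'.

panel = get_tool_panel(gi)
for entry in panel:
    # Sections report model_class 'ToolSection' and carry an 'elems' list of tools;
    # top-level tools appear directly in the panel list.
    if entry.get('model_class') == 'ToolSection':
        print(entry['name'], len(entry.get('elems', [])))
    else:
        print(entry.get('id'))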
        parser.read(sys.argv[1])
    else:
        print("You passed %s I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')
file_name_re = re.compile(parser.get('Globals', 'sample_re'))
galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
historyClient = HistoryClient(galaxyInstance)
toolClient = ToolClient(galaxyInstance)
workflowClient = WorkflowClient(galaxyInstance)
dataSetClient = DatasetClient(galaxyInstance)
files = get_files(parser.get('Globals', 'fastq_dir'))
if len(files) == 0:
    print("Not able to find any fastq files looked in %s" % (parser.get('Globals', 'fastq_dir')))
else:
    print("Found fastq files running workflow for the following files (R2's will be added)")
    print(",".join(files))
    files_to_keep = {}
    for R1 in files:
        input_dir_path = os.path.dirname(R1) + "/"
        R2 = R1.replace('R1', 'R2')
        if not os.path.exists(R1):
            print("%s File Not Found" % (R1,))
def toolbox(self):
    """
    Gets the toolbox elements from <galaxy_url>/api/tools
    """
    tool_client = ToolClient(self.gi)
    return tool_client.get_tool_panel()
def retrieve_tools_from_instance(cls, instance):
    galaxy_instance = GalaxyInstance(url=instance.url)
    tool_client = ToolClient(galaxy_instance)
    for element in tool_client.get_tools():
        if element['model_class'] == 'Tool':
            tool_name = element['id']
            if '/' in tool_name:
                tool_name = tool_name.split('/')[-2]
            tool = Tool.query.filter_by(name=tool_name).first()
            if tool is None:
                tool = Tool(name=tool_name)
                db.session.add(tool)
            tool.description = element['description']
            tool.display_name = element['name']
            if 'link' in element:
                link = element.get('link', None)
                link_start = link.find('/tool_runner')
                if link_start != -1:
                    tool.link = link[link_start:]
            for edam_opetation_id in element.get('edam_operations', []):
                edam_operation = EDAMOperation.get_from_id(edam_opetation_id, allow_creation=True)
                if edam_operation is not None and edam_operation not in tool.edam_operations:
                    tool.edam_operations.append(edam_operation)
            if 'tool_shed_repository' in element:
                tool_version = ToolVersion.query\
                    .filter_by(name=tool_name)\
                    .filter_by(changeset=element['tool_shed_repository']['changeset_revision'])\
                    .filter_by(tool_shed=element['tool_shed_repository']['tool_shed'])\
                    .filter_by(owner=element['tool_shed_repository']['owner'])\
                    .first()
            else:
                tool_version = ToolVersion.query\
                    .filter_by(name=tool_name)\
                    .filter_by(version=element['version'])\
                    .filter_by(tool_shed=None)\
                    .filter_by(owner=None)\
                    .first()
            if tool_version is None:
                tool_version = ToolVersion(name=tool_name, version=element['version'])
                db.session.add(tool_version)
                if 'tool_shed_repository' in element:
                    tool_version.changeset = element['tool_shed_repository']['changeset_revision']
                    tool_version.tool_shed = element['tool_shed_repository']['tool_shed']
                    tool_version.owner = element['tool_shed_repository']['owner']
            if instance not in tool_version.instances:
                tool_version.instances.append(instance)
            if tool_version not in tool.versions:
                tool.versions.append(tool_version)
    db.session.commit()
def retrieve_tools_from_instance(cls, instance):
    galaxy_instance = GalaxyInstance(url=instance.url)
    tool_client = ToolClient(galaxy_instance)
    for element in tool_client.get_tools():
        if element['model_class'] == 'Tool':
            tool_name = element['id']
            if '/' in tool_name:
                tool_name = tool_name.split('/')[-2]
            try:
                tool = Tool.objects.get(name=tool_name)
            except Tool.DoesNotExist:
                tool = Tool(name=tool_name)
            tool.description = element['description']
            tool.display_name = element['name']
            for edam_opetation_id in element['edam_operations']:
                edam_operation = EDAMOperation.get_from_id(edam_opetation_id, allow_creation=True)
                if edam_operation is not None and edam_operation not in tool.edam_operations:
                    tool.edam_operations.append(edam_operation)
            try:
                if 'tool_shed_repository' in element:
                    tool_version = ToolVersion.objects.get(
                        name=tool_name,
                        changeset=element['tool_shed_repository']['changeset_revision'],
                        tool_shed=element['tool_shed_repository']['tool_shed'],
                        owner=element['tool_shed_repository']['owner'])
                else:
                    tool_version = ToolVersion.objects.get(
                        name=tool_name,
                        version=element['version'],
                        tool_shed=None,
                        owner=None)
            except ToolVersion.DoesNotExist:
                tool_version = ToolVersion(name=tool_name, version=element['version'])
                if 'tool_shed_repository' in element:
                    tool_version.changeset = element['tool_shed_repository']['changeset_revision']
                    tool_version.tool_shed = element['tool_shed_repository']['tool_shed']
                    tool_version.owner = element['tool_shed_repository']['owner']
            if instance not in tool_version.instances:
                tool_version.instances.append(instance)
            tool_version.save()
            if tool_version not in tool.versions:
                tool.versions.append(tool_version)
            tool.save()
#!/usr/bin/env python
"""
Use the bioblend API to create a fresh history and add a set of files to the
history that were imported into the container during the build

Usage: create_and_upload_history.py history_name url1 url2 url3 ...
"""
import sys
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.tools import ToolClient

gi = GalaxyInstance(url='http://localhost:80', key='admin')
tc = ToolClient(gi)
lc = HistoryClient(gi)
details = lc.create_history(sys.argv[1])
print("HIST ID: %s" % details["id"])
i = 0
for url in sys.argv:
    url_parts = url.split("/")
    fname = url_parts[-1]
    # skip the script name and the history name arguments
    if i < 2:
        i += 1
        continue
    i += 1
    print("submitting %s as %s" % (url, fname))
    tc.put_url(url, details["id"], file_name=fname)
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    api_key = ''
    galaxy_host = 'http://localhost:8080/'
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    history = history_client.create_history(row['accessionNumber'])
    # Import the galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')
    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    # Run workflow on csv data to create a new history.
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary
    datamap = {'1': {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}
    workflow_client.invoke_workflow(workflow['id'], inputs=datamap, history_id=history['id'], params=params)
    # A dirty hack, we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigStringNetwork.txt")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigGeneLengths.tabular")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs=pathwayDatamap, history_id=history['id'], params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)
        # Hard code keys to define the order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable', 'foldChange',
                'interactome', 'exonLength', 'moduleNodes', 'modulePlots', 'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        if argDictionary['species'] == "Mouse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="mouseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="MouseGeneLengths.tab")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-mouse.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'}, '3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        else:
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="humanStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="geneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'}, '3': {'id': homology, 'src': 'hda'}, '2': {'id': network, 'src': 'hda'}, '1': {'id': geneLengths, 'src': 'hda'}}
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs=pathwayDatamap, history_id=history['id'], params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)
        # Hard code keys to define the order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable', 'foldChange',
                'interactome', 'exonLength', 'moduleNodes', 'modulePlots', 'pathways', 'enrichPlot', 'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
if __name__ == '__main__':
    # GET PATH NAMES AND EXTENSIONS FROM COMMAND LINE INPUT
    input_file_full = sys.argv[1]
    input_file_format = input_file_full[input_file_full.rfind(".") + 1:len(input_file_full)]
    output_file_full = sys.argv[2]
    output_file_format = output_file_full[output_file_full.rfind(".") + 1:len(output_file_full)]
    # CHOOSE CONVERTER
    tool_id = choose_converter(input_file_format, output_file_format)
    # INITIALIZE GALAXY
    galaxy_instance = GalaxyInstance(url=base_url, key=apikey)
    history_client = HistoryClient(galaxy_instance)
    tool_client = ToolClient(galaxy_instance)
    dataset_client = DatasetClient(galaxy_instance)
    history = history_client.create_history('tmp')
    # UPLOAD FILES
    input_file_1 = tool_client.upload_file(input_file_full, history['id'], file_type='txt')
    input_file_2 = tool_client.upload_file(input_file_full, history['id'], file_type='txt')
    params = {'input_numbers_001': {'src': 'hda', 'id': input_file_1['outputs'][0]['id']},
              'input_numbers_002': {'src': 'hda', 'id': input_file_2['outputs'][0]['id']}}
    wait_4_process(history['id'], "uploading files")
    # RUN CONVERSION
    runtool_output = tool_client.run_tool(history_id=history['id'], tool_id=tool_id, tool_inputs=params)
    wait_4_process(history['id'], "running tool")
    # DOWNLOAD CONVERTED FILE
    download_output = dataset_client.download_dataset(runtool_output['jobs'][0]['id'], output_file_full, use_default_filename=False)
def get_tools(gi):
    tool_client = ToolClient(gi)
    return tool_client.get_tools()