def installed_tool_list(self):
     """
     gets a tool list from the toolclient
     :return:
     """
     tool_client = ToolClient(self.gi)
     return tool_client.get_tools()
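A minimal usage sketch of the calls this method wraps (the server URL and API key below are placeholders):

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.tools import ToolClient

gi = GalaxyInstance(url='https://your.galaxy.server', key='YOUR_API_KEY')
tool_client = ToolClient(gi)
for tool in tool_client.get_tools()[:5]:
    # each entry is a dict with at least 'id' and 'name'
    print(tool['id'], tool['name'])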
Example #2
def main():
    try:
        input_path = sys.argv[1]
        output_path = sys.argv[2]

        galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        historyClient = HistoryClient(galaxyInstance)
        toolClient = ToolClient(galaxyInstance)
        workflowClient = WorkflowClient(galaxyInstance)
        datasetClient = DatasetClient(galaxyInstance)

        history = historyClient.create_history('tmp')
        uploadedFile = toolClient.upload_file(input_path, history['id'])

        workflow = workflowClient.show_workflow(WORKFLOW_ID)
        dataset_map = {list(workflow['inputs'].keys())[0]: {'id': uploadedFile['outputs'][0]['id'], 'src': 'hda'}}
        params = {TOOL_ID_IN_GALAXY: {'param': 'reference_genome', 'value': 'hg19'}}
        output = workflowClient.run_workflow(WORKFLOW_ID, dataset_map, params, history['id'])

        downloadDataset(datasetClient, findDatasedIdByExtention(datasetClient, output, 'bed'), output_path)
        # delete history
        historyClient.delete_history(history['id'])
        # if the galaxy instance supports dataset purging:
        #historyClient.delete_history(history['id'], True)

    except IndexError:
        print('usage: %s input_path output_path' % os.path.basename(sys.argv[0]))
        sys.exit(1)
Example #4
def put(filename, file_type='auto'):
    """
        Given a filename of any file accessible to the docker instance, this
        function will upload that file to galaxy using the current history.
        Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    tc = ToolClient(gi)
    tc.upload_file(filename, conf['history_id'], file_type=file_type)
Example #5
def put(filename, file_type='auto', history_id=None):
    """
        Given a filename of any file accessible to the docker instance, this
        function will upload that file to galaxy using the current history.
        Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    tc = ToolClient(gi)
    history_id = history_id or _get_history_id()
    tc.upload_file(filename, history_id, file_type=file_type)
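Either variant is invoked the same way from a script or notebook inside the container; a hedged one-liner (path and datatype are illustrative):

put('/import/results.tsv', file_type='tabular')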
Example #6
def transfer_filelist_from_ftp(gi, filelist, history_name):

    tc = ToolClient(gi)
    hc = HistoryClient(gi)

    st = get_time_stamp()
    hist = hc.create_history('{}-{}'.format(history_name, st))

    uploaded_files = []
    for f in filelist:
        upf = tc.upload_from_ftp(path=os.path.basename(f),
                                 history_id=hist['id'])['outputs'][0]
        print(upf)
        uploaded_files.append(upf)
    return uploaded_files, hist
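A usage sketch, assuming the listed files already sit in the user's Galaxy FTP directory and `gi` is an authenticated GalaxyInstance (file names and history name are illustrative):

uploaded, hist = transfer_filelist_from_ftp(gi,
                                            ['sample_1.fastq.gz', 'sample_2.fastq.gz'],
                                            history_name='ftp-import')
print(hist['id'], len(uploaded))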
Example #7
 def create_clients(self):
     '''
     Create bioblend clients for the Galaxy instance.
     '''
     # Create first client and check if the API works
     self.config_client = ConfigClient(self.instance)
     try:
         self.config_client.get_version()
         self.config_client.get_config()
     except Exception:
         logger.error("Provided API key does not work.")
         return False
     try:
         self.user_client = UserClient(self.instance)
         self.workflow_client = WorkflowClient(self.instance)
         self.tool_client = ToolClient(self.instance)
         self.toolshed_client = ToolShedClient(self.instance)
         self.library_client = LibraryClient(self.instance)
         self.roles_client = RolesClient(self.instance)
         self.history_client = HistoryClient(self.instance)
         self.dataset_client = DatasetClient(self.instance)
     except Exception:
         logger.error("Error initializing other bioblend clients.")
         return False
     return True
Example #8
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'],
                                       contents=True)
    #print(files)
    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        #initial set
        #if itemp == 31:
        #	break

        #print ("Name " + f['name'])

        replicate = f['name'].split('_')[-1].split('.')[0]
        #print replicate
        if replicate == '1':
            itemp = itemp + 1
            if not (itemp >= 71 and itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            #print base
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            files2 = libraryClient.show_library(brassica_library[0]['id'],
                                                contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = f['name'].split('_')[7] + "_" + f['name'].split(
                '_')[-3] + "_" + f['name'].split('_')[-2]
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(
                hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(
                hist['id'], reverse_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            datamap['1'] = {'src': 'hda', 'id': dataset_R['id']}
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'],
                                                 datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
Example #9
def main():
    """
    This script uses bioblend to trigger dependencies installations for the provided tools
    """
    args = _parser().parse_args()
    gi = get_galaxy_connection(args)
    tool_client = ToolClient(gi)

    if args.verbose:
        log.basicConfig(level=log.DEBUG)

    if args.tool:
        for tool_conf_path in args.tool:  # type: str
            _, ext = os.path.splitext(tool_conf_path)
            if ext == '.xml':
                log.info("tool_conf xml found, parsing..")
                # install all
                root = ET.ElementTree(file=tool_conf_path).getroot()
                if root.tag == "toolbox":
                    # Install all from tool_conf
                    tool_path = root.get('tool_path', '')
                    tool_path = tool_path.replace(
                        '${tool_conf_dir}',
                        os.path.abspath(os.path.dirname(tool_conf_path)))
                    if tool_path:
                        log.info("Searching for tools relative to " +
                                 tool_path)
                    tools = root.findall(".//tool[@file]")
                    if len(tools) == 0:
                        log.warning("No tools found in tool_conf")
                        continue

                    for tool in tools:
                        tool_id = ET.ElementTree(file=os.path.join(
                            tool_path, tool.get('file'))).getroot().get('id')
                        if tool_id:
                            log.info("Installing tool dependencies for " +
                                     tool_id + " from: " + tool.get('file'))
                            _install(tool_client, tool_id)
                elif root.tag == "tool" and root.get('id'):
                    # Install from single tool file
                    log.info("Tool xml found. Installing " + root.get('id') +
                             " dependencies..")
                    _install(tool_client, root.get('id'))
            else:
                log.info("YAML tool list found, parsing..")
                with open(tool_conf_path) as fh:
                    tool_ids = yaml.safe_load(fh)
                for tool_id in tool_ids:
                    # Install from yaml file
                    log.info("Installing " + tool_id + " dependencies..")
                    _install(tool_client, tool_id)

    if args.id:
        for tool_id in args.id:  # type: str
            log.info("Installing " + tool_id + " dependencies..")
            _install(tool_client, tool_id.strip())
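The `_install` helper is defined elsewhere in this script; a plausible minimal body on top of bioblend (an assumption, since the original is not shown) would be:

def _install(tool_client, tool_id):
    # ToolClient.install_dependencies resolves and installs the tool's
    # declared requirements; this call needs an admin API key.
    tool_client.install_dependencies(tool_id)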
Example #10
def put(filename,
        file_type='auto',
        history_id=None,
        use_objects=DEFAULT_USE_OBJECTS):
    """
        Given a filename of any file accessible to the docker instance, this
        function will upload that file to galaxy using the current history.
        Does not return anything.
    """
    conf = _get_conf()
    gi = get_galaxy_connection(use_objects)
    history_id = history_id or _get_history_id()
    if use_objects:
        history = gi.histories.get(history_id)
        history.upload_dataset(filename, file_type=file_type)
    else:
        tc = ToolClient(gi)
        tc.upload_file(filename, history_id, file_type=file_type)
Example #11
 def __init__(self, galaxy_instance, configuration):
     """
     :param galaxy_instance: A GalaxyInstance object (import from bioblend.galaxy)
     :param configuration: A dictionary. Examples in the ephemeris documentation.
     """
     self.gi = galaxy_instance
     self.config = configuration
     self.tool_data_client = ToolDataClient(self.gi)
     self.tool_client = ToolClient(self.gi)
     self.possible_name_keys = ['name', 'sequence_name']  # In order of importance!
     self.possible_value_keys = ['value', 'sequence_id', 'dbkey']  # In order of importance!
     self.data_managers = self.config.get('data_managers')
     self.genomes = self.config.get('genomes', '')
     self.source_tables = DEFAULT_SOURCE_TABLES
     self.fetch_jobs = []
     self.skipped_fetch_jobs = []
     self.index_jobs = []
     self.skipped_index_jobs = []
Example #12
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'],
                                       contents=True)
    #print(files)

    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        #initial set
        #if itemp == 31:
        #	break

        #print ("Name " + f['name'])

        replicate = f['name'].split('/')[-1][0]
        #print replicate
        if replicate == 'X':

            base = f['name'].split('/')[-1].split('.')[0]
            #print base
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)

            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(
                hist['id'], forward_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            workflows = workflowsClient.get_workflows(
                name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'],
                                                 datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
Example #13
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(
            hist_id, name_filter=name_filter)
        if len(countSecondary) != 0:
            #print(countSecondary)
            file_path = dir_name + '/' + hist[
                'name'] + '_' + name_filter + '.' + ext
            #print(file_path)
            #print(countSecondary[0]['dataset_id'])
            datasetClient.download_dataset(countSecondary[0]['id'],
                                           file_path=file_path,
                                           use_default_filename=False)
    sys.exit()
Example #14
def runWorkflow(argDictionary, comparisons,samples):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    
    
    import time
    api_key = ''
    galaxy_host = ''

    gi = GalaxyInstance(url=galaxy_host, key=api_key)

    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    
    history = history_client.create_history(argDictionary['accessionNumber'])
    
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')
    
    if argDictionary['site'] == "ENA":
        #fastqs available on ENA    
        tool_inputs = {
                "accessionNumber":argDictionary["ENA"],"sampleTable":{'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
                
            }
        
    
        #run the tool to get the data from ENA
        tool_client.run_tool(history['id'],'getRNASeqExpressionData', tool_inputs)
        
        #we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
            
        
        #sleep until all the fastq files are findable
        time.sleep(120)
        
        
        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history,history_client)[0]
        fileList = history_client.download_dataset(history["id"],fileList["id"],dirpath)
        num_lines = sum(1 for line in open(fileList)) -1
        
        datasets=list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else: #for SRA       
    
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print(sample)
                    fileNames=str.split(sample["File"],"|")
                    for fileName in fileNames:                    
                        tool_inputs = {
                                "input|input_select":"accession_number",
                                "outputformat":"fastqsanger.gz",
                                "input|accession":fileName   
                            }
                        #run the tool to get the single data from SRA
                        tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3', tool_inputs)
               
        else:
             with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
           
                for sample in reader:            
                    tool_inputs = {
                            "accession_number":sample["File"]           
                        }
                    #run the tool to get the paired data from SRA
                    tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4', tool_inputs)
                
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
     
    datasets = getDatasetsByApproxName("fastq",history,history_client )
    # run FastQC on each fastq dataset
    for fastq in datasets:
        try:
            tool_inputs = {'input_file' : {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69', tool_inputs)
        except Exception:
            pass
        
    #wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    
    #make dataset collections for quantification using the fastq files
    collections=list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets=list()
            fileNames=str.split(row["File"],"|")
            
            for fileName in fileNames:
                datasets= datasets + getDatasetsByApproxName(fileName,history,history_client )
                    
            #make list of datasets
            collections.append(makeDataSetCollection(datasets,row["Sample"],history,history_client))
            
            
            
    #get the correct kallisto index
    species = argDictionary['species'].lower()
    index = getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name=species +"IndexFile")
    index = {'id': index, 'src': 'hda'}
    
    #run kallisto for every dataset collection
    for collection in collections:
        #set up the tool_inputs
        tool_inputs = {'index' : index,'inputs' : {'id': collection['id'], 'src': 'hdca'} ,"single":argDictionary["single"],"stranded":argDictionary["stranded"]}
        
        
        # often encounter connection broken errors - possibly a problem with the Certus server?
        # bypass by ignoring the exception, as for the FastQC calls above
        try:
            tool_client.run_tool(history['id'], 'kallistoQuant', tool_inputs)
        except Exception:
            pass


    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    # Run multiqc on kallisto logs and fastqc files
    datasets = getDatasetsByApproxName("RawData",history,history_client )
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)
    
    tool_inputs = {}
    for i, dataset in enumerate(datasets+kallistoLogs):
        if not dataset["deleted"]:
            if dataset in datasets:
                software = 'fastqc'
            else:
                software = 'kallisto'
            params = {'id' : dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software, 'results_%s|input_file' % i: params})

    # summarise with the multiQC tool
    tool_client.run_tool(history['id'],'multiqc', tool_inputs)
    
    multiQc = getDatasetsByApproxName("multiqc",history,history_client)[0]
    
        
    #get all the abundance files to convert to gene level counts matrix
    datasets = getDatasetsByApproxName(".abundance",history,history_client )
    
    # make a dataset collection used to build the counts matrix
    collection = makeDataSetCollection(datasets,"abundances",history,history_client)
    
    
    #set up the tool_inputs
    tool_inputs = {'inputs' : {'id': collection['id'], 'src': 'hdca'} ,"species":argDictionary['species']}
    
    #convert abundances to gene level counts matrix
    tool_client.run_tool(history['id'],'KallistoAbundancestoGeneCountMatrix', tool_inputs)
    
    # A dirty hack, we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    
    txi = getDatasetsByApproxName("txi",history,history_client)
    

    #set up the tool_inputs for PCA
    tool_inputs = {'txiData' : {'id': txi[0]['id'], 'src': 'hda'} ,'sampleTable' : {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'} ,"species":argDictionary['species'],'technicalReplicates':argDictionary['technicalReplicates'],'batchCorrect':argDictionary['batchCorrect']}
    
    # run PCA on the expression data
    tool_client.run_tool(history['id'],'PCARNASeq', tool_inputs)
    
    pca = getDatasetsByApproxName("PCA",history,history_client)[0]
    
       
    #set up the tool_inputs for DESeq2
    tool_inputs = {'txiData' : {'id': txi[0]['id'], 'src': 'hda'} ,'sampleTable' : {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'} ,
    'comparisonsTable' : {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'} ,"foldChangeOnly":argDictionary['foldChangeOnly'],"species":argDictionary['species'],'technicalReplicates':argDictionary['technicalReplicates'],'batchCorrect':argDictionary['batchCorrect']}
    
    #run deseq2
    tool_client.run_tool(history['id'],'DESeq2FoldChange', tool_inputs)
         
    #run chrdir
    tool_client.run_tool(history['id'],'characteristicDirectionRNASeq', tool_inputs)
    
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
        
    #get the foldchange data, cut and run pathway workflow    
    dataset_id = getFoldChangeData(history, history_client)['id']
    
    
    return_collection = [{'accessionNo':argDictionary['accessionNumber'], 'foldChange': getUrl(dataset_id), 'PCA': getUrl(pca["id"]),'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    
    
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable' : {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber' : comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
        
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
        
    if argDictionary['species'] in ["Rat","Cow","Horse","Pig","Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        
        if argDictionary['species'] == "Rat":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Cow":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Horse":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigStringNetwork.txt")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigGeneLengths.tabular")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt")
        
                
        pathwayDatamap = {'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)                  
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            return_dict = {'accessionNo':argDictionary['accessionNumber'],
                           'factor':comparisonDict['Factor'],
                           'comparisonNum':comparisonDict['Numerator'],
                           'comparisonDenom':comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
                history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular',
                history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html',
                history, history_client)['id'])
            return_collection.append(return_dict)
       
        # Hard code keys to define the order
        keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots',
        'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS']
        
        outFileName = 'output/' +  argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        
        with open(outFileName, 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter = "\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        #tool_client.upload_file(outFileName, history['id'], file_type='tsv')
        
        return return_collection
    else:  
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
            
       
        if argDictionary['species'] == "Mouse":  
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt")
            
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
        else:
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
    
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

    
        
            #pathwayDatamap['1'] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            return_dict = {'accessionNo':argDictionary['accessionNumber'],
                           'factor':comparisonDict['Factor'],
                           'comparisonNum':comparisonDict['Numerator'],
                           'comparisonDenom':comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
                history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', 
                history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', 
                history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', 
                history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
                history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular',
                history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html',
                history, history_client)['id'])
            return_collection.append(return_dict)
       
        # Hard code keys to define the order
        keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots','pathways','enrichPlot', 'enrichmentTable',
        'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS']
        
        outFileName = 'output/' +  argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        
        with open(outFileName, 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter = "\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        
        return return_collection
Example #15
#!/usr/bin/python

# version 1.0.0  18-Apr-2018
"""
This tools imports a data set into galaxy


"""

import sys

###REPLACE###  sys.path.append('/PATH/TO/lib/python2.7/site-packages/bioblend-0.8.0-py2.7.egg')
###REPLACE###  sys.path.append('/PATH/TO/lib/python2.7/site-packages/requests_toolbelt-0.7.0-py2.7.egg')

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.tools import ToolClient

###REPLACE###  url = "GALAXY URL"
###REPLACE###  key = "API KEY of an ADMIN USER"

history_id = sys.argv[1]
file_path = sys.argv[2]

#print("Initiating Galaxy connection")

gi = GalaxyInstance(url=url, key=key)

toolClient = ToolClient(gi)

uploadedFile = toolClient.upload_file(file_path, history_id)
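The script expects a history id and a file path as positional arguments; an invocation sketch (script name and values are illustrative):

# python import_dataset.py f2db41e1fa331b3e /data/to_import/annotations.tabular

Example #16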
class DataManagers:
    def __init__(self, galaxy_instance, configuration):
        """
        :param galaxy_instance: A GalaxyInstance object (import from bioblend.galaxy)
        :param configuration: A dictionary. Examples in the ephemeris documentation.
        """
        self.gi = galaxy_instance
        self.config = configuration
        self.tool_data_client = ToolDataClient(self.gi)
        self.tool_client = ToolClient(self.gi)
        self.possible_name_keys = ['name',
                                   'sequence_name']  # In order of importance!
        self.possible_value_keys = ['value', 'sequence_id',
                                    'dbkey']  # In order of importance!
        self.data_managers = self.config.get('data_managers')
        self.genomes = self.config.get('genomes', '')
        self.source_tables = DEFAULT_SOURCE_TABLES
        self.fetch_jobs = []
        self.skipped_fetch_jobs = []
        self.index_jobs = []
        self.skipped_index_jobs = []

    def initiate_job_lists(self):
        """
        Determines which data managers should be run to populate the data tables.
        Distinguishes between fetch jobs (download files) and index jobs.
        :return: populate self.fetch_jobs, self.skipped_fetch_jobs, self.index_jobs and self.skipped_index_jobs
        """
        self.fetch_jobs = []
        self.skipped_fetch_jobs = []
        self.index_jobs = []
        self.skipped_index_jobs = []
        for dm in self.data_managers:
            jobs, skipped_jobs = self.get_dm_jobs(dm)
            if self.dm_is_fetcher(dm):
                self.fetch_jobs.extend(jobs)
                self.skipped_fetch_jobs.extend(skipped_jobs)
            else:
                self.index_jobs.extend(jobs)
                self.skipped_index_jobs.extend(skipped_jobs)

    def get_dm_jobs(self, dm):
        """Gets the job entries for a single dm. Puts entries that already present in skipped_job_list.
        :returns job_list, skipped_job_list"""
        job_list = []
        skipped_job_list = []
        items = self.parse_items(dm.get('items', ['']))
        for item in items:
            dm_id = dm['id']
            params = dm['params']
            inputs = dict()
            # Iterate over all parameters, replace occurrences of {{item}} with the current processing item
            # and create the tool_inputs dict for running the data manager job
            for param in params:
                key, value = list(param.items())[0]
                value_template = Template(value)
                value = value_template.render(item=item)
                inputs.update({key: value})

            job = dict(tool_id=dm_id, inputs=inputs)

            data_tables = dm.get('data_table_reload', [])
            if self.input_entries_exist_in_data_tables(data_tables, inputs):
                skipped_job_list.append(job)
            else:
                job_list.append(job)
        return job_list, skipped_job_list

    def dm_is_fetcher(self, dm):
        """Checks whether the data manager fetches a sequence instead of indexing.
        This is based on the source table.
        :returns True if dm is a fetcher. False if it is not."""
        data_tables = dm.get('data_table_reload', [])
        for data_table in data_tables:
            if data_table in self.source_tables:
                return True
        return False

    def data_table_entry_exists(self, data_table_name, entry, column='value'):
        """Checks whether an entry exists in the a specified column in the data_table."""
        try:
            data_table_content = self.tool_data_client.show_data_table(
                data_table_name)
        except Exception:
            raise Exception('Table "%s" does not exist' % data_table_name)

        try:
            column_index = data_table_content.get('columns').index(column)
        except ValueError:
            raise ValueError('Column "%s" does not exist in %s' %
                             (column, data_table_name))

        for field in data_table_content.get('fields'):
            if field[column_index] == entry:
                return True
        return False

    def input_entries_exist_in_data_tables(self, data_tables, input_dict):
        """Checks whether name and value entries from the input are already present in the data tables.
        If an entry is missing in of the tables, this function returns False"""
        value_entry = get_first_valid_entry(input_dict,
                                            self.possible_value_keys)
        name_entry = get_first_valid_entry(input_dict, self.possible_name_keys)

        # Return False if name and value entries are both None
        if not value_entry and not name_entry:
            return False

        # Check every data table for existence of name and value
        # Return False as soon as entry is not present
        for data_table in data_tables:
            if value_entry:
                if not self.data_table_entry_exists(
                        data_table, value_entry, column='value'):
                    return False
            if name_entry:
                if not self.data_table_entry_exists(
                        data_table, name_entry, column='name'):
                    return False
        # If all checks are passed the entries are present in the database tables.
        return True

    def parse_items(self, items):
        """
        Parses items with jinja2.
        :param items: the items to be parsed
        :return: the parsed items
        """
        if bool(self.genomes):
            items_template = Template(json.dumps(items))
            rendered_items = items_template.render(
                genomes=json.dumps(self.genomes))
            # Remove trailing " if present
            rendered_items = rendered_items.strip('"')
            items = json.loads(rendered_items)
        return items

    def run(self, log=None, ignore_errors=False, overwrite=False):
        """
        Runs the data managers.
        :param log: The log to be used.
        :param ignore_errors: Ignore erroring data_managers. Continue regardless.
        :param overwrite: Overwrite existing entries in data tables
        """
        self.initiate_job_lists()
        all_succesful_jobs = []
        all_failed_jobs = []
        all_skipped_jobs = []

        if not log:
            log = logging.getLogger()

        def run_jobs(jobs, skipped_jobs):
            job_list = []
            for skipped_job in skipped_jobs:
                if overwrite:
                    log.info(
                        '%s already run for %s. Entry will be overwritten.' %
                        (skipped_job["tool_id"], skipped_job["inputs"]))
                    jobs.append(skipped_job)
                else:
                    log.info('%s already run for %s. Skipping.' %
                             (skipped_job["tool_id"], skipped_job["inputs"]))
                    all_skipped_jobs.append(skipped_job)
            for job in jobs:
                started_job = self.tool_client.run_tool(
                    history_id=None,
                    tool_id=job["tool_id"],
                    tool_inputs=job["inputs"])
                log.info(
                    'Dispatched job %i. Running DM: "%s" with parameters: %s' %
                    (started_job['outputs'][0]['hid'], job["tool_id"],
                     job["inputs"]))
                job_list.append(started_job)

            successful_jobs, failed_jobs = wait(self.gi, job_list, log)
            if failed_jobs:
                if not ignore_errors:
                    log.error('Not all jobs successful! aborting...')
                    raise RuntimeError('Not all jobs successful! aborting...')
                else:
                    log.warning('Not all jobs successful! ignoring...')
            all_succesful_jobs.extend(successful_jobs)
            all_failed_jobs.extend(failed_jobs)

        log.info(
            "Running data managers that populate the following source data tables: %s"
            % self.source_tables)
        run_jobs(self.fetch_jobs, self.skipped_fetch_jobs)
        log.info("Running data managers that index sequences.")
        run_jobs(self.index_jobs, self.skipped_index_jobs)

        log.info('Finished running data managers. Results:')
        log.info('Successful jobs: %i ' % len(all_succesful_jobs))
        log.info('Skipped jobs: %i ' % len(all_skipped_jobs))
        log.info('Failed jobs: %i ' % len(all_failed_jobs))
        InstallResults = namedtuple(
            "InstallResults",
            ["successful_jobs", "failed_jobs", "skipped_jobs"])
        return InstallResults(successful_jobs=all_succesful_jobs,
                              failed_jobs=all_failed_jobs,
                              skipped_jobs=all_skipped_jobs)
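A hedged driver sketch for this class; the configuration shape follows what the constructor and get_dm_jobs expect, but the data manager id, parameters, and genome are illustrative:

from bioblend.galaxy import GalaxyInstance

config = {
    'data_managers': [{
        'id': 'data_manager_fetch_genome_dbkeys_all_fasta',  # illustrative tool id
        'params': [{'dbkey_source|dbkey': '{{ item }}'}],    # {{ item }} is rendered per item
        'items': ['sacCer3'],
        'data_table_reload': ['all_fasta'],
    }],
}
gi = GalaxyInstance(url='https://your.galaxy.server', key='ADMIN_API_KEY')
results = DataManagers(gi, config).run()
print(len(results.successful_jobs), 'jobs succeeded')

Example #17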
def get_tool_panel(gi):
    tool_client = ToolClient(gi)
    return tool_client.get_tool_panel()
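The panel mixes sections and bare tools; a short walk over the result (assuming `gi` as in the other examples, and the 'ToolSection'/'elems' shapes of the Galaxy tool panel API):

panel = get_tool_panel(gi)
for entry in panel:
    if entry.get('model_class') == 'ToolSection':
        # sections carry their tools in an 'elems' list
        print('section:', entry['name'], len(entry.get('elems', [])))
    else:
        print('tool:', entry.get('id'))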
Example #18
if len(sys.argv) > 1:
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print("You passed %s I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')
api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')

file_name_re = re.compile(parser.get('Globals', 'sample_re'))



galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
historyClient = HistoryClient(galaxyInstance)
toolClient = ToolClient(galaxyInstance)
workflowClient = WorkflowClient(galaxyInstance)
dataSetClient = DatasetClient(galaxyInstance)

files = get_files(parser.get('Globals','fastq_dir'))
if len(files) == 0:
    print("Not able to find any fastq files; looked in %s" % (parser.get('Globals', 'fastq_dir')))
else:
    print "Found fastq files running workflow for the following files (R2's will be added)"
    print ",".join(files)
    files_to_keep = {}
    for R1 in files:
        input_dir_path = os.path.dirname(R1)+"/"
        R2 = R1.replace('R1','R2')
        if not os.path.exists(R1):
            print "%s File Not Found" % (R1, )
Example #19
 def toolbox(self):
     """
     Gets the toolbox elements from <galaxy_url>/api/tools
     """
     tool_client = ToolClient(self.gi)
     return tool_client.get_tool_panel()
Example #20
    def retrieve_tools_from_instance(cls, instance):

        galaxy_instance = GalaxyInstance(url=instance.url)
        tool_client = ToolClient(galaxy_instance)
        for element in tool_client.get_tools():
            if element['model_class'] == 'Tool':

                tool_name = element['id']
                if '/' in tool_name:
                    tool_name = tool_name.split('/')[-2]

                tool = Tool.query.filter_by(name=tool_name).first()
                if tool is None:
                    tool = Tool(name=tool_name)
                    db.session.add(tool)

                tool.description = element['description']
                tool.display_name = element['name']
                if 'link' in element:
                    link = element.get('link', None)
                    link_start = link.find('/tool_runner')
                    if link_start != -1:
                        tool.link = link[link_start:]

                for edam_operation_id in element.get('edam_operations', []):
                    edam_operation = EDAMOperation.get_from_id(
                        edam_operation_id, allow_creation=True)
                    if edam_operation is not None and edam_operation not in tool.edam_operations:
                        tool.edam_operations.append(edam_operation)

                if 'tool_shed_repository' in element:
                    tool_version = ToolVersion.query\
                                              .filter_by(name=tool_name)\
                                              .filter_by(changeset=element['tool_shed_repository']['changeset_revision'])\
                                              .filter_by(tool_shed=element['tool_shed_repository']['tool_shed'])\
                                              .filter_by(owner=element['tool_shed_repository']['owner'])\
                                              .first()
                else:
                    tool_version = ToolVersion.query\
                                              .filter_by(name=tool_name)\
                                              .filter_by(version=element['version'])\
                                              .filter_by(tool_shed=None)\
                                              .filter_by(owner=None)\
                                              .first()
                if tool_version is None:
                    tool_version = ToolVersion(name=tool_name,
                                               version=element['version'])
                    db.session.add(tool_version)

                if 'tool_shed_repository' in element:
                    tool_version.changeset = element['tool_shed_repository'][
                        'changeset_revision']
                    tool_version.tool_shed = element['tool_shed_repository'][
                        'tool_shed']
                    tool_version.owner = element['tool_shed_repository'][
                        'owner']

                if instance not in tool_version.instances:
                    tool_version.instances.append(instance)

                if tool_version not in tool.versions:
                    tool.versions.append(tool_version)

                db.session.commit()
Example #21
    def retrieve_tools_from_instance(cls, instance):

        galaxy_instance = GalaxyInstance(url=instance.url)
        tool_client = ToolClient(galaxy_instance)
        for element in tool_client.get_tools():
            if element['model_class'] == 'Tool':

                tool_name = element['id']
                if '/' in tool_name:
                    tool_name = tool_name.split('/')[-2]

                try:
                    tool = Tool.objects.get(name=tool_name)
                except Tool.DoesNotExist:
                    tool = Tool(name=tool_name)

                tool.description = element['description']
                tool.display_name = element['name']

                for edam_operation_id in element['edam_operations']:
                    edam_operation = EDAMOperation.get_from_id(
                        edam_operation_id, allow_creation=True)
                    if edam_operation is not None and edam_operation not in tool.edam_operations:
                        tool.edam_operations.append(edam_operation)

                try:
                    if 'tool_shed_repository' in element:
                        tool_version = ToolVersion.objects.get(
                            name=tool_name,
                            changeset=element['tool_shed_repository']
                            ['changeset_revision'],
                            tool_shed=element['tool_shed_repository']
                            ['tool_shed'],
                            owner=element['tool_shed_repository']['owner'])
                    else:
                        tool_version = ToolVersion.objects.get(
                            name=tool_name,
                            version=element['version'],
                            tool_shed=None,
                            owner=None)
                except ToolVersion.DoesNotExist:
                    tool_version = ToolVersion(name=tool_name,
                                               version=element['version'])

                if 'tool_shed_repository' in element:
                    tool_version.changeset = element['tool_shed_repository'][
                        'changeset_revision']
                    tool_version.tool_shed = element['tool_shed_repository'][
                        'tool_shed']
                    tool_version.owner = element['tool_shed_repository'][
                        'owner']

                if instance not in tool_version.instances:
                    tool_version.instances.append(instance)

                tool_version.save()

                if tool_version not in tool.versions:
                    tool.versions.append(tool_version)

                tool.save()
Example #23
#!/usr/bin/env python
"""
Use the bioblend API to create a fresh history and add a set of files to the history that were imported into the container during the build
Usage: create_and_upload_history.py history_name url1 url2 url3 ...
"""
import sys
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.tools import ToolClient

gi = GalaxyInstance(url='http://localhost:80', key='admin')


tc = ToolClient(gi)
lc = HistoryClient(gi)
details = lc.create_history(sys.argv[1])

print "HIST ID: %s" % details["id"]
i = 0
for url in sys.argv:
    url_parts = url.split("/")
    fname = url_parts[-1]
    if i < 2:
        i+=1
        continue
    i+=1
    print "submitting %s as %s" % (url,fname)
    tc.put_url(url,details["id"],file_name=fname)


Example #24
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    
    api_key = ''
    galaxy_host = 'http://localhost:8080/'

    gi = GalaxyInstance(url=galaxy_host, key=api_key)

    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    
    history = history_client.create_history(argDictionary['accessionNumber'])
    # Import the galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')

    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')

    # Run workflow on csv data to create a new history.
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary
    
    datamap = {'1' : {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}

    workflow_client.invoke_workflow(workflow['id'], inputs = datamap, history_id = history['id'], params = params)
    
    # A dirty hack, we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    
    dataset_id = getFoldChangeData(history, history_client)['id']

    
    return_collection = [{'accessionNo':argDictionary['accessionNumber'], 'foldChange': getUrl(dataset_id),
    'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable' : {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber' : comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
        
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    if argDictionary['species'] in ["Rat","Cow","Horse","Pig","Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        
        if argDictionary['species'] == "Rat":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigStringNetwork.txt")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigGeneLengths.tabular")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt")
        
                
        pathwayDatamap = {'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)                  
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
                
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['2']['id'],
                           'exonLength': pathwayDatamap['1']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName(
                'moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName(
                'modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName(
                'slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName(
                'slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)     
       
        # Hard-code the keys to define the column order.
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable',
                'foldChange', 'interactome', 'exonLength', 'moduleNodes', 'modulePlots',
                'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # DictWriter fills missing fields with '' - the first dictionary lacks the per-comparison keys.
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        return return_collection
    else: 
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
            
        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        
        if argDictionary['species'] == "Mouse":  
    
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt")
            
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
        else:
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
    
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)                  
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
                
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['2']['id'],
                           'exonLength': pathwayDatamap['1']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName(
                'moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName(
                'modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName(
                'pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName(
                'enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName(
                'TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName(
                'slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName(
                'slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)     
       
        # Hard-code the keys to define the column order.
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable',
                'foldChange', 'interactome', 'exonLength', 'moduleNodes', 'modulePlots', 'pathways',
                'enrichPlot', 'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # DictWriter fills missing fields with '' - the first dictionary lacks the per-comparison keys.
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        return return_collection
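
The function above leans on several helpers defined elsewhere in its source project (getNumberComplete, getNumberNotComplete, getMostRecentDatasetByName, getUrl and friends). Minimal sketches of four of them, written against the bioblend HistoryClient API; the bodies are assumptions, not the original implementations:

def getNumberNotComplete(history_id, history_client):
    # Count datasets that have not yet reached a terminal state.
    datasets = history_client.show_history(history_id, contents=True)
    return len([d for d in datasets if d['state'] not in ('ok', 'error')])

def getNumberComplete(history_id, history_client):
    # Count datasets that finished successfully.
    datasets = history_client.show_history(history_id, contents=True)
    return len([d for d in datasets if d['state'] == 'ok'])

def getMostRecentDatasetByName(name, history, history_client):
    # show_matching_datasets returns matches in creation order, so the
    # last element is the most recent dataset with that name.
    matches = history_client.show_matching_datasets(history['id'], name_filter=name)
    return matches[-1]

def getUrl(dataset_id):
    # Hypothetical: build a display URL against the Galaxy host used above.
    return 'http://localhost:8080/datasets/%s/display' % dataset_id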
def get_tool_panel(gi):
    tool_client = ToolClient(gi)
    return tool_client.get_tool_panel()
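
get_tool_panel returns the panel as shown in the Galaxy UI: a list of sections, each carrying its tools under an 'elems' key. A short usage sketch (assuming gi is an authenticated GalaxyInstance; the key names follow the standard response shape):

# Print every tool grouped by its panel section.
panel = get_tool_panel(gi)
for section in panel:
    # Top-level tools sit beside sections and have no 'elems' list.
    for tool in section.get('elems', []):
        print('%s -> %s' % (section['name'], tool['id']))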
Exemplo n.º 26
0
if __name__ == '__main__':
    # GET PATH NAMES AND EXTENSIONS FROM COMMAND LINE INPUT
    input_file_full = sys.argv[1]
    input_file_format = input_file_full[input_file_full.rfind(".") + 1:]

    output_file_full = sys.argv[2]
    output_file_format = output_file_full[output_file_full.rfind(".") + 1:]

    # CHOOSE CONVERTER
    tool_id = choose_converter(input_file_format, output_file_format)

    # INITIALIZE GALAXY
    galaxy_instance = GalaxyInstance(url=base_url, key=apikey)
    history_client = HistoryClient(galaxy_instance)
    tool_client = ToolClient(galaxy_instance)
    dataset_client = DatasetClient(galaxy_instance)
    history = history_client.create_history('tmp')

    # UPLOAD FILES (upload_file expects the file_type keyword)
    input_file_1 = tool_client.upload_file(input_file_full, history['id'], file_type='txt')
    input_file_2 = tool_client.upload_file(input_file_full, history['id'], file_type='txt')
    params = {'input_numbers_001': {'src': 'hda', 'id': input_file_1['outputs'][0]['id']},
              'input_numbers_002': {'src': 'hda', 'id': input_file_2['outputs'][0]['id']}}
    wait_4_process(history['id'], "uploading files")

    # RUN CONVERSION
    runtool_output = tool_client.run_tool(history_id=history['id'], tool_id=tool_id, tool_inputs=params)
    wait_4_process(history['id'], "running tool")

    # DOWNLOAD CONVERTED FILE (download_dataset expects a dataset id, not a job id)
    download_output = dataset_client.download_dataset(runtool_output['outputs'][0]['id'],
                                                      output_file_full, use_default_filename=False)
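
The script assumes wait_4_process is defined earlier in its module, along with base_url, apikey and choose_converter. A plausible sketch of the polling helper, using the history's state_details counters (the body is an assumption, not the original implementation; it relies on the module-level history_client):

import time

def wait_4_process(history_id, message):
    # Block until nothing in the history is queued or running.
    while True:
        counts = history_client.show_history(history_id)['state_details']
        if counts.get('queued', 0) == 0 and counts.get('running', 0) == 0:
            break
        print('still %s ...' % message)
        time.sleep(5)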
Exemplo n.º 27
0
def get_tools(gi):
    tool_client = ToolClient(gi)
    return tool_client.get_tools()
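
get_tools also accepts server-side filters, which avoids pulling the full tool list over the wire. A usage sketch (the name keyword comes from the bioblend ToolClient signature; the URL and key are placeholders):

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.tools import ToolClient

gi = GalaxyInstance(url='http://localhost:8080', key='<api-key>')
all_tools = get_tools(gi)
# Or filter on the server side:
upload_tools = ToolClient(gi).get_tools(name='Upload File')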