Пример #1
0
def buildInteractions(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):
    
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db; symbol_hmdb_db={}; global hmdb_symbol_db; hmdb_symbol_db={} ### Create an annotation database for HMDB IDs
    global FileName
    global intNameShort
    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = obligatorySet
    secondDegreeObligatoryCategories=[]
    intNameShort=''
    if obligatoryList == None:
        obligatoryList=[]
    if expressionFile == None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet!= None and (degrees==1 or degrees=='direct'): ### If degrees == 2, this is redundant
        ### This currently adds alot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet
    if PathwayFilter != None:
        if len(PathwayFilter)==1:
            FileName = PathwayFilter[0]
        if isinstance(PathwayFilter, tuple) or isinstance(PathwayFilter, list):
            FileName = string.join(list(PathwayFilter),' ')
            FileName = string.replace(FileName,':','-')
        else:
            FileName = PathwayFilter
        if len(FileName)>40:
            FileName = FileName[:40]
    elif OntologyID != None: FileName = OntologyID
    elif Genes != None: FileName = Genes
    
    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    if len(interactionDirs[0]) == 1: interactionDirs = [interactionDirs]    
    ### Import interaction databases indicated in interactionDirs
    for i in interactionDirs:
        print i
        i = export.findFilename(i)
        i=string.split(i,'-')[1]
        intNameShort+=i[0]

    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation) 
    
    input_IDs = getGeneIDs(Genes)
    try:
        if isinstance(PathwayFilter, tuple):
            for pathway in PathwayFilter:
                IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,pathway,OntologyID,directory)
                for id in IDs:input_IDs[id]=None
        else:
            input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception: None
    if expressionFile == None or len(expressionFile)==0:
        expressionFile = exportSelectedIDs(input_IDs) ### create an expression file
    elif IncludeExpIDs: ### Prioritize selection of IDs for interactions WITH the primary query set (not among expression input IDs)
        secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
    sif_file,symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    output_filename = exportGraphImage(species,sif_file,expressionFile)
    return output_filename
Пример #2
0
def buildInteractions(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):
    
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db; symbol_hmdb_db={}; global hmdb_symbol_db; hmdb_symbol_db={} ### Create an annotation database for HMDB IDs
    global FileName
    global intNameShort
    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = obligatorySet
    secondDegreeObligatoryCategories=[]
    intNameShort=''
    if obligatoryList == None:
        obligatoryList=[]
    if expressionFile == None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet!= None and (degrees==1 or degrees=='direct'): ### If degrees == 2, this is redundant
        ### This currently adds alot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet
    if PathwayFilter != None:
        if len(PathwayFilter)==1:
            FileName = PathwayFilter[0]
        if isinstance(PathwayFilter, tuple) or isinstance(PathwayFilter, list):
            FileName = string.join(list(PathwayFilter),' ')
            FileName = string.replace(FileName,':','-')
        else:
            FileName = PathwayFilter
        if len(FileName)>40:
            FileName = FileName[:40]
    elif OntologyID != None: FileName = OntologyID
    elif Genes != None: FileName = Genes
    
    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    if len(interactionDirs[0]) == 1: interactionDirs = [interactionDirs]    
    ### Import interaction databases indicated in interactionDirs
    for i in interactionDirs:
        print i
        i = export.findFilename(i)
        i=string.split(i,'-')[1]
        intNameShort+=i[0]

    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation) 
    
    input_IDs = getGeneIDs(Genes)
    try:
        if isinstance(PathwayFilter, tuple):
            for pathway in PathwayFilter:
                IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,pathway,OntologyID,directory)
                for id in IDs:input_IDs[id]=None
        else:
            input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception: None
    if expressionFile == None or len(expressionFile)==0:
        expressionFile = exportSelectedIDs(input_IDs) ### create an expression file
    elif IncludeExpIDs: ### Prioritize selection of IDs for interactions WITH the primary query set (not among expression input IDs)
        secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
    sif_file,symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    output_filename = exportGraphImage(species,sif_file,expressionFile)
    return output_filename
Пример #3
0
def interactionPermuteTest(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):

    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db; symbol_hmdb_db={}; global hmdb_symbol_db; hmdb_symbol_db={} ### Create an annotation database for HMDB IDs
    global FileName
    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = obligatorySet
    secondDegreeObligatoryCategories=[]
    if obligatoryList == None:
        obligatoryList=[]
    if expressionFile == None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet!= None and (degrees==1 or degrees=='direct'): ### If degrees == 2, this is redundant
        ### This currently adds alot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet
    if PathwayFilter != None: FileName = PathwayFilter
    elif OntologyID != None: FileName = OntologyID
    elif Genes != None: FileName = Genes
    
    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    
    ### Import interaction databases indicated in interactionDirs
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation) 
    input_IDs = getGeneIDs(Genes)
    try: input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception: None
    permutations = 10000; p = 0
    secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs) ### Get the number of unique genes
    sif_file, original_symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    #print len(original_symbol_pair_unique)
    ensembl_unique = map(lambda x: x, ensembl_symbol_db)
    
    interaction_lengths = []
    import random
    while p < permutations:
        random_inputs = random.sample(ensembl_unique,len(input_IDs))
        random_input_db={}
        #print len(random_inputs), len(input_IDs); sys.exit()
        for i in random_inputs: random_input_db[i]=i
        secondaryQueryIDs = importqueryResults(species,random_inputs,{})[0]
        input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
        sif_file, symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,inputDir)
        #print len(symbol_pair_unique);sys.exit()
        interaction_lengths.append(len(symbol_pair_unique))
        p+=1

    interaction_lengths.sort(); interaction_lengths.reverse()
    y = len(original_symbol_pair_unique)
    print 'permuted length distribution:',interaction_lengths
    print 'original length:',y
    k=0
    for i in interaction_lengths:
        if i>=y: k+=1
    
    print 'p-value:',float(k)/float(permutations)
Пример #4
0
def interactionPermuteTest(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):

    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db; symbol_hmdb_db={}; global hmdb_symbol_db; hmdb_symbol_db={} ### Create an annotation database for HMDB IDs
    global FileName
    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = obligatorySet
    secondDegreeObligatoryCategories=[]
    if obligatoryList == None:
        obligatoryList=[]
    if expressionFile == None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet!= None and (degrees==1 or degrees=='direct'): ### If degrees == 2, this is redundant
        ### This currently adds alot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet
    if PathwayFilter != None: FileName = PathwayFilter
    elif OntologyID != None: FileName = OntologyID
    elif Genes != None: FileName = Genes
    
    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    
    ### Import interaction databases indicated in interactionDirs
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation) 
    input_IDs = getGeneIDs(Genes)
    try: input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception: None
    permutations = 10000; p = 0
    secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs) ### Get the number of unique genes
    sif_file, original_symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    #print len(original_symbol_pair_unique)
    ensembl_unique = map(lambda x: x, ensembl_symbol_db)
    
    interaction_lengths = []
    import random
    while p < permutations:
        random_inputs = random.sample(ensembl_unique,len(input_IDs))
        random_input_db={}
        #print len(random_inputs), len(input_IDs); sys.exit()
        for i in random_inputs: random_input_db[i]=i
        secondaryQueryIDs = importqueryResults(species,random_inputs,{})[0]
        input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
        sif_file, symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,inputDir)
        #print len(symbol_pair_unique);sys.exit()
        interaction_lengths.append(len(symbol_pair_unique))
        p+=1

    interaction_lengths.sort(); interaction_lengths.reverse()
    y = len(original_symbol_pair_unique)
    print 'permuted length distribution:',interaction_lengths
    print 'original length:',y
    k=0
    for i in interaction_lengths:
        if i>=y: k+=1
    
    print 'p-value:',float(k)/float(permutations)