def main2():
    """
    Side-by-side look at curated models 336 and 365.

    Prints the first two rows of 'stats3.npy' whose first column is one of
    the two model numbers, then the 'miriam' annotations of each model, and
    finally every annotation key present in both models together with the
    value each model stores for it.
    """
    #336,365
    modelNumbers = [336, 365]
    matchingRows = []
    for row in np.load('stats3.npy'):
        if row[0] not in modelNumbers:
            continue
        matchingRows.append(row)
        # two matching rows are all we need
        if len(matchingRows) == 2:
            break
    print(matchingRows)

    reader = libsbml.SBMLReader()
    annotationSets = []
    for sbmlPath in ('XMLExamples/curated/BIOMD0000000336.xml',
                     'XMLExamples/curated/BIOMD0000000365.xml'):
        document = reader.readSBMLFromFile(sbmlPath)
        parser = libsbml2bngl.SBML2BNGL(document.getModel())
        annotations = analyzeRDF.getAnnotations(parser, 'miriam')
        print(annotations)
        annotationSets.append(annotations)

    firstAnnotations, secondAnnotations = annotationSets
    for key in firstAnnotations:
        if key in secondAnnotations:
            # same output as the statement form `print a, b, c`
            print('%s %s %s' % (key, firstAnnotations[key], secondAnnotations[key]))
def createSpeciesCompositionGraph(parser, database, configurationFile, namingConventions,
                                  speciesEquivalences=None, bioGridFlag=False):
    """
    Builds the species dependency graph of a model and consolidates it.

    database.dependencyGraph maps a species name to a list of candidate
    compositions (each a list of species names). It is filled, in this
    order, from:
      1. binding reactions (bindingReactionsAnalysis)
      2. the lexical dependency graph returned by classifyReactions
      3. non-'Binding' naming equivalences (base/modified pairs)
      4. 'Transformation' reactions, only if database.forceModificationFlag
      5. user defined labels (database.labelDictionary)
      6. string-similarity labels (database.lexicalLabelDictionary), which
         never override an entry that already has a composition
      7. findClosestModification over the species that are still empty

    Keywords:
    ---parser: provides getReactions(atomize=True) and getSpecies()
    ---database: mutated in place; receives sbmlAnalyzer, classifications,
       eequivalenceTranslator, reactionProperties, translator,
       labelDictionary, lexicalLabelDictionary, dependencyGraph,
       tmpEquivalence, weights and artificialEquivalenceTranslator
    ---configurationFile, namingConventions, speciesEquivalences: forwarded
       to analyzeSBML.SBMLAnalyzer
    ---bioGridFlag: kept for interface compatibility, not used here

    Returns the tuple (consolidated dependency graph, database).
    """
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    database.sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(
        parser, configurationFile, namingConventions, speciesEquivalences)

    #classify reactions
    # indirectEquivalenceTranslator was only consumed by the 'complex
    # catalysis reactions' pass, which has been disabled; it is unpacked
    # solely to match the shape of the return value.
    (database.classifications, equivalenceTranslator,
     database.eequivalenceTranslator, indirectEquivalenceTranslator,
     adhocLabelDictionary, lexicalDependencyGraph) = \
        database.sbmlAnalyzer.classifyReactions(rules, molecules)

    #####input processing
    #states,components,other user options
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    #user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)

    (database.translator, database.labelDictionary,
     database.lexicalLabelDictionary) = database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    #analyzeSBML.analyzeNamingConventions(molecules)
    # NOTE(review): the result is not used in this function; the call is
    # kept in case getAnnotations has side effects -- confirm and remove.
    analyzeRDF.getAnnotations(parser, 'uniprot')

    ####dependency graph
    #binding reactions
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(database.dependencyGraph,
                                 list(parseReactions(reaction)), classification)

    # lexical dependencies overwrite whatever binding analysis stored
    for element in lexicalDependencyGraph:
        database.dependencyGraph[element] = lexicalDependencyGraph[element]

    #catalysis reactions
    for key in database.eequivalenceTranslator:
        for namingEquivalence in database.eequivalenceTranslator[key]:
            # the shortest name is taken as the base species, the longest
            # as its modified form
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key != 'Binding':
                if baseElement not in database.dependencyGraph or \
                        database.dependencyGraph[baseElement] == []:
                    if modElement not in database.dependencyGraph or \
                            database.dependencyGraph[modElement] == []:
                        database.dependencyGraph[baseElement] = []
                    #do we have a meaningful reverse dependence?
                    #elif all([baseElement not in x for x in database.dependencyGraph[modElement]]):
                    #    addToDependencyGraph(database.dependencyGraph,baseElement,[modElement])
                    #    continue
                addToDependencyGraph(database.dependencyGraph, modElement,
                                     [baseElement])

    #non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification == 'Transformation':
                preaction = list(parseReactions(reaction))
                # the side whose name is contained in the other one is the base
                if preaction[1][0] in preaction[0][0]:
                    base = preaction[1][0]
                    mod = preaction[0][0]
                else:
                    mod = preaction[1][0]
                    base = preaction[0][0]
                if database.dependencyGraph[mod] == []:
                    database.dependencyGraph[mod] = [[base]]

    # The 'complex catalysis reactions' pass driven by
    # indirectEquivalenceTranslator used to sit here as a disabled,
    # string-quoted block; see version control history if it is needed.

    #user defined stuff
    for element in database.labelDictionary:
        userLabel = database.labelDictionary[element][0]
        if len(userLabel) == 0 or element == userLabel[0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            database.dependencyGraph[element] = [list(userLabel)]

    #stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        #similarity analysis has less priority than anything we discovered
        #before
        if element in database.dependencyGraph and \
                len(database.dependencyGraph[element]) > 0:
            continue

        lexicalLabel = database.lexicalLabelDictionary[element][0]
        if len(lexicalLabel) == 0 or element == lexicalLabel[0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            logMess('INFO:Atomization', 'added induced speciesStructure {0}={1}'
                    .format(element, lexicalLabel))
            database.dependencyGraph[element] = [list(lexicalLabel)]

    #pure lexical analysis
    orphanedSpecies = [x for x in database.dependencyGraph
                       if database.dependencyGraph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency, database.tmpEquivalence = \
        database.sbmlAnalyzer.findClosestModification(orphanedSpecies, strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(database.dependencyGraph, species, [])
        for instance in tmpDependency[species]:
            addToDependencyGraph(database.dependencyGraph, species, instance)

    #####sct
    #FIXME: wtf was unevenelementdict supposed to be for
    #print database.dependencyGraph
    (prunnedDependencyGraph, database.weights, _,
     database.artificialEquivalenceTranslator) = consolidateDependencyGraph(
        database.dependencyGraph, equivalenceTranslator,
        database.eequivalenceTranslator, database.sbmlAnalyzer)
    return prunnedDependencyGraph, database
def loadAnnotations(fileName):
    """
    Reads the SBML file *fileName* and returns what
    analyzeRDF.getAnnotations yields for it with the 'miriam' selector.
    """
    sbmlReader = libsbml.SBMLReader()
    sbmlDocument = sbmlReader.readSBMLFromFile(fileName)
    modelParser = libsbml2bngl.SBML2BNGL(sbmlDocument.getModel())
    return analyzeRDF.getAnnotations(modelParser, 'miriam')
def identifyNamingConvention():
    '''
    extracts statistics from the code

    For every curated model BIOMD0000000001..409 each naming convention
    file in ./reactionDefinitions is tried with useID True and False, and
    the combination giving the highest evaluation() score is kept.

    Side effects:
    ---stats3.npy: one row per model with at least one observable:
       [index, convention digit, score/len(obs), molecules/len(obs),
        len(obs), bestUseID]
    ---stats3b.npy: per model, a list of [name, [count, annotations]]
       for the single-molecule entries of the best translator
    ---progress and errors are printed to stdout

    Returns None.
    '''
    reader = libsbml.SBMLReader()
    jsonFiles = [f for f in listdir('./reactionDefinitions') if f.endswith('.json')]
    translationLevel = []
    arrayMolecules = []
    rules = []
    #go through all curated models in the biomodels database
    for index in range(1, 410):
        bestTranslator = {}
        try:
            nameStr = 'BIOMD0000000%03d' % (index)
            document = reader.readSBMLFromFile('XMLExamples/curated/' + nameStr + '.xml')
            parser = SBML2BNGL(document.getModel())
            database = structures.Databases()

            # no newline: the result (or error) for this model follows on
            # the same output line
            sys.stdout.write(nameStr + '.xml ')
            naming = 'reactionDefinition0.json'
            bestUseID = True
            numberofMolecules = numberOfSpecies = 0
            #iterate through our naming conventions and selects that which
            #creates the most rulified elements in the translator
            for jsonFile in jsonFiles:
                for useID in [True, False]:
                    try:
                        oldmaxi = numberOfSpecies
                        parser = SBML2BNGL(document.getModel(), useID)
                        database = structures.Databases()
                        translator = m2c.transformMolecules(
                            parser, database, 'reactionDefinitions/' + jsonFile)
                        numberOfSpecies = max(
                            numberOfSpecies,
                            evaluation(len(parser.getSpecies()), translator))
                        if oldmaxi != numberOfSpecies:
                            # NOTE(review): the original indentation was
                            # lost; it is assumed that all of the 'best'
                            # bookkeeping belongs to this branch.
                            naming = jsonFile
                            bestTranslator = translator
                            bestUseID = useID
                            _, rules, _ = parser.getReactions(translator)
                            numberofMolecules = len(translator)
                    except Exception:
                        # best effort: a failing convention is reported and
                        # skipped; KeyboardInterrupt/SystemExit propagate
                        print('ERROR %s' % (sys.exc_info()[0],))
        except Exception:
            print('ERROR %s' % (sys.exc_info()[0],))
            continue

        # NOTE(review): parser is the one from the last combination tried,
        # not necessarily the best one.
        _, _, obs = parser.getSpecies()
        rdfAnnotations = analyzeRDF.getAnnotations(parser, 'miriam')
        #go through the annotation list and assign which species
        #correspond to which uniprot number (if it exists)
        #similarly list the number of times each individual element appears
        molecules = {}
        # NOTE(review): naming[-6] is a character, so this comparison with
        # the integer 0 is always true; '0' may have been intended. Left
        # unchanged because switching would alter which models get stats.
        if naming[-6] != 0:
            for element in bestTranslator:
                if len(bestTranslator[element].molecules) == 1:
                    name = bestTranslator[element].molecules[0].name
                    for annotation in rdfAnnotations:
                        if name in rdfAnnotations[annotation]:
                            if name not in molecules:
                                molecules[name] = [0, []]
                            if annotation not in molecules[name][1]:
                                # append, not extend: extend would splice the
                                # annotation string in character by character
                                molecules[name][1].append(annotation)

                    if name not in molecules:
                        molecules[name] = [1, []]
                    for rule in rules:
                        if name in rule:
                            molecules[name][0] += 1

        if len(obs) != 0:
            statsRow = [index * 1.0, int(naming[-6]) * 1.0,
                        numberOfSpecies * 1.0 / len(obs),
                        numberofMolecules * 1.0 / len(obs),
                        len(obs) * 1.0, bestUseID]
            print(' '.join(str(value) for value in statsRow))

            arrayMolecule = [[x, molecules[x]] for x in molecules]
            arrayMolecules.append(arrayMolecule)
            translationLevel.append(statsRow)
            # rewritten after every model so partial results survive a crash
            np.save('stats3.npy', np.array(translationLevel))
        else:
            arrayMolecules.append([])
        #print arrayMolecules
        np.save('stats3b.npy', np.array(arrayMolecules))
def transformMolecules(
    parser, database, configurationFile, namingConventions, speciesEquivalences=None, bioGridFlag=False
):
    """
    main method. Receives a parser configuration, a configurationFile and a
    list of user defined species equivalences and returns a dictionary
    containing an atomized version of the model
    Keywords:
    ---parser: data structure containing the reactions and species we will use
    ---database:data structure containing the result of the outgoing translation
    ---configurationFile
    ---namingConventions: forwarded to analyzeSBML.SBMLAnalyzer
    ---speciesEquivalences:predefined species
    ---bioGridFlag: forwarded to atomize()

    Returns database.translator after atomize() and propagateChanges() have
    been applied to it.
    """
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(configurationFile, namingConventions, speciesEquivalences)
    # classify reactions
    classifications, equivalenceTranslator, eequivalenceTranslator, indirectEquivalenceTranslator = sbmlAnalyzer.classifyReactions(
        rules, molecules
    )
    #####input processing
    # states,components,other user options
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator, database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    # analyzeSBML.analyzeNamingConventions(molecules)
    # NOTE(review): rdfAnnotations is not read again in this function
    rdfAnnotations = analyzeRDF.getAnnotations(parser, "uniprot")
    ####dependency graph
    # binding reactions
    for reaction, classification in zip(rules, classifications):
        dependencyGraph(database.dependencyGraph, list(parseReactions(reaction)), classification)
    # catalysis reactions
    for key in eequivalenceTranslator:
        for namingEquivalence in eequivalenceTranslator[key]:
            # shortest name = base species, longest name = modified species
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key != "Binding":
                if baseElement not in database.dependencyGraph or database.dependencyGraph[baseElement] == []:
                    if modElement not in database.dependencyGraph or database.dependencyGraph[modElement] == []:
                        database.dependencyGraph[baseElement] = []
                    elif [baseElement] not in database.dependencyGraph[modElement]:
                        # the modified species already has a composition that
                        # is not the base: record the reverse dependency and
                        # skip the forward one below
                        addToDependencyGraph(database.dependencyGraph, baseElement, [modElement])
                        continue
                # NOTE(review): the original indentation was lost; this call
                # is assumed to belong to the `key != "Binding"` branch.
                addToDependencyGraph(database.dependencyGraph, modElement, [baseElement])
    # complex catalysis reactions
    for key in indirectEquivalenceTranslator:
        # first remove these entries from the dependencyGraph since
        # they are not true bindingReactions
        for namingEquivalence in indirectEquivalenceTranslator[key]:
            tmp = deepcopy(namingEquivalence[1])
            if tmp in database.dependencyGraph[namingEquivalence[0][0]]:
                database.dependencyGraph[namingEquivalence[0][0]].remove(tmp)
            elif tmp in database.dependencyGraph[namingEquivalence[0][1]]:
                database.dependencyGraph[namingEquivalence[0][1]].remove(tmp)
            else:
                # retry with the components listed in the opposite order
                tmp.reverse()
                if tmp in database.dependencyGraph[namingEquivalence[0][0]]:
                    database.dependencyGraph[namingEquivalence[0][0]].remove(tmp)
                elif tmp in database.dependencyGraph[namingEquivalence[0][1]]:
                    database.dependencyGraph[namingEquivalence[0][1]].remove(tmp)
            # then add the new, true dependencies
            # if its not supposed to be a basic element
            # tmp = symmetric difference of entries [1] and [2], order kept
            tmp = [x for x in namingEquivalence[1] if x not in namingEquivalence[2]]
            tmp.extend([x for x in namingEquivalence[2] if x not in namingEquivalence[1]])
            tmp2 = deepcopy(tmp)
            tmp2.reverse()
            ##TODO: map back for the elements in namingEquivalence[2]
            if (
                tmp not in database.dependencyGraph[namingEquivalence[3][0]]
                and tmp2 not in database.dependencyGraph[namingEquivalence[3][0]]
            ):
                # only replace the composition when every component is
                # itself a known species
                if all(x in database.dependencyGraph for x in tmp):
                    database.dependencyGraph[namingEquivalence[3][0]] = [tmp]
    # user defined stuff
    for element in database.labelDictionary:
        if len(database.labelDictionary[element][0]) == 0 or element == database.labelDictionary[element][0][0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            # a user-defined composition replaces anything inferred above
            database.dependencyGraph[element] = [list(database.labelDictionary[element][0])]
    #####sct
    prunnedDependencyGraph, weights, unevenElementDict = consolidateDependencyGraph(
        database.dependencyGraph, equivalenceTranslator
    )
    # FIXME: I'm contaminating these data structures somewhere. In here
    # I'm just calling the original generator to recover them.
    classifications, equivalenceTranslator, eequivalenceTranslator, indirectEquivalenceTranslator = sbmlAnalyzer.classifyReactions(
        rules, molecules
    )
    # process entries in ascending order of their second field
    weights = sorted(weights, key=lambda rule: rule[1])
    # print {x:str(database.translator[x]) for x in database.translator}
    atomize(
        prunnedDependencyGraph,
        weights,
        database.translator,
        database.reactionProperties,
        eequivalenceTranslator,
        bioGridFlag,
    )
    propagateChanges(database.translator, prunnedDependencyGraph)
    return database.translator
reader = libsbml.SBMLReader() #BIOMD0000000272 document = reader.readSBMLFromFile('XMLExamples/curated/BIOMD0000000272.xml') #document = reader.readSBMLFromFile('XMLExamples/simple4.xml') model = document.getModel() parser = SBML2BNGL(model) # print parser.getReactions() _,rules,_ = parser.getReactions() #print rules #print rules classifications = analyzeSBML.classifyReactions(rules) print classifications print 'preparing database...' rdfAnnotations = analyzeRDF.getAnnotations(parser,'uniprot') for rule,classification in zip(rules,classifications): #print rule reaction2 = list(parseReactions(rule)) #print reaction2 totalElements = [item for sublist in reaction2 for item in sublist] totalElements = list(set(totalElements)) labelDictionary = defineCorrespondence(reaction2,totalElements, labelDictionary,rawDatabase, classification,rdfAnnotations) #print labelDictionary labelDictionary = resolveCorrespondence(labelDictionary) labelDictionary = resolveCorrespondence(labelDictionary) print 'label',labelDictionary #print labelDictionary #print labelDictionary
def transformMolecules(parser,database,configurationFile,speciesEquivalences=None):
    '''
    Fills database.labelDictionary and database.translator from the
    reactions of a model.

    Keywords:
    ---parser: provides getReactions(), getSpecies() and getSpeciesAnnotation()
    ---database: mutated in place (reactionProperties, translator,
       labelDictionary, rawLabelDictionary, classifications); its
       rawDatabase attribute is read
    ---configurationFile, speciesEquivalences: forwarded to
       analyzeSBML.SBMLAnalyzer

    Returns the tuple
    (database.translator, logMess.log, parser.getSpeciesAnnotation()).
    '''
    #labelDictionary = {}
    _,rules,_ = parser.getReactions()
    molecules,_,_ = parser.getSpecies()
    #synthesisdatabase = {}
    #translator = {}
    sbmlAnalyzer =analyzeSBML.SBMLAnalyzer(configurationFile,speciesEquivalences)
    classifications,equivalenceTranslator,eequivalenceTranslator = sbmlAnalyzer.classifyReactions(rules,molecules)
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator,database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes()
    #analyzeSBML.analyzeNamingConventions(molecules)
    rdfAnnotations = analyzeRDF.getAnnotations(parser,'uniprot')
    #print rdfAnnotations
    #print classifications
    #for element in equivalenceTranslator:
    #    addToLabelDictionary(database.labelDictionary,element[0],min(element,key=len))
    #    addToLabelDictionary(database.labelDictionary,element[1],min(element,key=len))
    #for element in database.labelDictionary:
    #    database.labelDictionary[element] = [(min(database.labelDictionary[element],key=len),)]
    #STEP1: Use reaction information to infer w print zip(rules,classifications)
    for rule,classification in zip(rules,classifications):
        reaction2 = list(parseReactions(rule))
        # unique species names appearing anywhere in the reaction
        totalElements = [item for sublist in reaction2 for item in sublist]
        totalElements = list(set(totalElements))
        #obtain elements from the equivalenceTranstor that contain at least two
        #reactants/products
        #equivalences = [x for x in equivalenceTranslator if x[0] in reaction2[0] or x[1] in reaction2[1]]
        database.labelDictionary = defineCorrespondence(reaction2,totalElements,
                                                        database,classification,
                                                        rdfAnnotations)
        #if 'ERKi_MEKi_PP' in database.labelDictionary:
        #    print database.labelDictionary['ERKi_MEKi_PP'],rule
    #database.labelDictionary = resolveCorrespondence(database)
    #correctClassifications(rules,classifications,database.labelDictionary)
    #print 'step1',database.labelDictionary
    # NOTE(review): the original indentation was lost; simplify() is assumed
    # to run once, after the STEP1 loop.
    simplify(database.labelDictionary)
    #TODO: uncomment this section when we solve the bug on reclassifying
    #print database.labelDictionary
    cycles = resolveCycles(database,equivalenceTranslator)
    # keep a snapshot of the labels before correspondences are resolved
    database.rawLabelDictionary = deepcopy(database.labelDictionary)
    for _ in range(0,5):
        database.labelDictionary = resolveCorrespondence(database,cycles)
    #print 'after resolving correspondences'
    classifications2,_,eequivalenceTranslator = sbmlAnalyzer.reclassifyReactions(rules,molecules,database.labelDictionary)
    # a reclassification only replaces 'None'/'Binding' entries, and only
    # when it is not 'None' itself
    for index in range(0,len(classifications)):
        if classifications[index] in ['None','Binding'] and classifications2[index] != 'None':
            classifications[index] = classifications2[index]
    tmp = {}
    # wrap every label in a one-element list
    tmp = {x:[database.labelDictionary[x]] for x in database.labelDictionary}
    database.labelDictionary = tmp
    #print 'step1.5',database.labelDictionary
    #STEP2: Use naming conventions
    for element in set(x for rule in rules for tmp in parseReactions(rule) for x in tmp):
        # equivalences in which this species is the longest name
        equivalence = [x for x in equivalenceTranslator if element == max(x,key=len)]
        if equivalence != []:
            defineCorrespondenceWithNamingConventions(element,equivalence,database,createOnDemand=True)
    #for _ in range(0,5):
    #    database.labelDictionary = resolveCorrespondence(database,cycles)
    #print {x:type(database.labelDictionary[x]) == tuple for x in database.labelDictionary
    for element in set(x for rule in rules for tmp in parseReactions(rule) for x in tmp):
        equivalence = [x for x in equivalenceTranslator if element == max(x,key=len)]
        if equivalence != []:
            defineComplexCorrespondenceWithNamingConventions(element,equivalence,database,createOnDemand=False)
    #print 'step2',database.labelDictionary
    #STEP2.5: For those elements that where updated from the naming conventions
    #we also use the chance to update the reaction classification if necessary
    # correctClassifications(rules,classifications,database.labelDictionary)
    #STEP3: Use annotations
    sbmlAnalyzer.classifyReactionsWithAnnotations(rules,molecules,rdfAnnotations,database.labelDictionary)
    #for element in set(x for rule in rules for tmp in parseReactions(rule) for x in tmp):
    #    annotation = [rdfAnnotations[x] for x in rdfAnnotations if element in rdfAnnotations[x]]
    #    if annotation != []:
    #        defineCorrespondenceWithAnnotations(element,annotation,database)
    simplify(database.labelDictionary)
    #print 'step3',database.labelDictionary
    #analyzeSBML.reclassifyReactions(reactions,molecules,labelDictionary,classifications,equivalenceTranslator)
    cycles = resolveCycles(database,equivalenceTranslator)
    #TODO: this is causing errors, check
    for _ in range(0,5):
        database.labelDictionary = resolveCorrespondence(database,cycles)
    correctClassificationsWithCycleInformation(rules,classifications,cycles)
    #print database.labelDictionary
    counter = 0
    # NOTE(review): this value is overwritten below before it is read
    nonProcessedRules = zip(rules,classifications)
    #TODO:multipass stuff.
    #while len(nonProcessedRules) > 0:
    #tmp = []
    ##sort. Primary key: number of predependencies a rule has
    #secondary key: length of the names of the elements involved
    ruleWeightTable = []
    ruleWeight2Table= []
    #print equivalenceTranslator
    for rule in rules:
        flag = False
        weight = 0
        weight2 = 0
        reaction2 = list(parseReactions(rule))
        #sort rules according to the complexity of the reactants (not the products)
        for element in reaction2[0]:
            if element not in database.labelDictionary:
                weight =50
            else:
                weight = max(weight,len(database.labelDictionary[element]))
            # NOTE(review): assumed to run for every reactant (loop level),
            # not only in the else branch -- original indentation was lost
            weight2 += len(element)
        for element in reaction2[0]:
            # one extra point per equivalence whose second name appears in
            # the reactant delimited by '_' or the string ends
            # NOTE(review): x[1] is inserted into the pattern unescaped
            weight+= sum([1 for x in equivalenceTranslator if re.search(r'(_|^)({0})(_|$)'.format(x[1]),element) != None])
        for element in reaction2[0]:
            weight += element.count('_')
        ruleWeight2Table.append(weight2)
        ruleWeightTable.append(weight)
    nonProcessedRules = zip(ruleWeightTable,ruleWeight2Table,rules,classifications)
    # two stable sorts: the result is ordered by weight, ties by weight2
    nonProcessedRules = sorted(nonProcessedRules,key=lambda rule: rule[1])
    nonProcessedRules = sorted(nonProcessedRules,key=lambda rule: rule[0])
    database.classifications = classifications
    for idx,(w0,w1,rule,classification) in enumerate(nonProcessedRules):
        # debugging switch; always off unless the lines below are enabled
        outputFlag = False
        #if classification == 'Modification':
        #    outputFlag = True
        counter += 1
        reaction2 = list(parseReactions(rule))
        if outputFlag:
            tmp = deepcopy(database.translator)
            print reaction2
        processRule(reaction2,database,classification,eequivalenceTranslator,outputFlag)
        #if 'EGF_EGFR2_PLCg' in database.translator:
        #    print rule,database.translator['EGF_EGFR2_PLCg'],classification
        #if 'EGF_EGFRm2_GAP_Grb2_Prot' in database.translator:
        #    print '++++',rule,difflib.SequenceMatcher(None, 'Grb2(egfr,shc!10,sos).EGF(egfr!5,modI~U,modM~M).EGFR(egf!5,egfr!8,gap!9,grb2!11,modI~U,prot,ras_gdp,shc!10).EGF(egfr!7,modI~U,modM~U).EGFR(egf!7,egfr!8,gap!8,grb2,modI~U,prot,ras_gdp,shc!9).GAP(egfr!9).Prot(egfr!11,modI~U,ras_gdp,ras_gtp)' , str(database.translator['EGF_EGFRm2_GAP_Grb2_Prot'])).ratio()
        if outputFlag:
            # show only the entries processRule added
            print {x:str(database.translator[x]) for x in database.translator if x not in tmp}
    # labels that are not tuples name another translator entry: alias them
    for element in database.labelDictionary:
        if not isinstance(database.labelDictionary[element],tuple):
            database.translator[element] = database.translator[database.labelDictionary[element]]
    raw = [x[0] for x in database.rawDatabase]
    for element in database.translator:
        if element in raw:
            continue
        # refresh each molecule from the translator entry of the same name
        for mol in database.translator[element].molecules:
            if mol.name in database.translator:
                mol.update(database.translator[mol.name].molecules[0])
    return database.translator,logMess.log,parser.getSpeciesAnnotation()
def transformMolecules(parser, database, configurationFile,
                       speciesEquivalences=None):
    '''
    Fills database.labelDictionary and database.translator from the
    reactions of a model.

    Keywords:
    ---parser: provides getReactions(), getSpecies() and getSpeciesAnnotation()
    ---database: mutated in place (reactionProperties, translator,
       labelDictionary, rawLabelDictionary, classifications); its
       rawDatabase attribute is read
    ---configurationFile, speciesEquivalences: forwarded to
       analyzeSBML.SBMLAnalyzer

    Returns the tuple
    (database.translator, logMess.log, parser.getSpeciesAnnotation()).
    '''
    #labelDictionary = {}
    _, rules, _ = parser.getReactions()
    molecules, _, _ = parser.getSpecies()
    #synthesisdatabase = {}
    #translator = {}
    sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(configurationFile,
                                            speciesEquivalences)
    classifications, equivalenceTranslator, eequivalenceTranslator = sbmlAnalyzer.classifyReactions(
        rules, molecules)
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator, database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes(
    )
    #analyzeSBML.analyzeNamingConventions(molecules)
    rdfAnnotations = analyzeRDF.getAnnotations(parser, 'uniprot')
    #print rdfAnnotations
    #print classifications
    #for element in equivalenceTranslator:
    #    addToLabelDictionary(database.labelDictionary,element[0],min(element,key=len))
    #    addToLabelDictionary(database.labelDictionary,element[1],min(element,key=len))
    #for element in database.labelDictionary:
    #    database.labelDictionary[element] = [(min(database.labelDictionary[element],key=len),)]
    #STEP1: Use reaction information to infer w print zip(rules,classifications)
    for rule, classification in zip(rules, classifications):
        reaction2 = list(parseReactions(rule))
        # unique species names appearing anywhere in the reaction
        totalElements = [item for sublist in reaction2 for item in sublist]
        totalElements = list(set(totalElements))
        #obtain elements from the equivalenceTranstor that contain at least two
        #reactants/products
        #equivalences = [x for x in equivalenceTranslator if x[0] in reaction2[0] or x[1] in reaction2[1]]
        database.labelDictionary = defineCorrespondence(
            reaction2, totalElements, database, classification,
            rdfAnnotations)
        #if 'ERKi_MEKi_PP' in database.labelDictionary:
        #    print database.labelDictionary['ERKi_MEKi_PP'],rule
    #database.labelDictionary = resolveCorrespondence(database)
    #correctClassifications(rules,classifications,database.labelDictionary)
    #print 'step1',database.labelDictionary
    # NOTE(review): the original indentation was lost; simplify() is assumed
    # to run once, after the STEP1 loop.
    simplify(database.labelDictionary)
    #TODO: uncomment this section when we solve the bug on reclassifying
    #print database.labelDictionary
    cycles = resolveCycles(database, equivalenceTranslator)
    # keep a snapshot of the labels before correspondences are resolved
    database.rawLabelDictionary = deepcopy(database.labelDictionary)
    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)
    #print 'after resolving correspondences'
    classifications2, _, eequivalenceTranslator = sbmlAnalyzer.reclassifyReactions(
        rules, molecules, database.labelDictionary)
    # a reclassification only replaces 'None'/'Binding' entries, and only
    # when it is not 'None' itself
    for index in range(0, len(classifications)):
        if classifications[index] in ['None', 'Binding'
                                      ] and classifications2[index] != 'None':
            classifications[index] = classifications2[index]
    tmp = {}
    # wrap every label in a one-element list
    tmp = {x: [database.labelDictionary[x]] for x in database.labelDictionary}
    database.labelDictionary = tmp
    #print 'step1.5',database.labelDictionary
    #STEP2: Use naming conventions
    for element in set(x for rule in rules for tmp in parseReactions(rule)
                       for x in tmp):
        # equivalences in which this species is the longest name
        equivalence = [
            x for x in equivalenceTranslator if element == max(x, key=len)
        ]
        if equivalence != []:
            defineCorrespondenceWithNamingConventions(element,
                                                      equivalence,
                                                      database,
                                                      createOnDemand=True)
    #for _ in range(0,5):
    #    database.labelDictionary = resolveCorrespondence(database,cycles)
    #print {x:type(database.labelDictionary[x]) == tuple for x in database.labelDictionary
    for element in set(x for rule in rules for tmp in parseReactions(rule)
                       for x in tmp):
        equivalence = [
            x for x in equivalenceTranslator if element == max(x, key=len)
        ]
        if equivalence != []:
            defineComplexCorrespondenceWithNamingConventions(
                element, equivalence, database, createOnDemand=False)
    #print 'step2',database.labelDictionary
    #STEP2.5: For those elements that where updated from the naming conventions
    #we also use the chance to update the reaction classification if necessary
    # correctClassifications(rules,classifications,database.labelDictionary)
    #STEP3: Use annotations
    sbmlAnalyzer.classifyReactionsWithAnnotations(rules, molecules,
                                                  rdfAnnotations,
                                                  database.labelDictionary)
    #for element in set(x for rule in rules for tmp in parseReactions(rule) for x in tmp):
    #    annotation = [rdfAnnotations[x] for x in rdfAnnotations if element in rdfAnnotations[x]]
    #    if annotation != []:
    #        defineCorrespondenceWithAnnotations(element,annotation,database)
    simplify(database.labelDictionary)
    #print 'step3',database.labelDictionary
    #analyzeSBML.reclassifyReactions(reactions,molecules,labelDictionary,classifications,equivalenceTranslator)
    cycles = resolveCycles(database, equivalenceTranslator)
    #TODO: this is causing errors, check
    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)
    correctClassificationsWithCycleInformation(rules, classifications, cycles)
    #print database.labelDictionary
    counter = 0
    # NOTE(review): this value is overwritten below before it is read
    nonProcessedRules = zip(rules, classifications)
    #TODO:multipass stuff.
    #while len(nonProcessedRules) > 0:
    #tmp = []
    ##sort. Primary key: number of predependencies a rule has
    #secondary key: length of the names of the elements involved
    ruleWeightTable = []
    ruleWeight2Table = []
    #print equivalenceTranslator
    for rule in rules:
        flag = False
        weight = 0
        weight2 = 0
        reaction2 = list(parseReactions(rule))
        #sort rules according to the complexity of the reactants (not the products)
        for element in reaction2[0]:
            if element not in database.labelDictionary:
                weight = 50
            else:
                weight = max(weight, len(database.labelDictionary[element]))
            # NOTE(review): assumed to run for every reactant (loop level),
            # not only in the else branch -- original indentation was lost
            weight2 += len(element)
        for element in reaction2[0]:
            # one extra point per equivalence whose second name appears in
            # the reactant delimited by '_' or the string ends
            # NOTE(review): x[1] is inserted into the pattern unescaped
            weight += sum([
                1 for x in equivalenceTranslator
                if re.search(r'(_|^)({0})(_|$)'.format(x[1]), element) != None
            ])
        for element in reaction2[0]:
            weight += element.count('_')
        ruleWeight2Table.append(weight2)
        ruleWeightTable.append(weight)
    nonProcessedRules = zip(ruleWeightTable, ruleWeight2Table, rules,
                            classifications)
    # two stable sorts: the result is ordered by weight, ties by weight2
    nonProcessedRules = sorted(nonProcessedRules, key=lambda rule: rule[1])
    nonProcessedRules = sorted(nonProcessedRules, key=lambda rule: rule[0])
    database.classifications = classifications
    for idx, (w0, w1, rule, classification) in enumerate(nonProcessedRules):
        # debugging switch; always off unless the lines below are enabled
        outputFlag = False
        #if classification == 'Modification':
        #    outputFlag = True
        counter += 1
        reaction2 = list(parseReactions(rule))
        if outputFlag:
            tmp = deepcopy(database.translator)
            print reaction2
        processRule(reaction2, database, classification,
                    eequivalenceTranslator, outputFlag)
        #if 'EGF_EGFR2_PLCg' in database.translator:
        #    print rule,database.translator['EGF_EGFR2_PLCg'],classification
        #if 'EGF_EGFRm2_GAP_Grb2_Prot' in database.translator:
        #    print '++++',rule,difflib.SequenceMatcher(None, 'Grb2(egfr,shc!10,sos).EGF(egfr!5,modI~U,modM~M).EGFR(egf!5,egfr!8,gap!9,grb2!11,modI~U,prot,ras_gdp,shc!10).EGF(egfr!7,modI~U,modM~U).EGFR(egf!7,egfr!8,gap!8,grb2,modI~U,prot,ras_gdp,shc!9).GAP(egfr!9).Prot(egfr!11,modI~U,ras_gdp,ras_gtp)' , str(database.translator['EGF_EGFRm2_GAP_Grb2_Prot'])).ratio()
        if outputFlag:
            # show only the entries processRule added
            print {
                x: str(database.translator[x])
                for x in database.translator if x not in tmp
            }
    # labels that are not tuples name another translator entry: alias them
    for element in database.labelDictionary:
        if not isinstance(database.labelDictionary[element], tuple):
            database.translator[element] = database.translator[
                database.labelDictionary[element]]
    raw = [x[0] for x in database.rawDatabase]
    for element in database.translator:
        if element in raw:
            continue
        # refresh each molecule from the translator entry of the same name
        for mol in database.translator[element].molecules:
            if mol.name in database.translator:
                mol.update(database.translator[mol.name].molecules[0])
    return database.translator, logMess.log, parser.getSpeciesAnnotation()
def createSpeciesCompositionGraph(parser, database, configurationFile,
                                  namingConventions,
                                  speciesEquivalences=None, bioGridFlag=False):
    """Build the species dependency graph of a model and consolidate it.

    The function classifies the model's reactions through
    ``analyzeSBML.SBMLAnalyzer``, fills ``database.dependencyGraph`` from
    (in order) binding reactions, the lexical dependency graph returned by
    the classifier, catalysis naming equivalences, optional forced
    modifications, user-defined labels, string-similarity labels and a final
    "closest modification" pass over species that still have no dependency,
    and finally hands the graph to ``consolidateDependencyGraph``.

    Args:
        parser: model parser; must provide ``getReactions(atomize=True)`` and
            ``getSpecies()``. It is also forwarded to ``SBMLAnalyzer`` and
            ``analyzeRDF.getAnnotations``.
        database: mutable state object. ``forceModificationFlag`` is read;
            the attributes ``sbmlAnalyzer``, ``classifications``,
            ``eequivalenceTranslator``, ``reactionProperties``,
            ``translator``, ``labelDictionary``, ``lexicalLabelDictionary``,
            ``dependencyGraph``, ``tmpEquivalence``, ``weights`` and
            ``artificialEquivalenceTranslator`` are (re)assigned.
        configurationFile: forwarded unchanged to ``SBMLAnalyzer``.
        namingConventions: forwarded unchanged to ``SBMLAnalyzer``.
        speciesEquivalences: forwarded unchanged to ``SBMLAnalyzer``.
        bioGridFlag: not used by this function; kept so existing callers
            keep working.

    Returns:
        A ``(prunnedDependencyGraph, database)`` tuple, where the first item
        is the first value returned by ``consolidateDependencyGraph`` and the
        second is the same ``database`` object that was passed in.
    """
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    database.sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(
        parser, configurationFile, namingConventions, speciesEquivalences)

    # classify reactions
    # (indirectEquivalenceTranslator is only consumed by the disabled
    # "complex catalysis reactions" pass documented further below)
    (database.classifications, equivalenceTranslator,
     database.eequivalenceTranslator, indirectEquivalenceTranslator,
     adhocLabelDictionary, lexicalDependencyGraph) = \
        database.sbmlAnalyzer.classifyReactions(rules, molecules)

    ##### input processing
    # states, components, other user options
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    # user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)

    (database.translator, database.labelDictionary,
     database.lexicalLabelDictionary) = \
        database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    # analyzeSBML.analyzeNamingConventions(molecules)

    # NOTE(review): the uniprot annotations are not used anywhere in this
    # function. The call is retained only in case getAnnotations has side
    # effects on the parser -- confirm and drop it if it does not.
    analyzeRDF.getAnnotations(parser, 'uniprot')

    #### dependency graph
    # binding reactions
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(database.dependencyGraph,
                                 list(parseReactions(reaction)),
                                 classification)

    # lexical dependencies override whatever the binding analysis produced
    database.dependencyGraph.update(lexicalDependencyGraph)

    # catalysis reactions
    for key in database.eequivalenceTranslator:
        for namingEquivalence in database.eequivalenceTranslator[key]:
            # the shortest name is taken as the base species, the longest
            # one as its modified form
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key != 'Binding':
                if baseElement not in database.dependencyGraph or \
                        database.dependencyGraph[baseElement] == []:
                    if modElement not in database.dependencyGraph or \
                            database.dependencyGraph[modElement] == []:
                        database.dependencyGraph[baseElement] = []
                    # do we have a meaningful reverse dependence?
                    # elif all([baseElement not in x for x in database.dependencyGraph[modElement]]):
                    #     addToDependencyGraph(database.dependencyGraph,baseElement,[modElement])
                    #     continue
                addToDependencyGraph(database.dependencyGraph, modElement,
                                     [baseElement])

    # non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification != 'Transformation':
                continue
            preaction = list(parseReactions(reaction))
            reactant = preaction[0][0]
            product = preaction[1][0]
            # whichever name is contained in the other is the base form
            if product in reactant:
                base, mod = product, reactant
            else:
                base, mod = reactant, product
            # A species without an entry is treated like one with an empty
            # entry (same convention as the catalysis loop above) instead of
            # raising KeyError.
            if database.dependencyGraph.get(mod, []) == []:
                database.dependencyGraph[mod] = [[base]]

    # ---- disabled: complex catalysis reactions ---------------------------
    # The pass below was switched off in the original code (it lived inside
    # a bare string literal). It is kept here, as comments, for reference.
    #
    # for key in indirectEquivalenceTranslator:
    #     #first remove these entries from the dependencyGraph since
    #     #they are not true bindingReactions
    #     for namingEquivalence in indirectEquivalenceTranslator[key]:
    #         removedElement = ''
    #         tmp3 = deepcopy(namingEquivalence[1])
    #         if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
    #             removedElement = namingEquivalence[0][0]
    #         elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
    #             removedElement = namingEquivalence[0][1]
    #         else:
    #             tmp3.reverse()
    #             if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
    #                 removedElement = namingEquivalence[0][0]
    #             elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
    #                 removedElement = namingEquivalence[0][1]
    #         #then add the new, true dependencies
    #         #if its not supposed to be a basic element
    #         tmp = [x for x in namingEquivalence[1] if x not in namingEquivalence[2]]
    #         tmp.extend([x for x in namingEquivalence[2] if x not in namingEquivalence[1]])
    #         tmp2 = deepcopy(tmp)
    #         tmp2.reverse()
    #         ##TODO: map back for the elements in namingEquivalence[2]
    #         if tmp not in database.dependencyGraph[namingEquivalence[3][0]] \
    #                 and tmp2 not in database.dependencyGraph[namingEquivalence[3][0]]:
    #             if sorted(tmp) == sorted(tmp3):
    #                 continue
    #             if all(x in database.dependencyGraph for x in tmp):
    #                 if removedElement in database.dependencyGraph:
    #                     database.dependencyGraph[removedElement].remove(tmp3)
    #                 logMess('INFO:Atomization','Removing {0}={1} and adding {2}={3} instead from the dependency list since we determined it is not a true binding reaction based on lexical analysis'
    #                         .format(removedElement,tmp3,namingEquivalence[3][0],tmp))
    #                 database.dependencyGraph[namingEquivalence[3][0]] = [tmp]
    #             else:
    #                 logMess('WARNING:Atomization','We determined that {0}={1} based on lexical analysis instead of {2}={3} (stoichiometry) but one of the constituent components in {1} is not a molecule so no action was taken'
    #                         .format(namingEquivalence[3][0],tmp,removedElement,tmp3))
    # ----------------------------------------------------------------------

    # user defined stuff
    for element in database.labelDictionary:
        components = database.labelDictionary[element][0]
        if len(components) == 0 or element == components[0]:
            # no components, or the species maps onto itself
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            # user definitions replace any dependency found so far
            database.dependencyGraph[element] = [list(components)]

    # stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        # similarity analysis has less priority than anything we discovered
        # before
        if element in database.dependencyGraph and \
                len(database.dependencyGraph[element]) > 0:
            continue
        components = database.lexicalLabelDictionary[element][0]
        if len(components) == 0 or element == components[0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            logMess('INFO:Atomization',
                    'added induced speciesStructure {0}={1}'
                    .format(element, components))
            database.dependencyGraph[element] = [list(components)]

    # pure lexical analysis: try to explain the species that still have no
    # dependency as modifications of other molecules
    orphanedSpecies = [x for x in database.dependencyGraph
                       if database.dependencyGraph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency, database.tmpEquivalence = \
        database.sbmlAnalyzer.findClosestModification(orphanedSpecies,
                                                      strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(database.dependencyGraph, species, [])
        for instance in tmpDependency[species]:
            addToDependencyGraph(database.dependencyGraph, species, instance)

    ##### sct
    # FIXME: the third value returned by consolidateDependencyGraph (formerly
    # bound to ``unevenElementDict``) is never used here and its purpose is
    # unclear.
    (prunnedDependencyGraph, database.weights, _,
     database.artificialEquivalenceTranslator) = consolidateDependencyGraph(
        database.dependencyGraph, equivalenceTranslator,
        database.eequivalenceTranslator, database.sbmlAnalyzer)

    return prunnedDependencyGraph, database
def loadAnnotations(fileName):
    """Return the 'miriam' RDF annotations of the SBML file ``fileName``.

    The file is read with ``libsbml.SBMLReader``, its model is wrapped in a
    ``libsbml2bngl.SBML2BNGL`` parser and that parser is handed to
    ``analyzeRDF.getAnnotations``; whatever that call returns is passed
    straight back to the caller.
    """
    sbmlDocument = libsbml.SBMLReader().readSBMLFromFile(fileName)
    modelParser = libsbml2bngl.SBML2BNGL(sbmlDocument.getModel())
    return analyzeRDF.getAnnotations(modelParser, "miriam")