def update_PCG_with_signalpexons(signalpexonseqs, PCG, OPTIONS,
        min_pacbporf_identityscore=0.20, verbose=True):
    """
    Add ClustalW-aligned SignalP exon pairs (target vs. informant) to the PCG.

    Every SignalP exon of the target organism is paired with every SignalP
    exon of each informant organism that is still present in the PCG and
    that already has pacbporfs with the target. A pair is aligned only when

      - its coordinates are positioned compatibly with all pacbporfs that
        exist for this target/informant combination, and
      - its distance towards the first ordered pacbporf is non-zero and not
        larger than SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH / 3.

    Alignments that yield no pacbp, or a pacbp with an identityscore below
    min_pacbporf_identityscore, are discarded. Per target exon / informant
    combination, the candidate ordered first on descending 'bits' is stored
    in the PCG with source 'SignalP-ClustalW'.

    Arguments:
      signalpexonseqs  dict, organism identifier -> list of SignalP exon
                       objects (must provide protein_start(), protein_end(),
                       proteinsequence() and an `orf` attribute)
      PCG              graph object that is queried (organism_set(),
                       get_pacbps_by_organisms()) and updated in place
      OPTIONS          object with a `target` attribute (target organism
                       identifier)
      min_pacbporf_identityscore
                       minimal pacbp.identityscore to accept an alignment
      verbose          print alignments and additions to stdout when True

    Returns True when at least one pacbporf was added to the PCG, else False.
    """
    target = OPTIONS.target
    if target not in signalpexonseqs:
        return False

    is_any_pacbporf_added = False

    for targetSPexon in signalpexonseqs[target]:
        for informant, infSPlist in signalpexonseqs.items():
            if informant == target:
                continue
            # the informant can have been deleted from the PCG in the meanwhile
            if informant not in PCG.organism_set():
                continue

            # get ordered pacbporfs from the PCG; they are re-fetched for
            # every target exon because this function adds pacbporfs itself
            thepacbporfs = order_pacbporf_list(
                PCG.get_pacbps_by_organisms(target, informant))
            if not thepacbporfs:
                # no alignments present for this organism (can happen!)
                continue

            # candidate SignalP-exon pacbporfs for this informant
            signalpexon_pacbp_list = []

            for informantSPexon in infSPlist:
                coords = [
                    targetSPexon.protein_start(),
                    targetSPexon.protein_end(),
                    informantSPexon.protein_start(),
                    informantSPexon.protein_end(),
                ]

                # prior to making the ClustalW-PacbP, check if its
                # coordinates are placeable into the list of pacbporfs
                pacbpCoordsObj = PacbPCOORDS(input=(
                    targetSPexon.proteinsequence(),
                    informantSPexon.proteinsequence(),
                    targetSPexon.protein_start(),
                    informantSPexon.protein_start(),
                ))
                placements = [
                    pacbpCoordsObj.is_positioned_compatibly(pacbporf)
                    for pacbporf in thepacbporfs
                ]
                if False in placements:
                    # *NOT* placeable in current ordered list of PacbPORFS
                    continue

                dist = pacbpCoordsObj.distance_towards(thepacbporfs[0])
                if dist > SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH / 3:
                    # way too far in front of current gene structure parts
                    continue
                if dist == 0:
                    # NOT placeable in front of the rest of the PacbPORFS
                    continue

                # perform ClustalW alignment on the SP exons
                (alignedseqs, alignment) = clustalw(seqs={
                    target: targetSPexon.proteinsequence(),
                    informant: informantSPexon.proteinsequence(),
                })

                # make pacbp from clustalw alignment
                pacbp = pacbp_from_clustalw(
                    alignment=(
                        alignedseqs[target],
                        alignment,
                        alignedseqs[informant],
                    ),
                    coords=coords)

                # is there any alignment constructed?
                if not pacbp:
                    continue
                # ignore (very) poor identityscore alignments
                if pacbp.identityscore < min_pacbporf_identityscore:
                    continue

                # if here, make extended pacbpORF and store it
                signalpexonPacbpORF = pacbp2pacbporf(
                    pacbp, targetSPexon.orf, informantSPexon.orf)
                signalpexonPacbpORF.extend_pacbporf_after_stops()
                signalpexon_pacbp_list.append(signalpexonPacbpORF)

                if verbose:
                    # single-argument print(...) emits the same text under
                    # the Python 2 print statement and the print function
                    print("%s %s" % (alignedseqs[target], target))
                    print(alignment)
                    print("%s %s" % (alignedseqs[informant], informant))
                    print("%s %s %s DISTANCE:: %s" % (
                        pacbp,
                        (target, targetSPexon.orf.id),
                        (informant, informantSPexon.orf.id),
                        dist))
                    pacbp.print_protein()
                    print("")

            # If there are signalpexon-guided pacbporfs found, store the one
            # with the highest bitscore
            if signalpexon_pacbp_list:
                signalpexon_pacbp_list = order_list_by_attribute(
                    signalpexon_pacbp_list, order_by='bits', reversed=True)
                signalp_pacbporf = signalpexon_pacbp_list[0]
                pacbporf2PCG(signalp_pacbporf, target, informant, PCG,
                             source='SignalP-ClustalW')
                is_any_pacbporf_added = True
                if verbose:
                    print("SignalP Exon added to PCG: %s %s" % (
                        signalp_pacbporf, informant))

    return is_any_pacbporf_added
def update_PCG_with_signalpexons(signalpexonseqs, PCG, OPTIONS,
        min_pacbporf_identityscore=0.20, verbose=True):
    """
    Add ClustalW-aligned SignalP exon pairs (target vs. informant) to the PCG.

    Every SignalP exon of the target organism is paired with every SignalP
    exon of each informant organism that is still present in the PCG and
    that already has pacbporfs with the target. A pair is aligned only when

      - its coordinates are positioned compatibly with all pacbporfs that
        exist for this target/informant combination, and
      - its distance towards the first ordered pacbporf is non-zero and not
        larger than SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH / 3.

    Alignments that yield no pacbp, or a pacbp with an identityscore below
    min_pacbporf_identityscore, are discarded. Per target exon / informant
    combination, the candidate ordered first on descending 'bits' is stored
    in the PCG with source 'SignalP-ClustalW'.

    Arguments:
      signalpexonseqs  dict, organism identifier -> list of SignalP exon
                       objects (must provide protein_start(), protein_end(),
                       proteinsequence() and an `orf` attribute)
      PCG              graph object that is queried (organism_set(),
                       get_pacbps_by_organisms()) and updated in place
      OPTIONS          object with a `target` attribute (target organism
                       identifier)
      min_pacbporf_identityscore
                       minimal pacbp.identityscore to accept an alignment
      verbose          print alignments and additions to stdout when True

    Returns True when at least one pacbporf was added to the PCG, else False.
    """
    target = OPTIONS.target
    if target not in signalpexonseqs:
        return False

    is_any_pacbporf_added = False

    for targetSPexon in signalpexonseqs[target]:
        for informant, infSPlist in signalpexonseqs.items():
            if informant == target:
                continue
            # the informant can have been deleted from the PCG in the meanwhile
            if informant not in PCG.organism_set():
                continue

            # get ordered pacbporfs from the PCG; they are re-fetched for
            # every target exon because this function adds pacbporfs itself
            thepacbporfs = order_pacbporf_list(
                PCG.get_pacbps_by_organisms(target, informant))
            if not thepacbporfs:
                # no alignments present for this organism (can happen!)
                continue

            # candidate SignalP-exon pacbporfs for this informant
            signalpexon_pacbp_list = []

            for informantSPexon in infSPlist:
                coords = [
                    targetSPexon.protein_start(),
                    targetSPexon.protein_end(),
                    informantSPexon.protein_start(),
                    informantSPexon.protein_end(),
                ]

                # prior to making the ClustalW-PacbP, check if its
                # coordinates are placeable into the list of pacbporfs
                pacbpCoordsObj = PacbPCOORDS(input=(
                    targetSPexon.proteinsequence(),
                    informantSPexon.proteinsequence(),
                    targetSPexon.protein_start(),
                    informantSPexon.protein_start(),
                ))
                placements = [
                    pacbpCoordsObj.is_positioned_compatibly(pacbporf)
                    for pacbporf in thepacbporfs
                ]
                if False in placements:
                    # *NOT* placeable in current ordered list of PacbPORFS
                    continue

                dist = pacbpCoordsObj.distance_towards(thepacbporfs[0])
                if dist > SIGNALP_FIRSTEXON_MAX_INTRON_NT_LENGTH / 3:
                    # way too far in front of current gene structure parts
                    continue
                if dist == 0:
                    # NOT placeable in front of the rest of the PacbPORFS
                    continue

                # perform ClustalW alignment on the SP exons
                (alignedseqs, alignment) = clustalw(seqs={
                    target: targetSPexon.proteinsequence(),
                    informant: informantSPexon.proteinsequence(),
                })

                # make pacbp from clustalw alignment
                pacbp = pacbp_from_clustalw(
                    alignment=(
                        alignedseqs[target],
                        alignment,
                        alignedseqs[informant],
                    ),
                    coords=coords)

                # is there any alignment constructed?
                if not pacbp:
                    continue
                # ignore (very) poor identityscore alignments
                if pacbp.identityscore < min_pacbporf_identityscore:
                    continue

                # if here, make extended pacbpORF and store it
                signalpexonPacbpORF = pacbp2pacbporf(
                    pacbp, targetSPexon.orf, informantSPexon.orf)
                signalpexonPacbpORF.extend_pacbporf_after_stops()
                signalpexon_pacbp_list.append(signalpexonPacbpORF)

                if verbose:
                    # single-argument print(...) emits the same text under
                    # the Python 2 print statement and the print function
                    print("%s %s" % (alignedseqs[target], target))
                    print(alignment)
                    print("%s %s" % (alignedseqs[informant], informant))
                    print("%s %s %s DISTANCE:: %s" % (
                        pacbp,
                        (target, targetSPexon.orf.id),
                        (informant, informantSPexon.orf.id),
                        dist))
                    pacbp.print_protein()
                    print("")

            # If there are signalpexon-guided pacbporfs found, store the one
            # with the highest bitscore
            if signalpexon_pacbp_list:
                signalpexon_pacbp_list = order_list_by_attribute(
                    signalpexon_pacbp_list, order_by='bits', reversed=True)
                signalp_pacbporf = signalpexon_pacbp_list[0]
                pacbporf2PCG(signalp_pacbporf, target, informant, PCG,
                             source='SignalP-ClustalW')
                is_any_pacbporf_added = True
                if verbose:
                    print("SignalP Exon added to PCG: %s %s" % (
                        signalp_pacbporf, informant))

    return is_any_pacbporf_added
def hmmpacbporf2PCG(hmmpacbporf, target, informant, PCG, source=''):
    """
    Label a pacbporf as HMM-derived and hand it over to pacbporf2PCG.

    The `source` attribute and the 'fsource' entry of the `_gff` mapping of
    hmmpacbporf are both set to 'HMM' (the object is modified in place).
    All arguments are then passed through unchanged to pacbporf2PCG.
    Nothing is returned.
    """
    hmm_label = 'HMM'
    hmmpacbporf.source = hmm_label
    hmmpacbporf._gff['fsource'] = hmm_label
    pacbporf2PCG(
        hmmpacbporf,
        target,
        informant,
        PCG,
        source=source,
    )
def hmmpacbporf2PCG(hmmpacbporf, target, informant, PCG, source=''):
    """
    Label a pacbporf as HMM-derived and hand it over to pacbporf2PCG.

    The `source` attribute and the 'fsource' entry of the `_gff` mapping of
    hmmpacbporf are both set to 'HMM' (the object is modified in place).
    All arguments are then passed through unchanged to pacbporf2PCG.
    Nothing is returned.
    """
    hmm_label = 'HMM'
    hmmpacbporf.source = hmm_label
    hmmpacbporf._gff['fsource'] = hmm_label
    pacbporf2PCG(
        hmmpacbporf,
        target,
        informant,
        PCG,
        source=source,
    )