def _orf_overlap_rank(theorf, orflist, orfpointer):
    """
    Rank an Orf on coding propensity among the Orfs that overlap it.

    @type  theorf: Orf object
    @param theorf: the Orf to rank (the visible caller passes
                   orflist[orfpointer]); needs the attributes startPY,
                   endPY and coding_propensity

    @type  orflist: list
    @param orflist: all Orf objects, ordered ascending on startPY (the
                    early exit of the downstream scan relies on this order)

    @type  orfpointer: integer
    @param orfpointer: index of theorf in orflist

    @rtype:  integer
    @return: 1-based position of theorf in the group of overlapping Orfs
             after ordering that group with order_list_by_attribute on
             'coding_propensity' with reversed=True

    @attention: theorf is recognised in the ordered group by its startPY,
                not by identity
    """
    # Orfs listed after theorf: because orflist is ordered on startPY, the
    # first Orf that starts beyond the end of theorf ends the scan; no later
    # Orf can overlap either.
    subset = [theorf]
    for orf in orflist[orfpointer + 1:]:
        if orf.startPY > theorf.endPY:
            break
        subset.append(orf)

    # Orfs listed before theorf: the list is NOT ordered on endPY, so a short
    # Orf that ends before theorf says nothing about the Orfs that start even
    # earlier; a long one among them can still reach into theorf. Stopping at
    # the first non-overlapping Orf therefore missed overlaps; test them all.
    upstream = [orf for orf in orflist[:orfpointer]
                if orf.endPY >= theorf.startPY]
    # keep the group in orflist order, exactly as the insert(0, ...) calls did
    subset = upstream + subset

    # order the subset
    subset = order_list_by_attribute(
        subset, order_by='coding_propensity', reversed=True)

    # count positions until theorf is reached
    rank = 0
    for orf in subset:
        rank += 1
        if orf.startPY == theorf.startPY:
            break
    # return the rank
    return rank
def _orf_overlap_rank(theorf, orflist, orfpointer):
    """
    Rank an Orf on coding propensity among the Orfs that overlap it.

    @type  theorf: Orf object
    @param theorf: the Orf to rank (the visible caller passes
                   orflist[orfpointer]); needs the attributes startPY,
                   endPY and coding_propensity

    @type  orflist: list
    @param orflist: all Orf objects, ordered ascending on startPY (the
                    early exit of the downstream scan relies on this order)

    @type  orfpointer: integer
    @param orfpointer: index of theorf in orflist

    @rtype:  integer
    @return: 1-based position of theorf in the group of overlapping Orfs
             after ordering that group with order_list_by_attribute on
             'coding_propensity' with reversed=True

    @attention: theorf is recognised in the ordered group by its startPY,
                not by identity
    """
    # Orfs listed after theorf: because orflist is ordered on startPY, the
    # first Orf that starts beyond the end of theorf ends the scan; no later
    # Orf can overlap either.
    subset = [theorf]
    for orf in orflist[orfpointer + 1:]:
        if orf.startPY > theorf.endPY:
            break
        subset.append(orf)

    # Orfs listed before theorf: the list is NOT ordered on endPY, so a short
    # Orf that ends before theorf says nothing about the Orfs that start even
    # earlier; a long one among them can still reach into theorf. Stopping at
    # the first non-overlapping Orf therefore missed overlaps; test them all.
    upstream = [orf for orf in orflist[:orfpointer]
                if orf.endPY >= theorf.startPY]
    # keep the group in orflist order, exactly as the insert(0, ...) calls did
    subset = upstream + subset

    # order the subset
    subset = order_list_by_attribute(
        subset, order_by='coding_propensity', reversed=True)

    # count positions until theorf is reached
    rank = 0
    for orf in subset:
        rank += 1
        if orf.startPY == theorf.startPY:
            break
    # return the rank
    return rank
def assembleorf(orflist,start,end,max_errors=2,minimum_errors_only=True):
    """
    Print which groups of Orfs can jointly span the region start..end.

    Every Orf in turn is used as seed of a group. The group is extended
    with other Orfs until it reaches from start up to end-3, the frames of
    the group members are handed to _getorfgroupframedistance(), and the
    groups with the lowest scores are printed as
    PotentialSequenceErrorWarning.

    @type  orflist: list
    @param orflist: Orf objects; the attributes id, startPY, endPY and frame
                    are read here and order_list_by_attribute is asked to
                    order them on 'length'

    @type  start: integer
    @param start: start coordinate of the region the group must reach

    @type  end: integer
    @param end: end coordinate of the region; compared as end-3 (the
                original comment attributes this to the STOP codon length)

    @type  max_errors: integer
    @param max_errors: groups whose distance[0] exceeds this value are not
                       stored; only applied when minimum_errors_only is
                       False

    @type  minimum_errors_only: Boolean
    @param minimum_errors_only: when True, only groups whose distance[0]
                       equals that of the first group after sorting are
                       printed

    @rtype:  None
    @return: nothing is returned; the result is printed

    @attention: NOTE(review): the nesting of the two 'break' statements and
                of the final print was reconstructed from a whitespace-
                mangled source; confirm against the original file
    """
    # presumably longest Orf first (order_by='length', reversed=True)
    orflist = order_list_by_attribute(orflist,order_by='length',reversed=True)
    # list of ( sum(distance), distance, orfset ) tuples, one per stored group
    orfidcombis = []
    for startpos in range(0,len(orflist)):
        # seed a new group with this Orf; group members are stored as
        # ( startPY, endPY, frame, orf ) tuples ordered left to right
        centralorf = orflist[startpos]
        orfset = [ ( centralorf.startPY, centralorf.endPY, centralorf.frame, centralorf ) ]
        # correct 'end' orf gff track position with STOP codon length!
        minpos = min([ tup[0] for tup in orfset ])
        maxpos = max([ tup[1] for tup in orfset ])
        # extend the group until it reaches from start up to end-3
        # NOTE(review): when a complete pass over orflist adds no Orf,
        # minpos and maxpos stay unchanged and this loop does not appear to
        # terminate -- confirm that the callers guarantee full coverage
        while minpos > start or maxpos < end-3:
            for pos in range(0,len(orflist)):
                #if minpos <= start and maxpos >= end-3: break
                orf = orflist[pos]
                # skip Orfs that are already part of this group
                if orf.id in [ tup[-1].id for tup in orfset ]: continue
                # extend leftwards: the Orf starts before the group and ends
                # no more than 3 positions short of the current left border
                if minpos > start and orf.startPY < minpos and orf.endPY >= minpos-3:
                    orfset.insert(0, ( orf.startPY, orf.endPY, orf.frame, orf ) )
                    minpos = min([ tup[0] for tup in orfset ])
                    maxpos = max([ tup[1] for tup in orfset ])
                    # check if this orf even OVERLAPS this current group of orfs;
                    # if it also sets the right border it is listed at that
                    # side of the group as well
                    if orf.endPY == maxpos:
                        orfset.append( ( orf.startPY, orf.endPY, orf.frame, orf ) )
                    # goto next iteration (of the while loop; the scan over
                    # orflist restarts at its first Orf)
                    break
                # extend rightwards: mirror image of the case above
                if maxpos < end-3 and orf.endPY > maxpos and orf.startPY <= maxpos+3:
                    orfset.append( ( orf.startPY, orf.endPY, orf.frame, orf ) )
                    minpos = min([ tup[0] for tup in orfset ])
                    maxpos = max([ tup[1] for tup in orfset ])
                    # check if this orf even OVERLAPS this current group of orfs;
                    # if it also sets the left border it is listed at that
                    # side of the group as well
                    if orf.startPY == minpos:
                        orfset.insert(0, ( orf.startPY, orf.endPY, orf.frame, orf ) )
                    # goto next iteration (of the while loop)
                    break
        # calculate minimal amount of sequence changes needed
        # (distance is indexed, sliced and summed below; presumably a tuple
        # whose elements 1.. are insertions, deletions and errors, as the
        # warning message labels them -- confirm in _getorfgroupframedistance)
        frames = [ tup[2] for tup in orfset ]
        distance = _getorfgroupframedistance(frames)
        # only store if not too many errors; max_errors is ignored when
        # minimum_errors_only is True
        if not minimum_errors_only and distance[0] > max_errors:
            pass
        else:
            # different seeds can end in the same group; store it once
            if ( sum(distance), distance, orfset ) not in orfidcombis:
                orfidcombis.append( ( sum(distance), distance, orfset ) )
    # now find the most likely explanation: lowest summed distance first
    orfidcombis.sort()
    minimum_error_count = None
    for (summed,distance,orfset) in orfidcombis:
        # define minimum number of sequence errors (taken from the first,
        # best scoring, group)
        # NOTE(review): '== None' works, 'is None' is the usual spelling
        if minimum_error_count == None: minimum_error_count=distance[0]
        # break when more than minimum sequence errors
        if minimum_errors_only and distance[0] > minimum_error_count: break
        # if NO sequence errors -> break as well (not an error, but a missed tiny Orf!)
        # NOTE(review): no statement implements the comment above
        # NOTE(review): orfids and frames are only used by the disabled
        # print statements below
        orfids = [ tup[-1].id for tup in orfset ]
        frames = [ tup[2] for tup in orfset ]
        # oldprinting style, replaced by PotentialSequenceErrorWarning
        #print orfids, frames, distance
        #for tup in orfset: print tup[-1], tup[-1].frame
        # print PotentialSequenceErrorWarning: the counts followed by the
        # string representation of every Orf in the group
        message = [ str(distance[1:]), "(insertions,deletions,errors)" ]
        for tup in orfset: message.append( str(tup[-1]) )
        print PotentialSequenceErrorWarning( message )
def order_list_by_attribute(self, order_by='', reversed=False):
    """
    Replace self.codingblockgraphs by its ordered counterpart.

    The ordering itself is left to the function order_list_by_attribute
    that this name resolves to from inside the method body.

    @attention: see graphAbgp.ordering.order_list_by_attribute

    @type  order_by: string
    @param order_by: passed on unchanged as keyword argument order_by

    @type  reversed: Boolean
    @param reversed: passed on unchanged as keyword argument reversed

    @rtype:  None
    @return: nothing; self.codingblockgraphs is re-assigned
    """
    ordered_graphs = order_list_by_attribute(
        self.codingblockgraphs,
        order_by=order_by,
        reversed=reversed)
    self.codingblockgraphs = ordered_graphs
def order_list_by_attribute(self, order_by='', reversed=False):
    """
    Replace self.codingblockgraphs by its ordered counterpart.

    The ordering itself is left to the function order_list_by_attribute
    that this name resolves to from inside the method body.

    @attention: see graphAbgp.ordering.order_list_by_attribute

    @type  order_by: string
    @param order_by: passed on unchanged as keyword argument order_by

    @type  reversed: Boolean
    @param reversed: passed on unchanged as keyword argument reversed

    @rtype:  None
    @return: nothing; self.codingblockgraphs is re-assigned
    """
    ordered_graphs = order_list_by_attribute(
        self.codingblockgraphs,
        order_by=order_by,
        reversed=reversed)
    self.codingblockgraphs = ordered_graphs
print explain() sysExit() ## TEMPORARILY backwards-compatibility with old input_data_struct.txt file #input = eval(open('input_data_struct.txt').read().strip()) # do geneconfirmation; this includes rungetorf() function call input, gene_status = geneconfirmation(input, verbose=True) # proces tcode data input = obtaintcodedata(input) for org in input.keys(): # loop over all the Orf objects and score their coding propensity rank orflist = input[org]['orfs'].orfs orflist = order_list_by_attribute(orflist, 'startPY') for orfpointer in range(0, len(orflist)): orf = orflist[orfpointer] # add data to tss object orf.coding_propensity = orf.tcode_score() * orf.length for orfpointer in range(0, len(orflist)): orf = orflist[orfpointer] # define _orf_overlap_rank rank = _orf_overlap_rank(orf, orflist, orfpointer) orf._orf_overlap_rank = rank # order orflist by coding_propensity and score this rank orflist = order_list_by_attribute(orflist, 'coding_propensity', reversed=True)
print explain() sysExit() ## TEMPORARILY backwards-compatibility with old input_data_struct.txt file #input = eval(open('input_data_struct.txt').read().strip()) # do geneconfirmation; this includes rungetorf() function call input,gene_status = geneconfirmation(input,verbose=True) # proces tcode data input = obtaintcodedata(input) for org in input.keys(): # loop over all the Orf objects and score their coding propensity rank orflist = input[org]['orfs'].orfs orflist = order_list_by_attribute(orflist,'startPY') for orfpointer in range(0,len(orflist)): orf = orflist[orfpointer] # add data to tss object orf.coding_propensity = orf.tcode_score() * orf.length for orfpointer in range(0,len(orflist)): orf = orflist[orfpointer] # define _orf_overlap_rank rank = _orf_overlap_rank(orf,orflist,orfpointer) orf._orf_overlap_rank = rank # order orflist by coding_propensity and score this rank orflist = order_list_by_attribute(orflist,'coding_propensity',reversed=True) cnt = 1 for orf in orflist: