def _is_intermediate_overlapping_cbg_a_gsg_scaffold_enrichment(gsg,
        cbgA,cbgB,cbgC,minimal_scaffold_aa_enrichment=5):
    """
    Does intermediate cbgB enlarge the scaffold formed by cbgA and cbgC?

    The overall minimal spanning range (OMSR) of a temporary genestructure
    holding [cbgA,cbgB,cbgC] is compared with the OMSR of one holding only
    [cbgA,cbgC]. For every node shared by cbgA and cbgC, the OMSR length
    gain of its organism is tested against the threshold.

    @type  gsg: GenestructureOfCodingBlockGraphs
    @param gsg: genestructure that supplies input, genetree and the
                node -> organism lookup

    @type  cbgA: CodingBlockGraph
    @param cbgA: first (flanking) CBG of the series

    @type  cbgB: CodingBlockGraph
    @param cbgB: intermediate CBG that is evaluated

    @type  cbgC: CodingBlockGraph
    @param cbgC: last (flanking) CBG of the series

    @type  minimal_scaffold_aa_enrichment: integer
    @param minimal_scaffold_aa_enrichment: minimal OMSR length gain
                (presumably counted in amino acids) for a single organism

    @rtype:  Boolean
    @return: True when at least one organism reaches the threshold;
             False otherwise, or when an argument is missing, or when
             cbgA and cbgC share no nodes
    """
    # any missing argument (most likely cbgA or cbgC) -> no enrichment
    if not gsg or not cbgA or not cbgB or not cbgC:
        return False

    # without mutual nodes, cbgA and cbgC are not a suitable scaffold
    if not cbgA.mutual_nodes(cbgC):
        return False

    # imported here (not at module level), as in the original code
    from graph_genestructure import GenestructureOfCodingBlockGraphs

    # one temporary genestructure is (re)used for both OMSR measurements
    scratch = GenestructureOfCodingBlockGraphs(gsg.input)
    scratch.codingblockgraphs = [ cbgA, cbgB, cbgC ]
    scratch._GENETREE = gsg._GENETREE
    omsr_with_b = scratch.overall_minimal_spanning_range()
    scratch.codingblockgraphs = [ cbgA, cbgC ]
    omsr_without_b = scratch.overall_minimal_spanning_range()

    # one verdict per node that cbgA and cbgC have in common
    verdicts = []
    for node in cbgA.mutual_nodes(cbgC):
        org = gsg.organism_by_node(node)
        gain = len(omsr_with_b[org]) - len(omsr_without_b[org])
        verdicts.append( gain >= minimal_scaffold_aa_enrichment )

    # a single sufficiently enriched organism is enough
    return any(verdicts)
def replace_scaffold_breaking_cbgs(self,verbose=False):
    """
    (Try) to replace CBGs that break the GSG scaffold by other CBGs

    The CBG positions are visited from the back of the genestructure to
    the front (the first position yielded by the reversed iterator is
    skipped). For each CBG, the first later CBG that shares nodes with it
    is searched for. When one or more CBGs without mutual nodes lie in
    between, these intermediates are:
      - removed, when the two flanking CBGs are at most 1 apart (OMSR
        distance) for every organism;
      - otherwise replaced by newly created scaffold CBG(s), but only if
        their summed total weight exceeds that of the intermediates.

    self.codingblockgraphs is modified in place.

    @type  verbose: Boolean
    @param verbose: print status/debugging messages to STDOUT

    @rtype:  Boolean
    @return: Is any CBG replaced?
    """
    # Boolean return value
    scaffold_breaking_cbg_replaced = False

    # Positions are taken once, back to front; all edits below happen
    # behind `cbgpos`, so the positions still to be visited stay valid.
    for cbgpos in self.cbgpositerator(reversed=True)[1:]:
        cbg = self.codingblockgraphs[cbgpos]
        if cbg.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
            continue
        if cbg.node_count() < self.EXACT_SG_NODE_COUNT:
            continue

        # loop forwards through the GSG and look for mutual nodes;
        # stop at the first CBG that shares nodes with this one
        identical_nodes = []
        for backwardspos in range(cbgpos+1,len(self)):
            comparecbg = self.codingblockgraphs[backwardspos]
            if cbg.mutual_nodes(comparecbg):
                identical_nodes.append(True)
                break
            identical_nodes.append(False)

        # Nothing to do when this is the final CBG ([]), when no later
        # CBG has mutual nodes (no True), or when the direct neighbour
        # already has mutual nodes ([True]).
        if True not in identical_nodes or identical_nodes == [True]:
            continue

        # this is what we are looking for, a list like
        # [ False, ... True ] with at least one False

        # get total_weights of the intermediate CBGs
        tws = [ self.codingblockgraphs[_pos].total_weight()
                for _pos in range(cbgpos+1,backwardspos) ]

        ################################################################
        if verbose:
            print("%s %s %s" % (cbgpos, backwardspos, identical_nodes))
            print(cbg)
            print(tws)
            print(comparecbg)
        ################################################################

        # first, check if the CBGs are already partially overlapping
        # if so, get rid of the intermediate CBG(s)
        omsrdist = cbg.omsr_distance_between_codingblocks(comparecbg)
        if max(omsrdist.values()) <= 1:
            # yes, all organisms glue these CBGs perfectly together
            # just remove the intermediate(s) without further checks
            cbg._CBGinterface3p = None
            comparecbg._CBGinterface5p = None
            # plain slice assignment; list.__setslice__ is deprecated
            # in Python 2 and does not exist in Python 3
            self.codingblockgraphs[cbgpos+1:backwardspos+1] = [ comparecbg ]
            scaffold_breaking_cbg_replaced = True
            # go to the next cbg in the list
            continue

        # do a more elaborate check by trying to create a CBG
        # in this large_scaffold_gap
        # imported here (not at module level), as in the original code
        from graph_genestructure import GenestructureOfCodingBlockGraphs
        partialGSG = GenestructureOfCodingBlockGraphs(self.input)
        partialGSG.codingblockgraphs = [ cbg, comparecbg ]
        partialGSG._GENETREE = self._GENETREE
        partialGSG.create_large_intermediate_cbg_for_scaffold_gap(
                sprdif_min_node_count = 2,
                cbg_min_node_count = self.EXACT_SG_NODE_COUNT,
                verbose = verbose )

        if len(partialGSG) == 2:
            # still only the two flanking CBGs: nothing was created
            ############################################################
            if verbose: print("NO scaffold CBGs found!")
            ############################################################
            continue

        # everything in between the two flanking CBGs is newly created
        new_cbgs = partialGSG.codingblockgraphs[1:-1]
        new_tws = [ _cbg.total_weight() for _cbg in new_cbgs ]
        # only replace when the new CBGs outweigh the current ones
        replace_intermediates = sum(new_tws) > sum(tws)
        if replace_intermediates:
            # replace!
            self.codingblockgraphs[cbgpos+1:backwardspos] = new_cbgs
            scaffold_breaking_cbg_replaced = True

        ################################################################
        if verbose:
            if replace_intermediates:
                print("REPLACING scaffold-breaking CBGs!!!")
            else:
                print("MAINTAINING scaffold-breaking CBGs!!!")
            # separate name, so the outer loop variable is not rebound
            for partialcbg in partialGSG: print(partialcbg)
        ################################################################

    # return if CBGs are removed
    return scaffold_breaking_cbg_replaced