Пример #1
0
    def add_codingblock(self,
                        new,
                        log_debug=False,
                        only_try_adding=False,
                        max_cbg_gtg_topo_dif=None,
                        max_cbg_gtg_abs_dif=None,
                        min_cbg_gtg_id_ratio=None,
                        min_tcode_omsr=None,
                        omit_conditional_addition=False):
        """
        (Try to) add a CodingBlockGraph to the genestructure

        @type  new: CodingBlockGraph
        @param new: CodingBlockGraph object to be added to the genestructure

        @type  only_try_adding: Boolean
        @param only_try_adding: only try to add the CBG and return succes status

        @type  omit_conditional_addition: Boolean
        @param omit_conditional_addition: omit all CBG quality checks; just
                verify if the CBG is placeable based on the OMSR coordinates

        @type  max_cbg_gtg_topo_dif: float (or None)
        @param max_cbg_gtg_topo_dif:

        @type  max_cbg_gtg_abs_dif: float (or None)
        @param max_cbg_gtg_abs_dif:

        @type  max_cbg_gtg_id_ratio: float (or None)
        @param max_cbg_gtg_id_ratio:

        @type  min_tcode_omsr: float (or None)
        @param min_tcode_omsr:


        @rtype:  Boolean
        @return: True or False, weather or not adding was succesfull
        """

        verbose = log_debug  # log_debug must be replaced by verbose....

        # update edge weights by overall minimal spanning range
        if not only_try_adding:
            new.update_edge_weights_by_minimal_spanning_range()

        # check for difference with GeneTreeGraph
        if new.__class__.__name__ == 'CodingBlockGraph' and\
        not omit_conditional_addition and\
        self.genetree() and len(self) >= 1:
            newgtg = new.genetree()

            if new.node_count() == self.EXACT_SG_NODE_COUNT:
                gtg = self.genetree()
            else:
                # new to-be-placed graph misses certain organism node(s)
                completegtg = self.genetree()
                gtg = deepcopy(completegtg)
                for missingorg in completegtg.organism_set().difference(
                        new.organism_set()):
                    gtg.del_node(missingorg)

            # calculate identity, topological and absolute GTG differences
            cbg_gtg_topo_dif = gtg.graphalignmentdifference(newgtg)
            cbg_gtg_abs_dif = gtg.absolutegraphalignmentdifference(newgtg)
            cbg_gtg_id_ratio = newgtg.identity() / gtg.identity()

            ####################################################################
            if verbose: print "cbg2gsg", new
            ####################################################################

            # check the identity ratio
            if min_cbg_gtg_id_ratio:
                threshold_min_cbg_gtg_id_ratio = MIN_GTG_ID_RATIO_FUNCTION(
                    min_cbg_gtg_id_ratio, gtg, new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_min_cbg_gtg_id_ratio,
                    print min_cbg_gtg_id_ratio
                ################################################################
            else:
                threshold_min_cbg_gtg_id_ratio = MIN_GTG_ID_RATIO_FUNCTION(
                    self.MIN_CBG_GTG_ID_RATIO, gtg, new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_min_cbg_gtg_id_ratio,
                    print self.MIN_CBG_GTG_ID_RATIO
                ################################################################

            if cbg_gtg_id_ratio < threshold_min_cbg_gtg_id_ratio:
                ################################################################
                if verbose:
                    print "rejected on ID ratio", threshold_min_cbg_gtg_id_ratio,
                    print ">", cbg_gtg_id_ratio
                ################################################################
                return False
            else:
                pass

            # check the relative topological difference
            if max_cbg_gtg_topo_dif:
                threshold_max_cbg_gtg_topo_dif = MAX_GTG_TOPO_DIF_FUNCTION(
                    max_cbg_gtg_topo_dif, gtg, new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_max_cbg_gtg_topo_dif,
                    print max_cbg_gtg_topo_dif, gtg.identity(), new.omsrlength(
                    )
                ################################################################
            else:
                threshold_max_cbg_gtg_topo_dif = MAX_GTG_TOPO_DIF_FUNCTION(
                    self.MAX_CBG_GTG_TOPO_DIF, gtg, new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_max_cbg_gtg_topo_dif,
                    print self.MAX_CBG_GTG_TOPO_DIF, gtg.identity(
                    ), new.omsrlength()
                ################################################################

            if cbg_gtg_id_ratio >= 1.10:
                # ignore TOPO_DIF check when newgtg.id% >> gtg.id%
                pass
            elif cbg_gtg_topo_dif > threshold_max_cbg_gtg_topo_dif:
                ################################################################
                if verbose:
                    print "rejected on TOPO_DIF", cbg_gtg_topo_dif,
                    print ">", threshold_max_cbg_gtg_topo_dif
                ################################################################
                return False
            else:
                pass

            # check the absolute topological difference
            if max_cbg_gtg_abs_dif:
                threshold_max_cbg_gtg_abs_dif = MAX_GTG_ABS_DIF_FUNCTION(
                    max_cbg_gtg_abs_dif, gtg, new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_max_cbg_gtg_abs_dif,
                    print max_cbg_gtg_abs_dif, gtg.identity(), new.omsrlength()
                ################################################################
            else:
                threshold_max_cbg_gtg_abs_dif = MAX_GTG_ABS_DIF_FUNCTION(
                    self.MAX_CBG_GTG_ABS_DIF, gtg, new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_max_cbg_gtg_abs_dif,
                    print self.MAX_CBG_GTG_ABS_DIF
                ################################################################

            if cbg_gtg_id_ratio >= 1.10:
                # ignore TOPO_DIF check when newgtg.id% >> gtg.id%
                pass
            elif cbg_gtg_abs_dif > threshold_max_cbg_gtg_abs_dif:
                ################################################################
                if verbose:
                    print "rejected on ABS_DIF", threshold_max_cbg_gtg_abs_dif,
                    print "<", cbg_gtg_abs_dif
                ################################################################

                return False
            else:
                pass

            # check the Tcode score
            if min_tcode_omsr > new.msr_tcode_score():
                ################################################################
                if verbose:
                    print "rejected on MIN_TCODE_OMSR", min_tcode_omsr, ">",
                    print new.msr_tcode_score()
                ################################################################
                return False
            else:
                pass

        else:
            # probably omit_conditional_addition==True
            pass

        # check if exactly this one is already in the genestructure
        if self.is_codingblockgraph_already_in_genestructure(new):
            ####################################################################
            if verbose: print "already in genestructure!!"
            ####################################################################
            return False

        # if LowSimilarityRegionCodingBlockGraph, do a OMSR check
        if new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
            # get the OMSR of the complete GeneStructure
            omsrGSG = self.overall_minimal_spanning_range()

            for node, omsr in new._omsr.iteritems():
                org = new.organism_by_node(node)
                if omsrGSG[org].intersection(omsr):
                    ############################################################
                    if verbose:
                        print "ALREADY IN GENESTRUCTURE!", "\n", omsr
                        print omsrGSG[org].intersection(omsr)
                    ############################################################
                    return False
            else:
                ################################################################
                if verbose: print "lsrCBG NOT in genestructure's OMSR"
                ################################################################
                # new lsrCBG! just continue with this function
                pass

        for pos in range(0, len(self)):
            cbg = self.codingblockgraphs[pos]
            # do not compare position towards a LowSimilarityRegionCodingBlockGraph
            if cbg.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                continue

            # get relative positioning data of 2 CBGs
            (absPosCbg, absPosNew, binPosCbg, binPosNew, orfIdent, posRel,
             posBin) = relatively_positioned_towards(cbg, new)

            if verbose:
                print "eval cbg pos", pos, "binarytuples:", binPosCbg, binPosNew

            # Check the positioning in binaryCbgPositioning
            # Required positioning of new codingblock `new` is cbg-->new
            if binPosCbg == (1, 0, 0) and binPosNew == (0, 0, 1):
                # The order is new-->cbg; continue
                continue

            elif new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph' and new.node_set(
            ).difference(cbg.node_set()):
                # binPos comparison assumes correct position, but it is not because node intersection
                # not all nodes in lsrCBG 'new'  are shared in next CBG 'cbg' -> ignore here!
                continue

            elif new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph' and binPosCbg == (
                    1, 0, 0) and binPosNew == (0, 0, 0):
                # Final check -> is the lsrCBG directly ajacent to the cbg?
                # In case a single orf set is split up in several CBGs (and several lsrCBGs),
                # insertion can give problems when not verifying the distance
                distances = cbg.distance_between_codingblocks(new)
                if list(Set(distances.values())) == [0]:
                    # yes, this is the position where we want to insert
                    pass
                else:
                    # nope, not the correct, diretly adjacent position
                    if verbose:
                        print "# NOT ADDING HERE a lsrCBG????"
                        print cbg
                        print new
                        print distances
                    # continue to the next cbg position
                    continue

                # if only_try_adding, return a succesfull True!
                if only_try_adding: return True

                if verbose:
                    print new, binPosCbg, binPosNew, "inserting on pos %s+1" % pos

                # Add into the ordered GeneStructure!
                tobeadded = new
                tobeadded.create_cache()
                self.codingblockgraphs.insert(pos + 1, tobeadded)
                # succesfull insert; return True
                return True

            elif binPosCbg == (0, 0, 1) and binPosNew == (1, 0, 0):
                # The order is cbg-->new; this is the required location!

                if new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                    # check if directly neighboring this cbg
                    distances = cbg.distance_between_codingblocks(new)
                    print new
                    print distances

                # if only_try_adding, return a succesfull True!
                if only_try_adding: return True

                # Make deepcopy of this CBG in orde to prevent abnormalities
                # when splitting/merging in lateron function
                #tobeadded = deepcopy(new)
                tobeadded = new
                tobeadded.create_cache()
                self.codingblockgraphs.insert(pos, tobeadded)
                if verbose:
                    print "ADDING:", pos, new
                    print binPosCbg, binPosNew

                # succesfull insert; return True
                return True

            elif binPosCbg == (0, 1, 0) and binPosNew == (0, 1, 0):
                # Exactly this codingblock exists already in the genestructure
                return False

            elif binPosCbg in [(1, 0, 0),
                               (1, 1, 0)] and binPosNew in [(0, 0, 1),
                                                            (0, 1, 1)]:
                # Check for: binPosCbg ~= (1,?,0) and binPosNew ~= (0,?,1)
                # Some overlap, and the order is new-->cbg
                # So, potential accepted new CBG 1 position AFTER this one!
                # Check if the overlap is compatible with Orf order
                for i in range(0, len(posBin)):
                    (a1, a2, a3), (b1, b2, b3) = posRel[i]
                    (Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3) = posBin[i]
                    identicalorfs = orfIdent[i]
                    if _is_compatible_overlap((a1, a2, a3), (b1, b2, b3),
                                              (Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3),
                                              identicalorfs):
                        pass
                    else:
                        if verbose:
                            print "ABOUT TO BREAK THE FORLOOP I:"
                            print(a1, a2, a3), (b1, b2, b3)
                            print(Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3)
                            cbg_overlap_ratio = float(a2) / float(a1 + a2 + a3)
                            new_overlap_ratio = float(b2) / float(b1 + b2 + b3)
                            overlap_ratio = max(
                                [cbg_overlap_ratio, new_overlap_ratio])
                            print overlap_ratio
                        # No! incorrect positioning -> break
                        break
                else:
                    # EOF forloop nicely reached; compatible new codingblock
                    # The order is cbg-->new; this is the required location!

                    # check how it is positioned towards the other; NO OVERLAP ALOWED HERE!
                    if pos < len(self) - 1:
                        next = self.codingblockgraphs[pos + 1]
                        (absPosNext, absPosNewNext, binPosNext, binPosNewNext,
                         orfIdentNext, posRelNext,
                         posBinNext) = relatively_positioned_towards(
                             new, next)
                        # now check binPosNext & binPosNewNext: should be (1, 0, 0) & (0, 0, 1)
                        # That means -> no overlap with the next CBG
                        if binPosNext == (1, 0, 0) and binPosNewNext == (0, 0,
                                                                         1):
                            # no overlap -> ready to store!
                            pass
                        else:
                            # there is overlap! Do not allow addition of this CBG
                            ###########################################################
                            if verbose:
                                print "OVERLAP WITH NEXT CBG!:", pos, new
                                print binPosNext, binPosNewNext
                            ###########################################################
                            return False
                    else:
                        # no further CBGs in GSG, so no check possible (and no overlap possible ;-)
                        pass

                    # if only_try_adding, return a succesfull True!
                    if only_try_adding: return True

                    # Make deepcopy of this CBG in orde to prevent abnormalities
                    # when splitting/merging in lateron function
                    #tobeadded = deepcopy(new)
                    tobeadded = new
                    tobeadded.create_cache()
                    self.codingblockgraphs.insert(pos + 1, tobeadded)
                    if verbose:
                        print "ADDING:", pos + 1, new
                        print binPosCbg, binPosNew
                    # succesfull insert; return True
                    return True

                # forloop broken -> incompatible new codingblock
                return False

            elif binPosCbg in [(0, 0, 1),
                               (0, 1, 1)] and binPosNew in [(1, 0, 0),
                                                            (1, 1, 0)]:
                # Check for: binPosCbg ~= (0,?,1) and binPosNew ~= (1,?,0)
                # The order is new-->cbg; this is the required location!
                # Check if the overlap is compatible with Orf order
                for i in range(0, len(posBin)):
                    (a1, a2, a3), (b1, b2, b3) = posRel[i]
                    (Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3) = posBin[i]
                    identicalorfs = orfIdent[i]
                    if _is_compatible_overlap((a1, a2, a3), (b1, b2, b3),
                                              (Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3),
                                              identicalorfs):
                        pass
                    else:
                        if verbose:
                            print "ABOUT TO BREAK THE FORLOOP II:"
                            print(a1, a2, a3), (b1, b2, b3)
                            print(Ba1, Ba2, Ba3), (Bb1, Bb2, Bb3)
                            cbg_overlap_ratio = float(a2) / float(a1 + a2 + a3)
                            new_overlap_ratio = float(b2) / float(b1 + b2 + b3)
                            overlap_ratio = max(
                                [cbg_overlap_ratio, new_overlap_ratio])
                            print overlap_ratio
                        # No! incorrect positioning -> break
                        break
                else:
                    # EOF forloop nicely reached; compatible new codingblock
                    # The order is new-->cbg; this is the required location!

                    # if only_try_adding, return a succesfull True!
                    if only_try_adding: return True

                    # Make deepcopy of this CBG in orde to prevent abnormalities
                    # when splitting/merging in lateron function
                    #tobeadded = deepcopy(new)
                    tobeadded = new
                    tobeadded.create_cache()
                    self.codingblockgraphs.insert(pos, tobeadded)
                    # succesfull insert; return True
                    return True

                # forloop broken -> incompatible new codingblock
                return False

            else:
                # A more messy positioning (overlaps/repetitive etc.)
                if verbose:
                    print "WEIRD BINARY SUMMED ORDER!", absPosCbg, absPosNew, binPosCbg, binPosNew
                    print cbg
                    for i in range(0, len(posBin)):
                        print posRel[i], posBin[i], orfIdent[i]
                # Reject this new codingblockgrap
                return False
        else:
            # If eof for loop is reached, append to the end
            # of current genestructure

            # if only_try_adding, return a succesfull True!
            if only_try_adding: return True

            # Make deepcopy of this CBG in orde to prevent abnormalities
            # when splitting/merging in lateron function
            #tobeadded = deepcopy(new)
            tobeadded = new
            tobeadded.create_cache()
            self.codingblockgraphs.append(tobeadded)
            # check if this is the first added cbg to GeneStructure object
            if len(self.codingblockgraphs) == 1:
                # cache the genetreegraph object
                gtg = self.set_genetree()
                # set threshold values as a function of gtg
                self.initialize_first_added_cbg()

            # succesfull insert; return True
            return True
Пример #2
0
    def remove_overlapping_cbgs(self,verbose=False,
        ignore_is_optimal_cbgif=True,
        ignore_is_compatible_cbgif=False,
        cbg_max_alowed_overlap_aa_length=CBG_MAX_ALOWED_GSGREMOVAL_OVERLAP_AA_LENGTH,
        cbg_max_alowed_overlap_ratio=CBG_MAX_ALOWED_GSGREMOVAL_OVERLAP_RATIO):
        """
        Remove overlapping CBGs in the GSG

        @type  ignore_is_optimal_cbgif: Boolean 
        @param ignore_is_optimal_cbgif: if True, leave is_optimal() cbgIFs intact 

        @type  ignore_is_compatible_cbgif: Boolean
        @param ignore_is_compatible_cbgif: if True, leave is_compatible() cbgIFs intact

        @type  cbg_max_alowed_aa_length: integer
        @param cbg_max_alowed_aa_length: maximal overlap between CBGs in AA's

        @type  cbg_max_alowed_overlap_ratio: float
        @param cbg_max_alowed_overlap_ratio: ratio between overlap and omsr (AA)

        @type  verbose: Boolean
        @param verbose: print status/debugging messages to STDOUT

        @rtype:  Integer
        @return: Number of CBGs that are removed from the GSG
        """
        removed = True
        removed_cnt = 0
        while removed:
            removed = False
            for pos in range(1,len(self)):
                # get concerned CBGs
                (cbg1,cbg2) = self.codingblockgraphs[pos-1:pos+1]
                if cbg1.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                    continue
                if cbg2.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                    continue
                prevCBG = None
                nextCBG = None
                if pos-2 >= 0:
                    prevCBG = self.codingblockgraphs[pos-2]
                if pos+1 < len(self):
                    nextCBG = self.codingblockgraphs[pos+1]

                # check cbgIF between these CBGs
                if cbg1._CBGinterface3p:
                    if ignore_is_optimal_cbgif and\
                    cbg1._CBGinterface3p.is_optimal():
                        continue
                    if ignore_is_compatible_cbgif and\
                    cbg1._CBGinterface3p.is_compatible():
                        print "COMPATBLE!!!"
                        continue

                # get overlap data between these 2 CBGs
                (  absPosCbg1, absPosCbg2,
                   binPosCbg1, binPosCbg2, orfIdent,
                   posRel, posBin ) = codingblock_ordering.relatively_positioned_towards(cbg1,cbg2)

                # evaluate the overlap data
                if binPosCbg1 != (1, 0, 0) or binPosCbg2 != (0, 0, 1):
                    distances = cbg1.distance_between_codingblocks(cbg2)
                    overlaps  = [ min([0,value]) for value in distances.values() ]
                    # remove all non-overlaps
                    while 0 in overlaps: overlaps.remove(0)
                    # now sum them and calculate an average
                    summed    = abs(sum(overlaps))
                    if summed:  average = float(summed) / len(overlaps)
                    else:       average = 0.0

                    ############################################################
                    #if verbose:
                    #    print cbg1
                    #    print cbg2
                    #    print pos-1, pos+1,
                    #    print absPosCbg1, absPosCbg2,
                    #    print binPosCbg1, binPosCbg2
                    #    print distances, summed, average,
                    #    print cbg1.total_weight(), cbg2.total_weight()
                    ############################################################

                    # check if the overlap ratio is not to large
                    ratioCbg1 = ( average / cbg1.omsrlength() ) > cbg_max_alowed_overlap_ratio
                    ratioCbg2 = ( average / cbg2.omsrlength() ) > cbg_max_alowed_overlap_ratio
                    remove_pos = None
                    if average > cbg_max_alowed_overlap_aa_length and ratioCbg1 and ratioCbg2:
                        # hmmm both have a high overlap ratio -> remove the lowest scoring
                        if cbg1.total_weight() < cbg2.total_weight():
                            remove_pos = pos-1
                        else:
                            remove_pos = pos
                    elif average > cbg_max_alowed_overlap_aa_length and ratioCbg1:
                        remove_pos = pos-1
                    elif average > cbg_max_alowed_overlap_aa_length and ratioCbg2:
                        remove_pos = pos
                    elif ratioCbg1 and ratioCbg2:
                        # hmmm both have a high overlap ratio -> remove the lowest scoring
                        if cbg1.total_weight() < cbg2.total_weight():
                            remove_pos = pos-1
                        else:
                            remove_pos = pos
                    elif ratioCbg1:
                        remove_pos = pos-1
                    elif ratioCbg2:
                        remove_pos = pos
                    else:
                        # omit removal!
                        pass

                    if remove_pos != None:
                        # final check: if node_count() not identical ->
                        # then remove the one that has lowest number of nodes!
                        if cbg1.node_count() > cbg2.node_count():
                            remove_pos = pos
                        elif cbg1.node_count() < cbg2.node_count():
                            remove_pos = pos-1
                        else:
                            # identical node_count -> stick to the oppointed one!
                            pass

                        # get positional pointer to CBG with which the to-be-deleted
                        # CBG is overlapping with (for verbose logginf)
                        if remove_pos == pos:
                            overlapping_cbg_pos = pos-1
                            theCBGif = cbg2._CBGinterface5p
                            # final check: does to-be-deleted CBG
                            # fill a gap in the genestructure scaffold?
                            if _is_intermediate_overlapping_cbg_a_gsg_scaffold_enrichment(
                            self,cbg1,cbg2,nextCBG):
                                # this is a perfect example of a scaffold enrichment,
                                # caused by a small exon in >= 1 genes, compared to
                                # continious exons in >= 1 other genes.
                                #######################################
                                if verbose:
                                    print "SCAFFOLD ENRICHMENT"
                                    print cbg1
                                    print cbg2
                                    print nextCBG,"NEXT"
                                #######################################
                                continue
                        else:
                            overlapping_cbg_pos = remove_pos 
                            theCBGif = cbg1._CBGinterface3p
                            # final check: does to-be-deleted CBG
                            # fill a gap in the genestructure scaffold?
                            if _is_intermediate_overlapping_cbg_a_gsg_scaffold_enrichment(
                            self,prevCBG,cbg1,cbg2):
                                # this is a perfect example of a scaffold enrichment,
                                # caused by a small exon in >= 1 genes, compared to
                                # continious exons in >= 1 other genes.
                                #######################################
                                if verbose:
                                    print "SCAFFOLD ENRICHMENT"
                                    print prevCBG,"PREV"
                                    print cbg1
                                    print cbg2
                                #######################################
                                continue

                        # remove this codingblock!
                        deletedCBG = self.remove_cbg_by_pos(remove_pos)
                        ############################################################
                        if verbose:
                            print "REMOVED!", pos, overlapping_cbg_pos, remove_pos
                            print deletedCBG
                            deletedCBG.printmultiplealignment()
                            for (key,n1,n2),pacbp in deletedCBG.pacbps.iteritems():
                                print pacbp,n1,n2
                            print "IN VIOLENCE WITH:"
                            print self.codingblockgraphs[overlapping_cbg_pos] 
                            self.codingblockgraphs[overlapping_cbg_pos].printmultiplealignment()
                            for (key,n1,n2),pacbp in\
                            self.codingblockgraphs[overlapping_cbg_pos].pacbps.iteritems():
                                print pacbp,n1,n2
                            if theCBGif: 
                                print "INTERFACE"
                                print theCBGif
                                print theCBGif._interface_is_intron
                        ############################################################
                        # set removed variable to True for the next iteration!
                        removed = True
                        removed_cnt += 1
                        break
                    else:
                        pass
                        ############################################################
                        #if verbose: print "overlap oke...."
                        ############################################################

        # return the number of removed CBGs from the GSG
        return removed_cnt
Пример #3
0
    def add_codingblock(self,new,log_debug=False,only_try_adding=False,
        max_cbg_gtg_topo_dif=None,
        max_cbg_gtg_abs_dif=None,
        min_cbg_gtg_id_ratio=None,
        min_tcode_omsr=None,
        omit_conditional_addition=False ):
        """
        (Try to) add a CodingBlockGraph to the genestructure

        @type  new: CodingBlockGraph
        @param new: CodingBlockGraph object to be added to the genestructure

        @type  only_try_adding: Boolean
        @param only_try_adding: only try to add the CBG and return succes status

        @type  omit_conditional_addition: Boolean
        @param omit_conditional_addition: omit all CBG quality checks; just
                verify if the CBG is placeable based on the OMSR coordinates

        @type  max_cbg_gtg_topo_dif: float (or None)
        @param max_cbg_gtg_topo_dif:

        @type  max_cbg_gtg_abs_dif: float (or None)
        @param max_cbg_gtg_abs_dif:

        @type  max_cbg_gtg_id_ratio: float (or None)
        @param max_cbg_gtg_id_ratio:

        @type  min_tcode_omsr: float (or None)
        @param min_tcode_omsr:


        @rtype:  Boolean
        @return: True or False, weather or not adding was succesfull
        """

        verbose = log_debug # log_debug must be replaced by verbose....

        # update edge weights by overall minimal spanning range
        if not only_try_adding:
            new.update_edge_weights_by_minimal_spanning_range()

        # check for difference with GeneTreeGraph
        if new.__class__.__name__ == 'CodingBlockGraph' and\
        not omit_conditional_addition and\
        self.genetree() and len(self) >= 1:
            newgtg = new.genetree()

            if new.node_count() == self.EXACT_SG_NODE_COUNT:
                gtg = self.genetree()
            else:
                # new to-be-placed graph misses certain organism node(s)
                completegtg = self.genetree()
                gtg  = deepcopy(completegtg)
                for missingorg in completegtg.organism_set().difference(new.organism_set()):
                    gtg.del_node(missingorg)


            # calculate identity, topological and absolute GTG differences
            cbg_gtg_topo_dif = gtg.graphalignmentdifference( newgtg )
            cbg_gtg_abs_dif  = gtg.absolutegraphalignmentdifference( newgtg )
            cbg_gtg_id_ratio = newgtg.identity() / gtg.identity()

            ####################################################################
            if verbose: print "cbg2gsg", new
            ####################################################################

            # check the identity ratio
            if min_cbg_gtg_id_ratio:
                threshold_min_cbg_gtg_id_ratio = MIN_GTG_ID_RATIO_FUNCTION(min_cbg_gtg_id_ratio,gtg,new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_min_cbg_gtg_id_ratio,
                    print min_cbg_gtg_id_ratio
                ################################################################
            else:
                threshold_min_cbg_gtg_id_ratio = MIN_GTG_ID_RATIO_FUNCTION(self.MIN_CBG_GTG_ID_RATIO,gtg,new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_min_cbg_gtg_id_ratio,
                    print self.MIN_CBG_GTG_ID_RATIO
                ################################################################

            if cbg_gtg_id_ratio < threshold_min_cbg_gtg_id_ratio:
                ################################################################
                if verbose:
                    print "rejected on ID ratio", threshold_min_cbg_gtg_id_ratio,
                    print ">", cbg_gtg_id_ratio
                ################################################################
                return False
            else:
                pass


            # check the relative topological difference
            if max_cbg_gtg_topo_dif:
                threshold_max_cbg_gtg_topo_dif = MAX_GTG_TOPO_DIF_FUNCTION(max_cbg_gtg_topo_dif,gtg,new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_max_cbg_gtg_topo_dif,
                    print max_cbg_gtg_topo_dif, gtg.identity(), new.omsrlength()
                ################################################################
            else:
                threshold_max_cbg_gtg_topo_dif = MAX_GTG_TOPO_DIF_FUNCTION(self.MAX_CBG_GTG_TOPO_DIF,gtg,new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_max_cbg_gtg_topo_dif,
                    print self.MAX_CBG_GTG_TOPO_DIF, gtg.identity(), new.omsrlength()
                ################################################################

            if cbg_gtg_id_ratio >= 1.10:
                # ignore TOPO_DIF check when newgtg.id% >> gtg.id%
                pass 
            elif cbg_gtg_topo_dif > threshold_max_cbg_gtg_topo_dif: 
                ################################################################
                if verbose:
                    print "rejected on TOPO_DIF", cbg_gtg_topo_dif,
                    print ">", threshold_max_cbg_gtg_topo_dif
                ################################################################
                return False
            else:
                pass

            # check the absolute topological difference
            if max_cbg_gtg_abs_dif:
                threshold_max_cbg_gtg_abs_dif = MAX_GTG_ABS_DIF_FUNCTION(max_cbg_gtg_abs_dif,gtg,new)
                ################################################################
                if verbose:
                    print "CUSTOM", threshold_max_cbg_gtg_abs_dif,
                    print max_cbg_gtg_abs_dif, gtg.identity(), new.omsrlength()
                ################################################################
            else:
                threshold_max_cbg_gtg_abs_dif = MAX_GTG_ABS_DIF_FUNCTION(self.MAX_CBG_GTG_ABS_DIF,gtg,new)
                ################################################################
                if verbose:
                    print "NORMAL", threshold_max_cbg_gtg_abs_dif,
                    print self.MAX_CBG_GTG_ABS_DIF
                ################################################################


            if cbg_gtg_id_ratio >= 1.10:
                # ignore TOPO_DIF check when newgtg.id% >> gtg.id%
                pass
            elif cbg_gtg_abs_dif > threshold_max_cbg_gtg_abs_dif:
                ################################################################
                if verbose:
                    print "rejected on ABS_DIF", threshold_max_cbg_gtg_abs_dif,
                    print "<", cbg_gtg_abs_dif
                ################################################################

                return False
            else:
                pass


            # check the Tcode score
            if min_tcode_omsr > new.msr_tcode_score():
                ################################################################
                if verbose:
                    print "rejected on MIN_TCODE_OMSR", min_tcode_omsr, ">",
                    print new.msr_tcode_score() 
                ################################################################
                return False
            else:
                pass 

        else:
            # probably omit_conditional_addition==True
            pass


        # check if exactly this one is already in the genestructure
        if self.is_codingblockgraph_already_in_genestructure(new):
            ####################################################################
            if verbose: print "already in genestructure!!"
            ####################################################################
            return False

        # if LowSimilarityRegionCodingBlockGraph, do a OMSR check
        if new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
            # get the OMSR of the complete GeneStructure
            omsrGSG = self.overall_minimal_spanning_range()

            for node,omsr in new._omsr.iteritems():
                org = new.organism_by_node(node)
                if omsrGSG[org].intersection(omsr):
                    ############################################################
                    if verbose:
                        print "ALREADY IN GENESTRUCTURE!", "\n", omsr
                        print omsrGSG[org].intersection(omsr)
                    ############################################################
                    return False
            else:
                ################################################################
                if verbose: print "lsrCBG NOT in genestructure's OMSR"
                ################################################################
                # new lsrCBG! just continue with this function
                pass


        for pos in range(0,len(self)):
            cbg = self.codingblockgraphs[pos]
            # do not compare position towards a LowSimilarityRegionCodingBlockGraph
            if cbg.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                continue

            # get relative positioning data of 2 CBGs
            ( absPosCbg, absPosNew, binPosCbg, binPosNew, orfIdent, posRel, posBin) = relatively_positioned_towards(cbg,new)

            if verbose: print "eval cbg pos", pos, "binarytuples:", binPosCbg, binPosNew

            # Check the positioning in binaryCbgPositioning
            # Required positioning of new codingblock `new` is cbg-->new
            if binPosCbg == (1,0,0) and binPosNew == (0,0,1):
                # The order is new-->cbg; continue
                continue

            elif new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph' and new.node_set().difference(cbg.node_set()):
                # binPos comparison assumes correct position, but it is not because node intersection
                # not all nodes in lsrCBG 'new'  are shared in next CBG 'cbg' -> ignore here!
                continue

            elif new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph' and binPosCbg == (1,0,0) and binPosNew == (0,0,0):
                # Final check -> is the lsrCBG directly ajacent to the cbg?
                # In case a single orf set is split up in several CBGs (and several lsrCBGs),
                # insertion can give problems when not verifying the distance
                distances = cbg.distance_between_codingblocks(new)
                if list(Set(distances.values())) == [0]:
                    # yes, this is the position where we want to insert
                    pass
                else:
                    # nope, not the correct, diretly adjacent position
                    if verbose:
                        print "# NOT ADDING HERE a lsrCBG????"
                        print cbg
                        print new
                        print distances
                    # continue to the next cbg position
                    continue

                # if only_try_adding, return a succesfull True!
                if only_try_adding: return True

                if verbose:
                    print new, binPosCbg, binPosNew, "inserting on pos %s+1" % pos

                # Add into the ordered GeneStructure!
                tobeadded = new
                tobeadded.create_cache()
                self.codingblockgraphs.insert( pos+1, tobeadded )
                # succesfull insert; return True
                return True



            elif binPosCbg == (0,0,1) and binPosNew == (1,0,0):
                # The order is cbg-->new; this is the required location!


                if new.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                    # check if directly neighboring this cbg
                    distances = cbg.distance_between_codingblocks(new)
                    print new
                    print distances

                # if only_try_adding, return a succesfull True!
                if only_try_adding: return True

                # Make deepcopy of this CBG in orde to prevent abnormalities
                # when splitting/merging in lateron function
                #tobeadded = deepcopy(new)
                tobeadded = new
                tobeadded.create_cache()
                self.codingblockgraphs.insert( pos, tobeadded )
                if verbose:
                    print "ADDING:", pos, new
                    print  binPosCbg, binPosNew

                # succesfull insert; return True
                return True

            elif binPosCbg == (0,1,0) and binPosNew == (0,1,0):
                # Exactly this codingblock exists already in the genestructure
                return False

            elif binPosCbg in [(1,0,0),(1,1,0)] and binPosNew in [(0,0,1),(0,1,1)]:
                # Check for: binPosCbg ~= (1,?,0) and binPosNew ~= (0,?,1)
                # Some overlap, and the order is new-->cbg
                # So, potential accepted new CBG 1 position AFTER this one!
                # Check if the overlap is compatible with Orf order
                for i in range(0,len(posBin)):
                    (a1,a2,a3), (b1,b2,b3)       = posRel[i]
                    (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3) = posBin[i]
                    identicalorfs                = orfIdent[i]
                    if _is_compatible_overlap( (a1,a2,a3), (b1,b2,b3), (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3), identicalorfs ):
                        pass
                    else:
                        if verbose:
                            print "ABOUT TO BREAK THE FORLOOP I:"
                            print (a1,a2,a3), (b1,b2,b3)
                            print (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3)
                            cbg_overlap_ratio            = float(a2) / float(a1+a2+a3)
                            new_overlap_ratio            = float(b2) / float(b1+b2+b3)
                            overlap_ratio                = max([cbg_overlap_ratio, new_overlap_ratio])
                            print overlap_ratio
                        # No! incorrect positioning -> break
                        break
                else:
                    # EOF forloop nicely reached; compatible new codingblock
                    # The order is cbg-->new; this is the required location!

                    # check how it is positioned towards the other; NO OVERLAP ALOWED HERE!
                    if pos < len(self)-1:
                        next = self.codingblockgraphs[pos+1]
                        ( absPosNext, absPosNewNext, binPosNext, binPosNewNext,
                          orfIdentNext, posRelNext, posBinNext ) = relatively_positioned_towards(new,next)
                        # now check binPosNext & binPosNewNext: should be (1, 0, 0) & (0, 0, 1)
                        # That means -> no overlap with the next CBG
                        if binPosNext == (1, 0, 0) and binPosNewNext == (0, 0, 1):
                            # no overlap -> ready to store!
                            pass
                        else:
                            # there is overlap! Do not allow addition of this CBG
                            ###########################################################
                            if verbose:
                                print "OVERLAP WITH NEXT CBG!:", pos, new
                                print binPosNext, binPosNewNext 
                            ###########################################################
                            return False
                    else:
                        # no further CBGs in GSG, so no check possible (and no overlap possible ;-)
                        pass

                    # if only_try_adding, return a succesfull True!
                    if only_try_adding: return True

                    # Make deepcopy of this CBG in orde to prevent abnormalities
                    # when splitting/merging in lateron function
                    #tobeadded = deepcopy(new)
                    tobeadded = new
                    tobeadded.create_cache()
                    self.codingblockgraphs.insert( pos+1, tobeadded )
                    if verbose:
                        print "ADDING:", pos+1, new
                        print  binPosCbg, binPosNew
                    # succesfull insert; return True
                    return True

                # forloop broken -> incompatible new codingblock
                return False

            elif binPosCbg in [(0,0,1),(0,1,1)] and binPosNew in [(1,0,0),(1,1,0)]:
                # Check for: binPosCbg ~= (0,?,1) and binPosNew ~= (1,?,0)
                # The order is new-->cbg; this is the required location!
                # Check if the overlap is compatible with Orf order
                for i in range(0,len(posBin)):
                    (a1,a2,a3), (b1,b2,b3)       = posRel[i]
                    (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3) = posBin[i]
                    identicalorfs                = orfIdent[i]
                    if _is_compatible_overlap( (a1,a2,a3), (b1,b2,b3), (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3), identicalorfs ):
                        pass
                    else:
                        if verbose:
                            print "ABOUT TO BREAK THE FORLOOP II:"
                            print (a1,a2,a3), (b1,b2,b3)
                            print (Ba1,Ba2,Ba3), (Bb1,Bb2,Bb3)
                            cbg_overlap_ratio            = float(a2) / float(a1+a2+a3)
                            new_overlap_ratio            = float(b2) / float(b1+b2+b3)
                            overlap_ratio                = max([cbg_overlap_ratio, new_overlap_ratio])
                            print overlap_ratio
                        # No! incorrect positioning -> break
                        break
                else:
                    # EOF forloop nicely reached; compatible new codingblock
                    # The order is new-->cbg; this is the required location!

                    # if only_try_adding, return a succesfull True!
                    if only_try_adding: return True

                    # Make deepcopy of this CBG in orde to prevent abnormalities
                    # when splitting/merging in lateron function
                    #tobeadded = deepcopy(new)
                    tobeadded = new
                    tobeadded.create_cache()
                    self.codingblockgraphs.insert( pos, tobeadded )
                    # succesfull insert; return True
                    return True

                # forloop broken -> incompatible new codingblock
                return False

            else:
                # A more messy positioning (overlaps/repetitive etc.)
                if verbose:
                    print "WEIRD BINARY SUMMED ORDER!", absPosCbg, absPosNew, binPosCbg, binPosNew
                    print cbg
                    for i in range(0,len(posBin)):
                        print posRel[i], posBin[i], orfIdent[i]
                # Reject this new codingblockgrap
                return False
        else:
            # If eof for loop is reached, append to the end
            # of current genestructure

            # if only_try_adding, return a succesfull True!
            if only_try_adding: return True

            # Make deepcopy of this CBG in orde to prevent abnormalities
            # when splitting/merging in lateron function
            #tobeadded = deepcopy(new)
            tobeadded = new
            tobeadded.create_cache()
            self.codingblockgraphs.append( tobeadded )
            # check if this is the first added cbg to GeneStructure object
            if len(self.codingblockgraphs) == 1:
                # cache the genetreegraph object
                gtg = self.set_genetree()
                # set threshold values as a function of gtg
                self.initialize_first_added_cbg()

            # succesfull insert; return True
            return True