示例#1
0
    def process(matches):
        """Merge the matches of one query/target pair into a single entry.

        The matches are sorted in place by query start and placed into a
        directed graph with one node per match plus an artificial source
        and target node.  An edge ``x -> y`` is added when match ``y``
        follows match ``x`` on the query without overlapping it and the
        two are in consistent order on the target for the strand of the
        first sorted match.  Edge weights count query residues that stay
        uncovered (``mQueryFrom`` from the source, the query gap between
        two chained matches, ``mQueryLength - mQueryTo`` to the target),
        so the shortest source->target path selects the chain of matches
        leaving the fewest query residues uncovered.

        The alignments of the selected matches are combined, written as
        one line to ``options.stdout`` and a warning is issued if any
        match had to be dropped.

        Returns 1 after writing the merged entry, or 0 if `matches` is
        empty (nothing is written in that case).
        """

        if not matches:
            # Nothing to merge; previously this raised an IndexError on
            # matches[0].
            return 0

        # Template for the merged entry.  Taken before sorting, i.e. it
        # is a copy of the first match in the caller's order.
        new = matches[0].copy()

        map_query2target = alignlib_lite.py_makeAlignmentBlocks()

        nmatches = len(matches)
        source = nmatches
        target = nmatches + 1

        graph = networkx.DiGraph()
        graph.add_nodes_from(range(nmatches + 2))

        matches.sort(key=lambda x: x.mQueryFrom)

        # Predicate deciding whether `b` may follow `a` on the target.
        if Genomics.IsPositiveStrand(matches[0].strand):
            def in_order(a, b):
                return a.mSbjctTo < b.mSbjctFrom
        else:
            def in_order(a, b):
                return a.mSbjctFrom > b.mSbjctTo

        for x, xx in enumerate(matches):

            if options.loglevel >= 6:
                options.stdlog.write("# graph: %2i %s\n" % (x, str(xx)))

            for y in range(x + 1, nmatches):
                yy = matches[y]
                # d > 0: the two matches overlap on the query;
                # d <= 0: -d is the size of the query gap between them.
                d = min(xx.mQueryTo, yy.mQueryTo) - \
                    max(xx.mQueryFrom, yy.mQueryFrom)
                if d > 0 or not in_order(xx, yy):
                    continue
                # Attributes are passed as keywords: networkx >= 2.0 no
                # longer accepts a positional attribute dictionary.
                graph.add_edge(x, y, weight=-d)

        for x, xx in enumerate(matches):
            graph.add_edge(source, x, weight=xx.mQueryFrom)
            graph.add_edge(x, target,
                           weight=xx.mQueryLength - xx.mQueryTo)

        if options.loglevel >= 6:
            networkx.write_edgelist(graph, options.stdlog)

        path = networkx.dijkstra_path(graph, source, target)

        if options.loglevel >= 6:
            options.stdlog.write("# path: %s\n" % (str(path)))

        # Strip the artificial source and target nodes from the path.
        new_matches = [matches[x] for x in path[1:-1]]

        if len(matches) != len(new_matches):
            E.warn(("query=%s, target=%s, strand=%s: "
                    "removed overlapping/out-of-order segments: "
                    "before=%i, after=%i") %
                   (matches[0].mQueryId,
                    matches[0].mSbjctId,
                    matches[0].strand,
                    len(matches),
                    len(new_matches)))

        for match in new_matches:
            m = match.getMapQuery2Target()
            alignlib_lite.py_addAlignment2Alignment(map_query2target, m)

        new.fromMap(map_query2target, use_strand=True)

        options.stdout.write(str(new) + "\n")
        options.stdout.flush()
        return 1
示例#2
0
    def Add(self,
            const_other,
            combine_contig=False,
            allow_overlap=False,
            contig_size=0,
            combine_queries=False,
            as_intron=False):
        """Add one entry to another.

        The work is done on copies of `self` and `const_other` obtained
        via ``getCopy()`` and expanded via ``Expand()``; the merged
        result is then written back into `self`.

        This procedure allows to add

        - predictions on different contigs if combine_contig = True
        - overlapping predictions on the same query if allow_overlap = True
        - results from different queries if combine_queries = True

        - if as_intron is set to true, the new fragment is added as an
          intron (code "I" instead of "P" in the peptide2genome map).

        `contig_size` is used as the genomic offset of `const_other`
        when the two entries are joined across contigs.

        Raises ValueError if

        - the queries overlap and allow_overlap is False,
        - the queries differ and combine_queries is False,
        - the sbjct token or strand differ and combine_contig is False,
        - the genomic ranges overlap and combine_contig is False.
        """

        # create working copies of each prediction
        other = const_other.getCopy()
        this = self.getCopy()

        other.Expand()
        this.Expand()

        code = "I" if as_intron else "P"

        # Bound up front: the different-contig join further down reads
        # it even when the queries differ (formerly an UnboundLocalError).
        query_overlap = 0

        # check for query overlaps
        if this.mQueryToken == other.mQueryToken:

            query_overlap = max(
                0,
                min(this.mQueryTo, other.mQueryTo) -
                max(this.mQueryFrom, other.mQueryFrom) + 1)

            if query_overlap > 0:

                if not allow_overlap:
                    raise ValueError(
                        "refusing to add overlapping entries: "
                        "overlap = %i, queries:\n%s\n%s\n, "
                        "set allow_overlap = True " % (
                            query_overlap, str(this), str(other)))

                overlap = query_overlap
                # if queries overlap, truncate this before adding the other
                this.mMapPeptide2Translation.removeRowRegion(
                    this.mQueryTo - overlap + 1, this.mQueryTo)
                other.mMapPeptide2Translation.moveAlignment(0, -overlap)
                this.mQueryTo -= overlap
                this.mTranslation = this.mTranslation[:-overlap]

                # remove aligned residues from the back: drop whole
                # trailing segments while they fit into the overlap ...
                for x in range(len(this.mMapPeptide2Genome) - 1, 0, -1):
                    if this.mMapPeptide2Genome[x][1] <= overlap:
                        overlap -= this.mMapPeptide2Genome[x][1]
                        del this.mMapPeptide2Genome[x]
                    else:
                        break
                # ... and shorten the last remaining segment by the rest
                # (three nucleotides per residue).
                this.mMapPeptide2Genome[-1] = (
                    this.mMapPeptide2Genome[-1][0],
                    this.mMapPeptide2Genome[-1][1] - overlap,
                    this.mMapPeptide2Genome[-1][2] - overlap * 3)

        elif not combine_queries:
            raise ValueError(
                "refusing to add different queries - "
                "set combine_queries = True.")

        if this.mSbjctToken != other.mSbjctToken or \
                this.mSbjctStrand != other.mSbjctStrand:
            if not combine_contig:
                raise ValueError("can not add different sbjct.")
            this.mSbjctToken += "-" + other.mSbjctToken
            this.mSbjctStrand += other.mSbjctStrand

        sbjct_overlap = max(
            0,
            min(this.mSbjctGenomeTo, other.mSbjctGenomeTo) -
            max(this.mSbjctGenomeFrom, other.mSbjctGenomeFrom))

        if sbjct_overlap > 0 and not combine_contig:
            raise ValueError(
                "refusing to add overlapping entries: "
                "overlap = %i, sbjct:\n%s\n%s\n" % (
                    sbjct_overlap, str(this), str(other)))

        if this.mSbjctToken == other.mSbjctToken:

            # set precedence
            if this.mSbjctGenomeFrom < other.mSbjctGenomeFrom:
                first = this
                second = other
            else:
                first = other
                second = this

            # get length of gap
            d_na = second.mSbjctGenomeFrom - first.mSbjctGenomeTo

            if this.mQueryToken != other.mQueryToken:
                d_aa = first.mQueryLength - first.mQueryTo
                # create a new virtual query by concatenating
                # the two queries
                this.mQueryToken += "-" + other.mQueryToken

                # sort out the alignment
                second.mMapPeptide2Translation.moveAlignment(
                    first.mQueryLength, 0)

                this.mQueryLength = first.mQueryLength + second.mQueryLength

            else:
                d_aa = second.mQueryFrom - first.mQueryTo - 1

            this.mSbjctGenomeFrom = min(this.mSbjctGenomeFrom,
                                        other.mSbjctGenomeFrom)
            this.mSbjctGenomeTo = max(this.mSbjctGenomeTo,
                                      other.mSbjctGenomeTo)

            this.mMapPeptide2Genome = first.mMapPeptide2Genome + \
                [(code, d_aa, d_na)] + second.mMapPeptide2Genome
            this.mTranslation = first.mTranslation + second.mTranslation

            second.mMapPeptide2Translation.moveAlignment(0, first.mSbjctTo - 1)

        else:
            # join on different contigs: other is appended to this
            d_na = contig_size - this.mSbjctGenomeTo + \
                other.mSbjctGenomeFrom + query_overlap * 3
            d_aa = other.mQueryFrom - this.mQueryTo - 1
            this.mMapPeptide2Genome += [(code, d_aa, d_na), ] + \
                other.mMapPeptide2Genome
            this.mTranslation += other.mTranslation
            other.mMapPeptide2Translation.moveAlignment(0, this.mSbjctTo - 1)

            # this.mSbjctGenomeFrom stays as it is
            this.mSbjctGenomeTo = contig_size + other.mSbjctGenomeTo

            # `this` comes first in a cross-contig join.  `first` was
            # left unbound here before, so the code below crashed.
            first = this

        # now fill self from first and this
        self.mQueryToken = first.mQueryToken
        self.mQueryLength = this.mQueryLength

        nthis = this.mMapPeptide2Translation.getLength(
        ) - this.mMapPeptide2Translation.getNumGaps()
        nother = other.mMapPeptide2Translation.getLength(
        ) - other.mMapPeptide2Translation.getNumGaps()

        # NOTE(review): when `first` is `other` this takes other's
        # unmerged map, not the combined one stored in `this` above -
        # looks unintended, confirm before changing.
        self.mMapPeptide2Genome = first.mMapPeptide2Genome
        self.mSbjctGenomeFrom = this.mSbjctGenomeFrom
        self.mSbjctGenomeTo = this.mSbjctGenomeTo

        # there might be some reference counting issues, thus
        # do it the explicit way.
        alignlib_lite.py_addAlignment2Alignment(this.mMapPeptide2Translation,
                                                other.mMapPeptide2Translation)
        self.mMapPeptide2Translation = alignlib_lite.py_makeAlignmentVector()
        alignlib_lite.py_addAlignment2Alignment(self.mMapPeptide2Translation,
                                                this.mMapPeptide2Translation)

        self.mTranslation = this.mTranslation

        self.mQueryFrom = self.mMapPeptide2Translation.getRowFrom()
        self.mQueryTo = self.mMapPeptide2Translation.getRowTo()
        self.mSbjctFrom = self.mMapPeptide2Translation.getColFrom()
        self.mSbjctTo = self.mMapPeptide2Translation.getColTo()

        self.mQueryCoverage = 100.0 * \
            (self.mQueryTo - self.mQueryFrom + 1) / float(self.mQueryLength)

        # space separated fields of all (code, d_aa, d_na) segments
        self.mAlignmentString = " ".join(
            " ".join(map(str, x)) for x in self.mMapPeptide2Genome)

        f = alignlib_lite.py_AlignmentFormatEmssions(
            self.mMapPeptide2Translation)
        self.mQueryAli, self.mSbjctAli = f.mRowAlignment, f.mColAlignment

        # summary parameters
        self.mRank = max(this.mRank, other.mRank)
        self.score += other.score
        self.mNGaps += other.mNGaps
        self.mNFrameShifts += other.mNFrameShifts
        self.mNIntrons += other.mNIntrons + 1
        self.mNStopCodons += other.mNStopCodons

        nnew = self.mMapPeptide2Translation.getLength(
        ) - self.mMapPeptide2Translation.getNumGaps()

        # percentages of both parts weighted by their aligned length,
        # capped at 100
        self.mPercentIdentity = min(
            100.0,
            (self.mPercentIdentity * nthis + other.mPercentIdentity * nother) /
            nnew)
        self.mPercentSimilarity = min(
            100.0, (self.mPercentSimilarity * nthis +
                    other.mPercentSimilarity * nother) / nnew)

        self.mNAssembled += 1 + other.mNAssembled
示例#3
0
文件: psl2psl.py 项目: Q-KIM/cgat
    def process(matches):
        """Merge the matches of one query/target pair into a single entry.

        The matches are sorted in place by query start and placed into a
        directed graph with one node per match plus an artificial source
        and target node.  An edge ``x -> y`` is added when match ``y``
        follows match ``x`` on the query without overlapping it and the
        two are in consistent order on the target for the strand of the
        first sorted match.  Edge weights count query residues that stay
        uncovered (``mQueryFrom`` from the source, the query gap between
        two chained matches, ``mQueryLength - mQueryTo`` to the target),
        so the shortest source->target path selects the chain of matches
        leaving the fewest query residues uncovered.

        The alignments of the selected matches are combined, written as
        one line to ``options.stdout`` and a warning is issued if any
        match had to be dropped.

        Returns 1 after writing the merged entry, or 0 if `matches` is
        empty (nothing is written in that case).
        """

        if not matches:
            # Nothing to merge; previously this raised an IndexError on
            # matches[0].
            return 0

        # Template for the merged entry.  Taken before sorting, i.e. it
        # is a copy of the first match in the caller's order.
        new = matches[0].copy()

        map_query2target = alignlib_lite.py_makeAlignmentBlocks()

        nmatches = len(matches)
        source = nmatches
        target = nmatches + 1

        graph = networkx.DiGraph()
        # range instead of xrange: works on Python 2 and 3 alike
        graph.add_nodes_from(range(nmatches + 2))

        matches.sort(key=lambda x: x.mQueryFrom)

        # Predicate deciding whether `b` may follow `a` on the target.
        if Genomics.IsPositiveStrand(matches[0].strand):
            def in_order(a, b):
                return a.mSbjctTo < b.mSbjctFrom
        else:
            def in_order(a, b):
                return a.mSbjctFrom > b.mSbjctTo

        for x, xx in enumerate(matches):

            if options.loglevel >= 6:
                options.stdlog.write("# graph: %2i %s\n" % (x, str(xx)))

            for y in range(x + 1, nmatches):
                yy = matches[y]
                # d > 0: the two matches overlap on the query;
                # d <= 0: -d is the size of the query gap between them.
                d = min(xx.mQueryTo, yy.mQueryTo) - \
                    max(xx.mQueryFrom, yy.mQueryFrom)
                if d > 0 or not in_order(xx, yy):
                    continue
                # Attributes are passed as keywords: networkx >= 2.0 no
                # longer accepts a positional attribute dictionary.
                graph.add_edge(x, y, weight=-d)

        for x, xx in enumerate(matches):
            graph.add_edge(source, x, weight=xx.mQueryFrom)
            graph.add_edge(x, target,
                           weight=xx.mQueryLength - xx.mQueryTo)

        if options.loglevel >= 6:
            networkx.write_edgelist(graph, options.stdlog)

        path = networkx.dijkstra_path(graph, source, target)

        if options.loglevel >= 6:
            options.stdlog.write("# path: %s\n" % (str(path)))

        # Strip the artificial source and target nodes from the path.
        new_matches = [matches[x] for x in path[1:-1]]

        if len(matches) != len(new_matches):
            E.warn(("query=%s, target=%s, strand=%s: "
                    "removed overlapping/out-of-order segments: "
                    "before=%i, after=%i") %
                   (matches[0].mQueryId,
                    matches[0].mSbjctId,
                    matches[0].strand,
                    len(matches),
                    len(new_matches)))

        for match in new_matches:
            m = match.getMapQuery2Target()
            alignlib_lite.py_addAlignment2Alignment(map_query2target, m)

        new.fromMap(map_query2target, use_strand=True)

        options.stdout.write(str(new) + "\n")
        options.stdout.flush()
        return 1
示例#4
0
    def Add(self, const_other,
            combine_contig=False,
            allow_overlap=False,
            contig_size=0,
            combine_queries=False,
            as_intron=False):
        """Add one entry to another.

        The work is done on copies of `self` and `const_other` obtained
        via ``getCopy()`` and expanded via ``Expand()``; the merged
        result is then written back into `self`.

        This procedure allows to add

        - predictions on different contigs if combine_contig = True
        - overlapping predictions on the same query if allow_overlap = True
        - results from different queries if combine_queries = True

        - if as_intron is set to true, the new fragment is added as an
          intron (code "I" instead of "P" in the peptide2genome map).

        `contig_size` is used as the genomic offset of `const_other`
        when the two entries are joined across contigs.

        Raises ValueError if

        - the queries overlap and allow_overlap is False,
        - the queries differ and combine_queries is False,
        - the sbjct token or strand differ and combine_contig is False,
        - the genomic ranges overlap and combine_contig is False.
        """

        ## create working copies of each prediction
        other = const_other.getCopy()
        this = self.getCopy()

        other.Expand()
        this.Expand()

        code = "I" if as_intron else "P"

        ## Bound up front: the different-contig join further down reads
        ## it even when the queries differ (formerly an UnboundLocalError).
        query_overlap = 0

        ## check for query overlaps
        if this.mQueryToken == other.mQueryToken:

            query_overlap = max(
                0,
                min(this.mQueryTo, other.mQueryTo) -
                max(this.mQueryFrom, other.mQueryFrom) + 1)

            if query_overlap > 0:

                if not allow_overlap:
                    raise ValueError(
                        "refusing to add overlapping entries: "
                        "overlap = %i, queries:\n%s\n%s\n, "
                        "set allow_overlap = True " % (
                            query_overlap, str(this), str(other)))

                overlap = query_overlap
                ## if queries overlap, truncate this before adding the other
                this.mMapPeptide2Translation.removeRowRegion(
                    this.mQueryTo - overlap + 1, this.mQueryTo)
                other.mMapPeptide2Translation.moveAlignment(0, -overlap)
                this.mQueryTo -= overlap
                this.mTranslation = this.mTranslation[:-overlap]

                ## remove aligned residues from the back: drop whole
                ## trailing segments while they fit into the overlap ...
                for x in range(len(this.mMapPeptide2Genome) - 1, 0, -1):
                    if this.mMapPeptide2Genome[x][1] <= overlap:
                        overlap -= this.mMapPeptide2Genome[x][1]
                        del this.mMapPeptide2Genome[x]
                    else:
                        break
                ## ... and shorten the last remaining segment by the rest
                ## (three nucleotides per residue).
                this.mMapPeptide2Genome[-1] = (
                    this.mMapPeptide2Genome[-1][0],
                    this.mMapPeptide2Genome[-1][1] - overlap,
                    this.mMapPeptide2Genome[-1][2] - overlap * 3)

        elif not combine_queries:
            raise ValueError(
                "refusing to add different queries - "
                "set combine_queries = True.")

        if this.mSbjctToken != other.mSbjctToken or \
                this.mSbjctStrand != other.mSbjctStrand:
            if not combine_contig:
                raise ValueError("can not add different sbjct.")
            this.mSbjctToken += "-" + other.mSbjctToken
            this.mSbjctStrand += other.mSbjctStrand

        sbjct_overlap = max(
            0,
            min(this.mSbjctGenomeTo, other.mSbjctGenomeTo) -
            max(this.mSbjctGenomeFrom, other.mSbjctGenomeFrom))

        if sbjct_overlap > 0 and not combine_contig:
            raise ValueError(
                "refusing to add overlapping entries: "
                "overlap = %i, sbjct:\n%s\n%s\n" % (
                    sbjct_overlap, str(this), str(other)))

        if this.mSbjctToken == other.mSbjctToken:

            ## set precedence
            if this.mSbjctGenomeFrom < other.mSbjctGenomeFrom:
                first = this
                second = other
            else:
                first = other
                second = this

            ## get length of gap
            d_na = second.mSbjctGenomeFrom - first.mSbjctGenomeTo

            if this.mQueryToken != other.mQueryToken:
                d_aa = first.mQueryLength - first.mQueryTo
                # create a new virtual query by concatenating
                # the two queries
                this.mQueryToken += "-" + other.mQueryToken

                # sort out the alignment
                second.mMapPeptide2Translation.moveAlignment(
                    first.mQueryLength, 0)

                this.mQueryLength = first.mQueryLength + second.mQueryLength

            else:
                d_aa = second.mQueryFrom - first.mQueryTo - 1

            this.mSbjctGenomeFrom = min(this.mSbjctGenomeFrom,
                                        other.mSbjctGenomeFrom)
            this.mSbjctGenomeTo = max(this.mSbjctGenomeTo,
                                      other.mSbjctGenomeTo)

            this.mMapPeptide2Genome = first.mMapPeptide2Genome + \
                [(code, d_aa, d_na)] + second.mMapPeptide2Genome
            this.mTranslation = first.mTranslation + second.mTranslation

            second.mMapPeptide2Translation.moveAlignment(0, first.mSbjctTo - 1)

        else:
            ## join on different contigs: other is appended to this
            d_na = contig_size - this.mSbjctGenomeTo + \
                other.mSbjctGenomeFrom + query_overlap * 3
            d_aa = other.mQueryFrom - this.mQueryTo - 1
            this.mMapPeptide2Genome += [(code, d_aa, d_na), ] + \
                other.mMapPeptide2Genome
            this.mTranslation += other.mTranslation
            other.mMapPeptide2Translation.moveAlignment(0, this.mSbjctTo - 1)

            ## this.mSbjctGenomeFrom stays as it is
            this.mSbjctGenomeTo = contig_size + other.mSbjctGenomeTo

            ## `this` comes first in a cross-contig join.  `first` was
            ## left unbound here before, so the code below crashed.
            first = this

        ## now fill self from first and this
        self.mQueryToken = first.mQueryToken
        self.mQueryLength = this.mQueryLength

        nthis = this.mMapPeptide2Translation.getLength() - \
            this.mMapPeptide2Translation.getNumGaps()
        nother = other.mMapPeptide2Translation.getLength() - \
            other.mMapPeptide2Translation.getNumGaps()

        ## NOTE(review): when `first` is `other` this takes other's
        ## unmerged map, not the combined one stored in `this` above -
        ## looks unintended, confirm before changing.
        self.mMapPeptide2Genome = first.mMapPeptide2Genome
        self.mSbjctGenomeFrom = this.mSbjctGenomeFrom
        self.mSbjctGenomeTo = this.mSbjctGenomeTo

        ## there might be some reference counting issues, thus
        ## do it the explicit way.
        alignlib_lite.py_addAlignment2Alignment(this.mMapPeptide2Translation,
                                                other.mMapPeptide2Translation)
        self.mMapPeptide2Translation = alignlib_lite.py_makeAlignmentVector()
        alignlib_lite.py_addAlignment2Alignment(self.mMapPeptide2Translation,
                                                this.mMapPeptide2Translation)

        self.mTranslation = this.mTranslation

        self.mQueryFrom = self.mMapPeptide2Translation.getRowFrom()
        self.mQueryTo = self.mMapPeptide2Translation.getRowTo()
        self.mSbjctFrom = self.mMapPeptide2Translation.getColFrom()
        self.mSbjctTo = self.mMapPeptide2Translation.getColTo()

        self.mQueryCoverage = 100.0 * \
            (self.mQueryTo - self.mQueryFrom + 1) / float(self.mQueryLength)

        ## space separated fields of all (code, d_aa, d_na) segments
        self.mAlignmentString = " ".join(
            " ".join(map(str, x)) for x in self.mMapPeptide2Genome)

        f = alignlib_lite.py_AlignmentFormatEmssions(
            self.mMapPeptide2Translation)
        self.mQueryAli, self.mSbjctAli = f.mRowAlignment, f.mColAlignment

        ## summary parameters
        self.mRank = max(this.mRank, other.mRank)
        self.score += other.score
        self.mNGaps += other.mNGaps
        self.mNFrameShifts += other.mNFrameShifts
        self.mNIntrons += other.mNIntrons + 1
        self.mNStopCodons += other.mNStopCodons

        nnew = self.mMapPeptide2Translation.getLength() - \
            self.mMapPeptide2Translation.getNumGaps()

        ## percentages of both parts weighted by their aligned length,
        ## capped at 100
        self.mPercentIdentity = min(
            100.0,
            (self.mPercentIdentity * nthis +
             other.mPercentIdentity * nother) / nnew)
        self.mPercentSimilarity = min(
            100.0,
            (self.mPercentSimilarity * nthis +
             other.mPercentSimilarity * nother) / nnew)

        self.mNAssembled += 1 + other.mNAssembled