示例#1
0
 def Expand(self):
     """Make sure the expanded query-to-subject alignment is available.

     If ``mIsExpanded`` is false, a fresh alignata vector is stored in
     ``mMapQuery2Sbjct`` and filled from the compressed alignment
     (``mQueryFrom``/``mQueryAli`` and ``mSbjctFrom``/``mSbjctAli``).
     ``mIsExpanded`` is set to True on every call.
     """
     needs_build = not self.mIsExpanded
     if needs_build:
         expanded = self.mMapQuery2Sbjct = alignlib_lite.py_makeAlignataVector()
         alignlib_lite.py_fillAlignataCompressed(
             expanded,
             self.mQueryFrom, self.mQueryAli,
             self.mSbjctFrom, self.mSbjctAli)
     self.mIsExpanded = True
示例#2
0
    def MapAlignment(self, map_query=None, map_sbjct=None):
        """Re-map this alignment through an optional query and/or subject map.

        The alignment is expanded first. For each map that is given (and
        truthy), the map is expanded, combined with ``mMapQuery2Sbjct`` into
        a scratch vector, cleared, and the combined result is copied back
        into ``mMapQuery2Sbjct``. The corresponding token of this object is
        replaced by the map's ``mSbjctToken``. Finally the alignment is
        contracted again.

        Returns the result of ``self.IsOk()``.
        """
        self.Expand()

        # scratch vector that receives each combined alignment
        combined = alignlib_lite.py_makeAlignataVector()

        if map_query:
            combined.Clear()
            map_query.Expand()
            # NOTE(review): py_RR presumably joins both maps on their row
            # coordinate - confirm against the alignlib documentation.
            alignlib_lite.py_combineAlignata(
                combined,
                map_query.mMapQuery2Sbjct,
                self.mMapQuery2Sbjct,
                alignlib_lite.py_RR)
            map_query.Clear()
            alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, combined)
            self.mQueryToken = map_query.mSbjctToken

        if map_sbjct:
            combined.Clear()
            map_sbjct.Expand()
            # NOTE(review): py_CR presumably joins the column of this map
            # with the row of map_sbjct - confirm.
            alignlib_lite.py_combineAlignata(
                combined,
                self.mMapQuery2Sbjct,
                map_sbjct.mMapQuery2Sbjct,
                alignlib_lite.py_CR)
            map_sbjct.Clear()
            alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, combined)
            self.mSbjctToken = map_sbjct.mSbjctToken

        self.Contract()

        return self.IsOk()
示例#3
0
    def MapRange(self, query_token, query_from, query_to):
        """Map a query range onto the subject via the stored alignment.

        The compressed alignment of this object is expanded into a temporary
        alignata vector. Starting at ``query_from`` (clamped to
        ``mQueryFrom``) the rows are scanned upwards for the first one that
        maps to a non-zero column; starting at ``query_to`` (clamped to
        ``mQueryTo``) they are scanned downwards likewise.

        ``query_token`` is accepted but not used.

        Returns a tuple ``(self.mSbjctToken, new_from, new_to)``; a
        coordinate is 0 when no mapped row was found for it.
        """
        new_from = 0
        new_to = 0

        aligned = alignlib_lite.py_makeAlignataVector()
        alignlib_lite.py_fillAlignataCompressed(
            aligned,
            self.mQueryFrom, self.mQueryAli,
            self.mSbjctFrom, self.mSbjctAli)

        # scan forward for the first aligned row at or after query_from
        if query_from <= aligned.getRowTo():
            row = max(query_from, self.mQueryFrom)
            while True:
                col = aligned.mapRowToCol(row)
                if col != 0:
                    new_from = col
                    break
                row += 1
                if row > aligned.getRowTo():
                    break

        # scan backward for the last aligned row at or before query_to
        if query_to >= aligned.getRowFrom():
            row = min(query_to, self.mQueryTo)
            while True:
                col = aligned.mapRowToCol(row)
                if col != 0:
                    new_to = col
                    break
                row -= 1
                if row < aligned.getRowFrom():
                    break

        return self.mSbjctToken, new_from, new_to
示例#4
0
 def Expand(self):
     """Create the expanded alignment from its compressed form if needed.

     Nothing is rebuilt when ``mIsExpanded`` is already true. Otherwise
     ``mMapQuery2Sbjct`` is replaced by a new alignata vector filled from
     ``mQueryFrom``/``mQueryAli`` and ``mSbjctFrom``/``mSbjctAli``.
     In both cases ``mIsExpanded`` ends up as True.
     """
     if not self.mIsExpanded:
         vector = self.mMapQuery2Sbjct = alignlib_lite.py_makeAlignataVector()
         query_side = (self.mQueryFrom, self.mQueryAli)
         sbjct_side = (self.mSbjctFrom, self.mSbjctAli)
         alignlib_lite.py_fillAlignataCompressed(
             vector, query_side[0], query_side[1],
             sbjct_side[0], sbjct_side[1])
     self.mIsExpanded = True
示例#5
0
    def MapAlignment(self, map_query=None, map_sbjct=None):
        """Apply a query-side and/or subject-side map to this alignment.

        After expanding this alignment, each supplied (truthy) map is
        expanded and combined with ``mMapQuery2Sbjct``; the combination is
        copied back into ``mMapQuery2Sbjct``, the supplied map is cleared,
        and the matching token of this object is set to the map's
        ``mSbjctToken``. The alignment is contracted before returning.

        Returns whatever ``self.IsOk()`` reports.
        """
        self.Expand()

        scratch = alignlib_lite.py_makeAlignataVector()

        if map_query:
            scratch.Clear()
            map_query.Expand()
            # map_query comes first, this alignment second (mode py_RR)
            alignlib_lite.py_combineAlignata(
                scratch, map_query.mMapQuery2Sbjct, self.mMapQuery2Sbjct,
                alignlib_lite.py_RR)
            map_query.Clear()
            alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, scratch)
            self.mQueryToken = map_query.mSbjctToken

        if map_sbjct:
            scratch.Clear()
            map_sbjct.Expand()
            # this alignment comes first, map_sbjct second (mode py_CR)
            alignlib_lite.py_combineAlignata(
                scratch, self.mMapQuery2Sbjct, map_sbjct.mMapQuery2Sbjct,
                alignlib_lite.py_CR)
            map_sbjct.Clear()
            alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, scratch)
            self.mSbjctToken = map_sbjct.mSbjctToken

        self.Contract()

        return self.IsOk()
示例#6
0
    def MapRange(self, query_token, query_from, query_to):
        """Translate a query interval into subject coordinates.

        A temporary alignata vector is filled from the compressed alignment
        held by this object. The start is searched upwards from
        ``max(query_from, mQueryFrom)`` and the end downwards from
        ``min(query_to, mQueryTo)``; each search stops at the first row whose
        mapped column is non-zero or when it leaves the aligned row range.

        ``query_token`` is not used by this method.

        Returns ``(self.mSbjctToken, new_from, new_to)`` where an unmapped
        boundary is reported as 0.
        """
        lookup = alignlib_lite.py_makeAlignataVector()
        alignlib_lite.py_fillAlignataCompressed(
            lookup,
            self.mQueryFrom, self.mQueryAli,
            self.mSbjctFrom, self.mSbjctAli)

        new_from = 0
        if query_from <= lookup.getRowTo():
            pos = max(query_from, self.mQueryFrom)
            mapped = lookup.mapRowToCol(pos)
            while mapped == 0:
                pos += 1
                if pos > lookup.getRowTo():
                    break
                mapped = lookup.mapRowToCol(pos)
            # mapped is still 0 here if the scan ran off the alignment
            new_from = mapped

        new_to = 0
        if query_to >= lookup.getRowFrom():
            pos = min(query_to, self.mQueryTo)
            mapped = lookup.mapRowToCol(pos)
            while mapped == 0:
                pos -= 1
                if pos < lookup.getRowFrom():
                    break
                mapped = lookup.mapRowToCol(pos)
            # mapped is still 0 here if the scan ran off the alignment
            new_to = mapped

        return self.mSbjctToken, new_from, new_to
示例#7
0
    def GetClone(self):
        """Return a new ``Map`` carrying the same values as this object.

        The tokens, e-value, coordinates, compressed alignment strings and
        the expansion flag are copied attribute by attribute. When this
        object is expanded, the clone gets its own alignata vector holding a
        copy of ``mMapQuery2Sbjct``.
        """
        copied_fields = (
            "mQueryToken", "mSbjctToken", "mEvalue",
            "mQueryFrom", "mQueryTo", "mQueryAli",
            "mSbjctFrom", "mSbjctTo", "mSbjctAli",
            "mIsExpanded",
        )

        clone = Map()

        # read every value first, then assign, like a tuple assignment would
        values = [getattr(self, name) for name in copied_fields]
        for name, value in zip(copied_fields, values):
            setattr(clone, name, value)

        if self.mIsExpanded:
            clone.mMapQuery2Sbjct = alignlib_lite.py_makeAlignataVector()
            alignlib_lite.py_copyAlignata(clone.mMapQuery2Sbjct,
                                          self.mMapQuery2Sbjct)

        return clone
示例#8
0
def getMapFromMali(seq1, seq2, gap_char="-"):
    """Build a position map between two rows of a multiple alignment.

    The rows are walked column by column (over the length of *seq1*) while
    counting the non-gap characters seen so far in each row. A column is
    added to the map when both characters are non-gap uppercase ASCII
    letters; the pair added is the 1-based residue position in *seq1* and
    in *seq2*.

    Arguments:
        seq1: first aligned row.
        seq2: second aligned row; indexed in parallel with *seq1*, so an
            IndexError is raised if it is shorter than *seq1*.
        gap_char: character marking a gap.

    Returns:
        The alignata vector created by ``alignlib_lite`` holding the pairs.
    """
    # string.uppercase is locale-dependent and gone in Python 3;
    # ascii_uppercase is fixed and available in both.
    uppercase = string.ascii_uppercase

    xpos = 0
    ypos = 0

    map_a2b = alignlib_lite.py_makeAlignataVector()
    # build map between genomic sequences:
    for p in range(len(seq1)):
        char1 = seq1[p]
        char2 = seq2[p]
        is_residue1 = char1 != gap_char
        is_residue2 = char2 != gap_char

        if is_residue1 and is_residue2 and \
           char1 in uppercase and char2 in uppercase:
            # NOTE(review): third argument is presumably the pair score.
            map_a2b.addPairExplicit(xpos + 1, ypos + 1, 0)

        if is_residue1:
            xpos += 1
        if is_residue2:
            ypos += 1
    return map_a2b
示例#9
0
文件: MaliIO.py 项目: BioXiao/cgat
def getMapFromMali(seq1, seq2, gap_char="-"):
    """Build a map of residue positions between two aligned sequences.

    Every column index of *seq1* is visited; *seq2* is read at the same
    index. Running counters track how many non-gap characters each
    sequence has contributed. Columns where both characters are non-gap
    uppercase ASCII letters are recorded as a pair of 1-based positions.

    Arguments:
        seq1: first aligned sequence.
        seq2: second aligned sequence; must be at least as long as *seq1*,
            otherwise indexing raises IndexError.
        gap_char: the gap character.

    Returns:
        The ``alignlib_lite`` alignata vector containing the recorded pairs.
    """
    # Locale-independent replacement for string.uppercase, which also does
    # not exist in Python 3.
    uppercase = string.ascii_uppercase

    xpos = 0
    ypos = 0

    map_a2b = alignlib_lite.py_makeAlignataVector()
    # build map between genomic sequences:
    for p in range(len(seq1)):
        char1 = seq1[p]
        char2 = seq2[p]
        in_seq1 = char1 != gap_char
        in_seq2 = char2 != gap_char

        if in_seq1 and in_seq2 and \
           char1 in uppercase and char2 in uppercase:
            # NOTE(review): third argument is presumably the pair score.
            map_a2b.addPairExplicit(xpos + 1, ypos + 1, 0)

        if in_seq1:
            xpos += 1
        if in_seq2:
            ypos += 1
    return map_a2b
示例#10
0
        print "master alignment does no exist in the alignment."
        print globals()["__doc__"]
        sys.exit(1)

    master_from, master_ali, master_to, master_id = lines[param_master]

    print "# master=", master_id

    for index in range(len(lines)):
        if param_master == index:
            continue

        sbjct_from, sbjct_ali, sbjct_to, sbjct_id = lines[index]

        print "#", sbjct_id
        map_master2sbjct = alignlib_lite.py_makeAlignataVector()

        sbjct_index = string.atoi(sbjct_from)
        master_index = string.atoi(master_from)

        map_sbjct2residue = {}

        for x in range(len(master_ali)):

            if master_ali[x] not in GAPCHARS and sbjct_ali[x] not in GAPCHARS:
                map_master2sbjct.addPairExplicit(master_index, sbjct_index, 0)
                map_sbjct2residue[sbjct_index] = sbjct_ali[x]

            if master_ali[x] not in GAPCHARS:
                master_index += 1
示例#11
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads peptide sequences from the file given by ``--sequences``, then
    reads alignment links from stdin (lines starting with ``#`` are
    ignored). Links whose query or sbjct token has no sequence are counted
    as skipped. For every other link the pairwise alignment is rendered;
    links whose rendering does not consist of three lines are counted as
    failed and not printed. With ``--format=fasta`` both aligned rows are
    printed as FASTA entries.

    Raises ValueError if no sequence file has been supplied.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: blast2fasta.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences", type="string",
                      help="filename with sequences.")
    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output format.")

    parser.set_defaults(
        filename_sequences=None,
        format="fasta",
    )

    (options, args) = E.Start(parser)

    if not options.filename_sequences:
        # string exceptions are not valid; raise a real exception instead
        raise ValueError("please supply filename with sequences.")

    # close the sequence file as soon as it has been read
    with open(options.filename_sequences, "r") as infile:
        sequences = Genomics.ReadPeptideSequences(infile)

    if options.loglevel >= 1:
        print("# read %i sequences" % len(sequences))

    # replace each plain sequence by its alignlib sequence object
    for key in list(sequences.keys()):
        sequences[key] = alignlib_lite.py_makeSequence(sequences[key])

    if options.loglevel >= 2:
        print("# converted %i sequences" % len(sequences))

    ninput, noutput, nskipped, nfailed = 0, 0, 0, 0
    link = BlastAlignments.Link()

    # one alignment container is reused for all links
    ali = alignlib_lite.py_makeAlignataVector()

    for line in sys.stdin:

        if line[0] == "#":
            continue

        link.Read(line)
        ninput += 1

        if link.mQueryToken not in sequences or link.mSbjctToken not in sequences:
            nskipped += 1
            continue

        ali.Clear()
        alignlib_lite.py_fillAlignataCompressed(
            ali, link.mQueryFrom, link.mQueryAli, link.mSbjctFrom, link.mSbjctAli)

        result = alignlib_lite.py_writePairAlignment(
            sequences[link.mQueryToken], sequences[link.mSbjctToken], ali).split("\n")

        if len(result) != 3:
            # do not format or count as output what could not be rendered
            nfailed += 1
            continue

        if options.format == "fasta":
            print(">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %
                  (link.mQueryToken, link.mQueryFrom, link.mQueryTo, result[0].split("\t")[1],
                   link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo, result[1].split("\t")[1]))

        noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" % (ninput, noutput, nskipped, nfailed))
    E.Stop()
示例#12
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    With ``--method=add`` the first entry of the comma-separated
    ``--parameters`` stack names a multiple alignment file. That alignment
    is read, and every FASTA record from stdin is aligned against a profile
    of the growing alignment and appended to it. The resulting alignment is
    written to stdout in ``--output-format``.

    Raises ValueError if ``--method=add`` is used without an alignment
    filename in ``--parameters``.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("plain", "fasta", "clustal", "stockholm",
                               "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("add", ),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a",
                      "--alignment-method",
                      dest="alignment_method",
                      type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        # "".split(",") yields [""], so an unset stack shows up as an
        # empty first entry rather than an empty list.
        if not options.parameters[0]:
            raise ValueError(
                "please supply the filename of the multiple alignment "
                "as first entry of --parameters.")

        mali = Mali.Mali()

        # close the alignment file once it has been read
        with open(options.parameters[0], "r") as mali_file:
            mali.readFromFile(mali_file, format=options.input_format)
        del options.parameters[0]

        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # next() with a default ends the loop both for iterators that
            # return None and for those that raise StopIteration at the end
            cur_record = next(iterator, None)
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # the profile is rebuilt as new_mali grows with every record
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # NOTE(review): presumably hands ownership of the wrapped
            # object to new_mali - confirm against the alignlib bindings.
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute the rows of the original entries by their new versions
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
示例#13
0
def main(argv=None):
    """script main.

    Reads FASTA records from the file given by ``--sequences`` (or from
    stdin), aligns every pair of sequences with a full dynamic programming
    alignator and writes one tab-separated line per pair to
    ``options.stdout``: both titles, score, row range and compressed row
    alignment, column range and compressed column alignment, score again,
    percent identity and both sequence lengths.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s",
                      "--sequences",
                      dest="filename_sequences",
                      type="string",
                      help="input file with sequences")

    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.filename_sequences:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # Bind the iterator under the name the loop reads; the option
        # parser is no longer needed and must not be overwritten by it.
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # next() with a default ends the loop both for iterators that
            # return None and for those that raise StopIteration at the end
            cur_record = next(iterator, None)

            if cur_record is None:
                break
            sequences.append(
                (cur_record.title,
                 alignlib_lite.py_makeSequence(re.sub(" ", "",
                                                      cur_record.sequence))))
    finally:
        # only close what was opened here; leave stdin alone
        if options.filename_sequences:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    # one alignment container is reused for all pairs
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    for x in range(0, nsequences - 1):
        for y in range(x + 1, nsequences):
            alignator.Align(sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(
                map_a2b)

            options.stdout.write(
                "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" %
                (sequences[x][0], sequences[y][0], map_a2b.getScore(),
                 map_a2b.getRowFrom(), map_a2b.getRowTo(), row_ali,
                 map_a2b.getColFrom(), map_a2b.getColTo(), col_ali,
                 map_a2b.getScore(),
                 100 * alignlib_lite.py_calculatePercentIdentity(
                     map_a2b, sequences[x][1], sequences[y][1]),
                 sequences[x][1].getLength(), sequences[y][1].getLength()))

    E.Stop()
示例#14
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Loads peptide sequences from the ``--sequences`` file and converts the
    alignment links arriving on stdin into pairwise alignments. Comment
    lines (starting with ``#``) are ignored, links referring to unknown
    sequences are counted as skipped, and links whose rendered alignment
    does not have three lines are counted as failed and not printed. With
    ``--format=fasta`` the two aligned rows are printed as FASTA entries.

    Raises ValueError if no sequence file has been supplied.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: blast2fasta.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s",
                      "--sequences",
                      dest="filename_sequences",
                      type="string",
                      help="filename with sequences.")
    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="string",
                      help="output format.")

    parser.set_defaults(
        filename_sequences=None,
        format="fasta",
    )

    (options, args) = E.Start(parser)

    if not options.filename_sequences:
        # string exceptions are not valid; raise a real exception instead
        raise ValueError("please supply filename with sequences.")

    # close the sequence file as soon as it has been read
    with open(options.filename_sequences, "r") as infile:
        sequences = Genomics.ReadPeptideSequences(infile)

    if options.loglevel >= 1:
        print("# read %i sequences" % len(sequences))

    # replace each plain sequence by its alignlib sequence object
    for key in list(sequences.keys()):
        sequences[key] = alignlib_lite.py_makeSequence(sequences[key])

    if options.loglevel >= 2:
        print("# converted %i sequences" % len(sequences))

    ninput, noutput, nskipped, nfailed = 0, 0, 0, 0
    link = BlastAlignments.Link()

    # one alignment container is reused for all links
    ali = alignlib_lite.py_makeAlignataVector()

    for line in sys.stdin:

        if line[0] == "#":
            continue

        link.Read(line)
        ninput += 1

        if link.mQueryToken not in sequences or link.mSbjctToken not in sequences:
            nskipped += 1
            continue

        ali.Clear()
        alignlib_lite.py_fillAlignataCompressed(ali, link.mQueryFrom,
                                                link.mQueryAli,
                                                link.mSbjctFrom,
                                                link.mSbjctAli)

        result = alignlib_lite.py_writePairAlignment(
            sequences[link.mQueryToken], sequences[link.mSbjctToken],
            ali).split("\n")

        if len(result) != 3:
            # do not format or count as output what could not be rendered
            nfailed += 1
            continue

        if options.format == "fasta":
            print(">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %
                  (link.mQueryToken, link.mQueryFrom, link.mQueryTo, result[0].split("\t")[1],
                   link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo, result[1].split("\t")[1]))

        noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" %
           (ninput, noutput, nskipped, nfailed))
    E.Stop()
示例#15
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    For ``--method=add``: the first entry of the comma-separated
    ``--parameters`` stack is opened as a multiple alignment file. Each
    FASTA record read from stdin is aligned against a profile built from
    the current alignment and then added to it. The final alignment is
    written to stdout using ``--output-format``.

    Raises ValueError if ``--method=add`` is used without an alignment
    filename in ``--parameters``.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=(
                          "plain", "fasta", "clustal", "stockholm", "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("add",),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a", "--alignment-method", dest="alignment_method", type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        # "".split(",") yields [""], so an unset stack shows up as an
        # empty first entry rather than an empty list.
        if not options.parameters[0]:
            raise ValueError(
                "please supply the filename of the multiple alignment "
                "as first entry of --parameters.")

        mali = Mali.Mali()

        # close the alignment file once it has been read
        with open(options.parameters[0], "r") as mali_file:
            mali.readFromFile(mali_file, format=options.input_format)
        del options.parameters[0]

        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # next() with a default ends the loop both for iterators that
            # return None and for those that raise StopIteration at the end
            cur_record = next(iterator, None)
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # the profile is rebuilt as new_mali grows with every record
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # NOTE(review): presumably hands ownership of the wrapped
            # object to new_mali - confirm against the alignlib bindings.
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute the rows of the original entries by their new versions
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
示例#16
0
        print "master alignment does no exist in the alignment."
        print globals()["__doc__"]
        sys.exit(1)

    master_from, master_ali, master_to, master_id = lines[param_master]

    print "# master=", master_id

    for index in range(len(lines)):
        if param_master == index:
            continue

        sbjct_from, sbjct_ali, sbjct_to, sbjct_id = lines[index]

        print "#", sbjct_id
        map_master2sbjct = alignlib_lite.py_makeAlignataVector()

        sbjct_index = string.atoi(sbjct_from)
        master_index = string.atoi(master_from)

        map_sbjct2residue = {}

        for x in range(len(master_ali)):

            if master_ali[x] not in GAPCHARS and sbjct_ali[x] not in GAPCHARS:
                map_master2sbjct.addPairExplicit(master_index, sbjct_index, 0)
                map_sbjct2residue[sbjct_index] = sbjct_ali[x]

            if master_ali[x] not in GAPCHARS:
                master_index += 1
def main(argv=None):
    """script main.

    Collects all FASTA records from the ``--sequences`` file (or stdin when
    no file is given), aligns each pair of sequences with a full dynamic
    programming alignator and writes one tab-separated line per pair to
    ``options.stdout``: both titles, score, row range and compressed row
    alignment, column range and compressed column alignment, score again,
    percent identity and both sequence lengths.

    NOTE(review): *argv* is defaulted here but not forwarded to E.Start.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences", type="string",
                      help="input file with sequences")

    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.filename_sequences:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # Bind the iterator under the name the loop reads; the option
        # parser is no longer needed and must not be overwritten by it.
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # next() with a default ends the loop both for iterators that
            # return None and for those that raise StopIteration at the end
            cur_record = next(iterator, None)

            if cur_record is None:
                break
            sequences.append((cur_record.title,
                              alignlib_lite.py_makeSequence(
                                  re.sub(" ", "", cur_record.sequence))))
    finally:
        # only close what was opened here; leave stdin alone
        if options.filename_sequences:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    # one alignment container is reused for all pairs
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    for x in range(0, nsequences - 1):
        for y in range(x + 1, nsequences):
            alignator.Align(sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(map_a2b)

            options.stdout.write("%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (
                sequences[x][0], sequences[y][0],
                map_a2b.getScore(),
                map_a2b.getRowFrom(),
                map_a2b.getRowTo(),
                row_ali,
                map_a2b.getColFrom(),
                map_a2b.getColTo(),
                col_ali,
                map_a2b.getScore(),
                100 * alignlib_lite.py_calculatePercentIdentity(map_a2b, sequences[x][1], sequences[y][1]),
                sequences[x][1].getLength(),
                sequences[y][1].getLength()))

    E.Stop()