def Expand(self):
    """Build the alignment object from the compressed alignment strings.

    Nothing happens when ``self.mIsExpanded`` is already true.  Otherwise
    a fresh alignment vector is stored in ``self.mMapQuery2Sbjct``, filled
    from ``mQueryFrom``/``mQueryAli``/``mSbjctFrom``/``mSbjctAli`` with
    ``alignlib_lite.py_fillAlignataCompressed`` and the instance is flagged
    as expanded.
    """
    # Guard clause: an already expanded instance is left untouched.
    if self.mIsExpanded:
        return

    self.mMapQuery2Sbjct = alignlib_lite.py_makeAlignataVector()
    alignlib_lite.py_fillAlignataCompressed(
        self.mMapQuery2Sbjct,
        self.mQueryFrom,
        self.mQueryAli,
        self.mSbjctFrom,
        self.mSbjctAli,
    )
    self.mIsExpanded = True
def MapAlignment(self, map_query=None, map_sbjct=None):
    """Re-map this alignment through ``map_query`` and/or ``map_sbjct``.

    The instance is expanded first.  For each map that is given (and
    truthy) the map is expanded, combined with ``self.mMapQuery2Sbjct``
    via ``alignlib_lite.py_combineAlignata`` and the result is copied back
    into ``self.mMapQuery2Sbjct``:

    * ``map_query`` is combined in ``py_RR`` mode and replaces
      ``self.mQueryToken`` with ``map_query.mSbjctToken``.
    * ``map_sbjct`` is combined in ``py_CR`` mode and replaces
      ``self.mSbjctToken`` with ``map_sbjct.mSbjctToken``.

    Note that ``Clear()`` is called on every map that was applied.

    Finally ``self.Contract()`` is called.

    :return: the result of ``self.IsOk()``.
    """
    self.Expand()

    # Scratch alignment that receives the combined result of each step.
    combined = alignlib_lite.py_makeAlignataVector()

    if map_query:
        combined.Clear()
        map_query.Expand()
        alignlib_lite.py_combineAlignata(
            combined,
            map_query.mMapQuery2Sbjct,
            self.mMapQuery2Sbjct,
            alignlib_lite.py_RR,
        )
        map_query.Clear()
        alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, combined)
        self.mQueryToken = map_query.mSbjctToken

    if map_sbjct:
        combined.Clear()
        map_sbjct.Expand()
        alignlib_lite.py_combineAlignata(
            combined,
            self.mMapQuery2Sbjct,
            map_sbjct.mMapQuery2Sbjct,
            alignlib_lite.py_CR,
        )
        map_sbjct.Clear()
        alignlib_lite.py_copyAlignata(self.mMapQuery2Sbjct, combined)
        self.mSbjctToken = map_sbjct.mSbjctToken

    self.Contract()
    return self.IsOk()
def MapRange(self, query_token, query_from, query_to):
    """Map a query range onto the sbjct using this alignment.

    The alignment is rebuilt from the compressed alignment strings on
    every call; ``self.mMapQuery2Sbjct`` is neither read nor modified.

    :param query_token: not used by this method.
    :param query_from: start of the range in query coordinates.
    :param query_to: end of the range in query coordinates.
    :return: tuple ``(self.mSbjctToken, new_from, new_to)``.  ``new_from``
        and ``new_to`` remain 0 when no mapped position was found.
    """
    map_query2sbjct = alignlib_lite.py_makeAlignataVector()
    alignlib_lite.py_fillAlignataCompressed(map_query2sbjct, self.mQueryFrom, self.mQueryAli, self.mSbjctFrom, self.mSbjctAli)
    new_from = 0
    if query_from <= map_query2sbjct.getRowTo():
        # Start no earlier than the first query position of the alignment.
        x = max(query_from, self.mQueryFrom)
        # Walk forward while mapRowToCol() yields 0 (presumably meaning
        # "row not aligned" -- confirm against alignlib).
        while map_query2sbjct.mapRowToCol(x) == 0:
            x += 1
            if x > map_query2sbjct.getRowTo():
                break
        else:
            # NOTE(review): SOURCE was whitespace-collapsed; this ``else``
            # is read as belonging to the ``while`` loop, i.e. it runs only
            # when the scan did not run past getRowTo().  Confirm.
            new_from = map_query2sbjct.mapRowToCol(x)
    new_to = 0
    if query_to >= map_query2sbjct.getRowFrom():
        # Start no later than the last query position of the alignment.
        x = min(query_to, self.mQueryTo)
        # Mirror image of the scan above, walking backwards.
        while map_query2sbjct.mapRowToCol(x) == 0:
            x -= 1
            if x < map_query2sbjct.getRowFrom():
                break
        else:
            # NOTE(review): same ``while``/``else`` reading as above.
            new_to = map_query2sbjct.mapRowToCol(x)
    return self.mSbjctToken, new_from, new_to
def GetClone(self):
    """Return a copy of this object as a new ``Map`` instance.

    The tokens, e-value, coordinates, compressed alignment strings and
    the expansion flag are copied by reference.  When the source is
    expanded, the clone receives its own alignment vector into which
    ``self.mMapQuery2Sbjct`` is copied with
    ``alignlib_lite.py_copyAlignata``.

    The clone is always created as ``Map()``, whatever the type of
    ``self``.
    """
    copied_fields = (
        "mQueryToken", "mSbjctToken", "mEvalue",
        "mQueryFrom", "mQueryTo", "mQueryAli",
        "mSbjctFrom", "mSbjctTo", "mSbjctAli",
        "mIsExpanded",
    )

    clone = Map()

    # Read every source value before writing any of them to the clone.
    snapshot = [getattr(self, field) for field in copied_fields]
    for field, value in zip(copied_fields, snapshot):
        setattr(clone, field, value)

    if self.mIsExpanded:
        clone.mMapQuery2Sbjct = alignlib_lite.py_makeAlignataVector()
        alignlib_lite.py_copyAlignata(clone.mMapQuery2Sbjct,
                                      self.mMapQuery2Sbjct)

    return clone
def getMapFromMali(seq1, seq2, gap_char="-"):
    """Build a map of residue positions between two rows of a mali.

    Both rows are walked column by column.  A pair ``(xpos + 1, ypos + 1)``
    is added to the map for every column in which both rows hold an
    upper-case ASCII letter that is not ``gap_char``.  ``xpos``/``ypos``
    count the non-gap characters seen so far in ``seq1``/``seq2``, so
    lower-case characters advance the counters without being paired.

    :param seq1: first aligned row; its length determines the number of
        columns visited.
    :param seq2: second aligned row; must be at least as long as ``seq1``
        (an ``IndexError`` is raised otherwise).
    :param gap_char: character denoting a gap.
    :return: alignment vector created by
        ``alignlib_lite.py_makeAlignataVector``.
    """
    # string.uppercase is locale dependent and was removed in Python 3;
    # ascii_uppercase is available in both major versions.
    uppercase = string.ascii_uppercase

    xpos = 0
    ypos = 0
    map_a2b = alignlib_lite.py_makeAlignataVector()

    # build map between genomic sequences:
    for p, char1 in enumerate(seq1):
        char2 = seq2[p]
        in_seq1 = char1 != gap_char
        in_seq2 = char2 != gap_char

        if in_seq1 and in_seq2 and char1 in uppercase and char2 in uppercase:
            map_a2b.addPairExplicit(xpos + 1, ypos + 1, 0)

        if in_seq1:
            xpos += 1
        if in_seq2:
            ypos += 1

    return map_a2b
print "master alignment does no exist in the alignment." print globals()["__doc__"] sys.exit(1) master_from, master_ali, master_to, master_id = lines[param_master] print "# master=", master_id for index in range(len(lines)): if param_master == index: continue sbjct_from, sbjct_ali, sbjct_to, sbjct_id = lines[index] print "#", sbjct_id map_master2sbjct = alignlib_lite.py_makeAlignataVector() sbjct_index = string.atoi(sbjct_from) master_index = string.atoi(master_from) map_sbjct2residue = {} for x in range(len(master_ali)): if master_ali[x] not in GAPCHARS and sbjct_ali[x] not in GAPCHARS: map_master2sbjct.addPairExplicit(master_index, sbjct_index, 0) map_sbjct2residue[sbjct_index] = sbjct_ali[x] if master_ali[x] not in GAPCHARS: master_index += 1
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Alignment links are read from stdin, one per line; lines starting
    with ``#`` are ignored.  Links whose query or sbjct is missing from
    the sequence file given with ``--sequences`` are counted as skipped.
    For the remaining links the pairwise alignment is written to stdout
    as two fasta records when the output format is ``fasta``.

    Raises ValueError if no sequence file was supplied.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: blast2fasta.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences",
                      type="string",
                      help="filename with sequences.")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output format.")

    parser.set_defaults(
        filename_sequences=None,
        format="fasta",
    )

    (options, args) = E.Start(parser)

    if not options.filename_sequences:
        # String exceptions are not valid; raise a real exception type.
        raise ValueError("please supply filename with sequences.")

    # Close the sequence file as soon as it has been read.
    with open(options.filename_sequences, "r") as infile:
        sequences = Genomics.ReadPeptideSequences(infile)

    if options.loglevel >= 1:
        print("# read %i sequences" % len(sequences))

    for k in list(sequences.keys()):
        sequences[k] = alignlib_lite.py_makeSequence(sequences[k])

    if options.loglevel >= 2:
        print("# converted %i sequences" % len(sequences))

    ninput, noutput, nskipped, nfailed = 0, 0, 0, 0
    link = BlastAlignments.Link()

    # A single alignment object is reused (and cleared) for every link.
    ali = alignlib_lite.py_makeAlignataVector()

    for line in sys.stdin:

        if line[0] == "#":
            continue

        link.Read(line)
        ninput += 1

        if link.mQueryToken not in sequences or \
                link.mSbjctToken not in sequences:
            nskipped += 1
            continue

        ali.Clear()
        alignlib_lite.py_fillAlignataCompressed(
            ali,
            link.mQueryFrom, link.mQueryAli,
            link.mSbjctFrom, link.mSbjctAli)

        result = alignlib_lite.py_writePairAlignment(
            sequences[link.mQueryToken],
            sequences[link.mSbjctToken],
            ali).split("\n")

        if len(result) != 3:
            # Unexpected output: count the failure and do not try to
            # index into a malformed result below.
            nfailed += 1
            continue

        if options.format == "fasta":
            print(">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %
                  (link.mQueryToken, link.mQueryFrom, link.mQueryTo,
                   result[0].split("\t")[1],
                   link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo,
                   result[1].split("\t")[1]))
            # noutput counts the records that were actually written.
            noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" %
           (ninput, noutput, nskipped, nfailed))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    With ``--method=add`` an existing multiple alignment is read from the
    file named by the first entry of ``--parameters``.  Every sequence
    read from stdin (fasta) is aligned against a profile built from the
    current alignment and added to it; the resulting alignment is written
    to stdout.  With any other method nothing is computed.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i", "--input-format", dest="input_format",
                      type="choice",
                      choices=("plain", "fasta", "clustal", "stockholm",
                               "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("add", ),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a", "--alignment-method", dest="alignment_method",
                      type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        # The first parameter names the existing alignment; close the
        # file once it has been read.
        with open(options.parameters[0], "r") as infile:
            mali.readFromFile(infile, format=options.input_format)
        del options.parameters[0]

        # Number of rows present before any sequence is added.
        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # next() with a default ends the loop both when the iterator
            # is exhausted (StopIteration) and when it yields None.
            cur_record = next(iterator, None)
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # The profile is rebuilt so that it includes the sequences
            # added in previous iterations.
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # presumably hands ownership of ``a`` to new_mali (SWIG
            # ``thisown`` flag) -- TODO confirm
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute the rows of the original entries with their
        # counterparts from the extended alignment
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = \
                new_mali.getRow(x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
def main(argv=None):
    """script main.

    Reads fasta sequences from the file given with ``--sequences`` (or
    from stdin), aligns every pair of sequences with
    ``alignlib_lite.py_makeAlignatorFullDP`` and writes one tab-separated
    line per pair to ``options.stdout``: both titles, score, row range and
    compressed row alignment, column range and compressed column
    alignment, score again, percent identity and both sequence lengths.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences",
                      type="string",
                      help="input file with sequences")

    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.filename_sequences:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # The iterator used to be bound to ``parser`` while the loop read
        # from the undefined name ``iterator`` (NameError).
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # next() with a default ends the loop both when the iterator
            # is exhausted (StopIteration) and when it yields None.
            cur_record = next(iterator, None)
            if cur_record is None:
                break
            sequences.append(
                (cur_record.title,
                 alignlib_lite.py_makeSequence(
                     re.sub(" ", "", cur_record.sequence))))
    finally:
        # Only close what was opened here; stdin is left alone.
        if options.filename_sequences:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    # Visit every unordered pair (x < y) exactly once.
    for x in range(0, nsequences - 1):
        for y in range(x + 1, nsequences):
            alignator.Align(sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(
                map_a2b)

            options.stdout.write(
                "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (
                    sequences[x][0], sequences[y][0],
                    map_a2b.getScore(),
                    map_a2b.getRowFrom(),
                    map_a2b.getRowTo(),
                    row_ali,
                    map_a2b.getColFrom(),
                    map_a2b.getColTo(),
                    col_ali,
                    map_a2b.getScore(),
                    100 * alignlib_lite.py_calculatePercentIdentity(
                        map_a2b, sequences[x][1], sequences[y][1]),
                    sequences[x][1].getLength(),
                    sequences[y][1].getLength()))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Alignment links are read from stdin, one per line; lines starting
    with ``#`` are ignored.  Links whose query or sbjct is missing from
    the sequence file given with ``--sequences`` are counted as skipped.
    For the remaining links the pairwise alignment is written to stdout
    as two fasta records when the output format is ``fasta``.

    Raises ValueError if no sequence file was supplied.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: blast2fasta.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences",
                      type="string",
                      help="filename with sequences.")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output format.")

    parser.set_defaults(
        filename_sequences=None,
        format="fasta",
    )

    (options, args) = E.Start(parser)

    if not options.filename_sequences:
        # String exceptions are not valid; raise a real exception type.
        raise ValueError("please supply filename with sequences.")

    # Close the sequence file as soon as it has been read.
    with open(options.filename_sequences, "r") as infile:
        sequences = Genomics.ReadPeptideSequences(infile)

    if options.loglevel >= 1:
        print("# read %i sequences" % len(sequences))

    for k in list(sequences.keys()):
        sequences[k] = alignlib_lite.py_makeSequence(sequences[k])

    if options.loglevel >= 2:
        print("# converted %i sequences" % len(sequences))

    ninput, noutput, nskipped, nfailed = 0, 0, 0, 0
    link = BlastAlignments.Link()

    # A single alignment object is reused (and cleared) for every link.
    ali = alignlib_lite.py_makeAlignataVector()

    for line in sys.stdin:

        if line[0] == "#":
            continue

        link.Read(line)
        ninput += 1

        if link.mQueryToken not in sequences or \
                link.mSbjctToken not in sequences:
            nskipped += 1
            continue

        ali.Clear()
        alignlib_lite.py_fillAlignataCompressed(
            ali,
            link.mQueryFrom, link.mQueryAli,
            link.mSbjctFrom, link.mSbjctAli)

        result = alignlib_lite.py_writePairAlignment(
            sequences[link.mQueryToken],
            sequences[link.mSbjctToken],
            ali).split("\n")

        if len(result) != 3:
            # Unexpected output: count the failure and do not try to
            # index into a malformed result below.
            nfailed += 1
            continue

        if options.format == "fasta":
            print(">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %
                  (link.mQueryToken, link.mQueryFrom, link.mQueryTo,
                   result[0].split("\t")[1],
                   link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo,
                   result[1].split("\t")[1]))
            # noutput counts the records that were actually written.
            noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" %
           (ninput, noutput, nskipped, nfailed))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    With ``--method=add`` an existing multiple alignment is read from the
    file named by the first entry of ``--parameters``.  Every sequence
    read from stdin (fasta) is aligned against a profile built from the
    current alignment and added to it; the resulting alignment is written
    to stdout.  With any other method nothing is computed.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i", "--input-format", dest="input_format",
                      type="choice",
                      choices=(
                          "plain", "fasta", "clustal", "stockholm", "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("add",),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a", "--alignment-method", dest="alignment_method",
                      type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        # The first parameter names the existing alignment; close the
        # file once it has been read.
        with open(options.parameters[0], "r") as infile:
            mali.readFromFile(infile, format=options.input_format)
        del options.parameters[0]

        # Number of rows present before any sequence is added.
        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # next() with a default ends the loop both when the iterator
            # is exhausted (StopIteration) and when it yields None.
            cur_record = next(iterator, None)
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # The profile is rebuilt so that it includes the sequences
            # added in previous iterations.
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # presumably hands ownership of ``a`` to new_mali (SWIG
            # ``thisown`` flag) -- TODO confirm
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute the rows of the original entries with their
        # counterparts from the extended alignment
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = \
                new_mali.getRow(x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
def main(argv=None):
    """script main.

    Reads fasta sequences from the file given with ``--sequences`` (or
    from stdin), aligns every pair of sequences with
    ``alignlib_lite.py_makeAlignatorFullDP`` and writes one tab-separated
    line per pair to ``options.stdout``: both titles, score, row range and
    compressed row alignment, column range and compressed column
    alignment, score again, percent identity and both sequence lengths.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is normalised here but never handed to E.Start();
    # confirm whether E.Start should receive it.

    parser = E.OptionParser(
        version="%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences",
                      type="string",
                      help="input file with sequences")

    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.filename_sequences:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # The iterator used to be bound to ``parser`` while the loop read
        # from the undefined name ``iterator`` (NameError).
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # next() with a default ends the loop both when the iterator
            # is exhausted (StopIteration) and when it yields None.
            cur_record = next(iterator, None)
            if cur_record is None:
                break
            sequences.append(
                (cur_record.title,
                 alignlib_lite.py_makeSequence(
                     re.sub(" ", "", cur_record.sequence))))
    finally:
        # Only close what was opened here; stdin is left alone.
        if options.filename_sequences:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    # Visit every unordered pair (x < y) exactly once.
    for x in range(0, nsequences - 1):
        for y in range(x + 1, nsequences):
            alignator.Align(sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(
                map_a2b)

            options.stdout.write(
                "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (
                    sequences[x][0], sequences[y][0],
                    map_a2b.getScore(),
                    map_a2b.getRowFrom(),
                    map_a2b.getRowTo(),
                    row_ali,
                    map_a2b.getColFrom(),
                    map_a2b.getColTo(),
                    col_ali,
                    map_a2b.getScore(),
                    100 * alignlib_lite.py_calculatePercentIdentity(
                        map_a2b, sequences[x][1], sequences[y][1]),
                    sequences[x][1].getLength(),
                    sequences[y][1].getLength()))

    E.Stop()