Пример #1
0
def parse_fasta_and_write_new_file(results, contigs, output):
    """Write, for each taxon, the contigs whose names are listed for it.

    For every ``taxon`` key in ``results`` the file
    ``<contigs>/<taxon with "_" replaced by "-">.contigs.fasta`` is read and
    the selected records are written to ``<output>/<taxon>.fasta``.

    A record is selected when the first two ``_``-separated fields of its
    identifier (leading ``>`` removed, lower-cased, re-joined with ``_``)
    are contained in the ``rows`` value stored for that taxon.

    Args:
        results: mapping of taxon name -> container of contig names to keep.
        contigs: directory holding the ``*.contigs.fasta`` input files.
        output: directory receiving one ``<taxon>.fasta`` file per taxon.
    """
    # .items() is available on both Python 2 and Python 3 mappings.
    for taxon, rows in results.items():
        outp = FastaWriter(os.path.join(output, "{}.fasta".format(taxon)))
        try:
            inp = "{}.contigs.fasta".format(taxon.replace('_', '-'))
            for fasta in FastaReader(os.path.join(contigs, inp)):
                fields = fasta.identifier.lstrip('>').split('_')[:2]
                name = '_'.join(fields).lower()
                if name in rows:
                    outp.write(fasta)
        finally:
            # Release the output file even when reading or writing fails.
            outp.close()
def main():
    """Gather database-matched reads from every species FASTA into one file.

    Every ``*.fasta`` file in ``args.fastas`` is processed.  The species key
    is the file name up to its first ``.`` with ``-`` replaced by ``_``; it
    is used as a column name of the ``match_map`` table in the SQLite
    database ``args.db``.

    For each read, the first two ``_``-separated fields of its identifier
    (``>`` stripped, lower-cased) are looked up in that column with a
    ``(+)`` or ``(-)`` suffix.  On a match the read gets a new identifier
    built from a running counter, the first (up to) three letters of the
    species name and the matched ``uce`` value, and is written to
    ``args.outfile``.  With ``args.fish`` set, only the part of the ``uce``
    value before its first ``_`` is used.  Reads without a match are skipped.

    Raises:
        AssertionError: if a read matches more than one ``match_map`` row.
    """
    # Local import: nothing else in the file is known to need it.
    from contextlib import closing

    header = """{3}{2} [organism={0}] [molecule=DNA] [moltype=genomic] [location=genomic] [note=ultra conserved element locus {1}] {0} ultra-conserved element locus {1}."""

    args = get_args()
    pth = os.path.join(args.fastas, "*.fasta")
    # closing() releases the writer and the connection even when a query or
    # a malformed header raises part-way through the run.
    with closing(FastaWriter(args.outfile)) as outf, \
            closing(sqlite3.connect(args.db)) as conn:
        cur = conn.cursor()
        counter = 0
        for infile in glob.glob(pth):
            sp = os.path.basename(infile).split('.')[0].replace('-', '_')
            species = sp.replace('_', ' ').capitalize()
            # Single-argument form prints identically on Python 2 and 3.
            print("Working on {}".format(species))
            partial = species.split(' ')[0].lower()[:3]
            # A column name cannot be bound as a parameter, so the species
            # key (taken from the file name) is still interpolated; the
            # looked-up values are bound below.
            query = "SELECT uce FROM match_map WHERE {0} = ? OR {0} = ?".format(sp)
            for read in FastaReader(infile):
                # check for header match, if match get locus name for header
                nn = read.identifier.split("_")[:2]
                nn = "{}_{}".format(nn[0].strip('>').lower(), nn[1].lower())
                cur.execute(query, (nn + "(+)", nn + "(-)"))
                result = cur.fetchall()
                if not result:
                    # no match: skip this read
                    continue
                assert len(result) == 1, "More than 1 result"
                if args.fish:
                    uce = result[0][0].split('_')[0]
                else:
                    uce = result[0][0]
                read.identifier = header.format(species, uce, partial, counter)
                # write all to a common fasta
                outf.write(read)
                counter += 1