示例#1
0
def mauve_pw_align(ref, query, dirs):
    """Set up and perform a pairwise alignment with Mauve."""
    # set outputs
    mauve_outfile = dirs['mauve']+ref.name+"_"+query.name+".mauve"
    segfile = dirs['aln_segs']+ref.name+"_"+query.name+"_segs.txt"
    # check for existing alignment
    if path.exists(segfile):
        print "already done"
    else:
        # prep segments file
        open(segfile, 'w').write('')
        # purge any pre-existing sslist files
        sslist_files = from_dir(dirs['seqfiles'], re.compile(r'.*\.sslist.*'))
        for sslist in sslist_files:
            try: os.remove(dirs['seqfiles']+sslist)
            except Exception: raise
        # do Mauve alignment
        file_list = [ref.gbk, query.gbk]
        align_mauve(file_list, mauve_outfile)
        try:
            # parse Mauve output (without initial clumping)
            coords = mauver_load2_k0(mauve_outfile+".backbone", 0)
            print "\nSegment results:", len(coords), '->',
            # chop segments that are too long
            chop_array = chop_rows(coords, max_size, chop_mode)
            print len(chop_array), 'segments <', max_size, 'bp'
            # make detailed pairwise alignments of the segments
            print "Aligning segments ..."
            ref_rec = load_genbank(ref.gbk)
            query_rec = load_genbank(query.gbk)
            id = iter_align(chop_array, ref_rec, query_rec,
                            dirs['aln_segs'], segfile)
            print "Results:", id, "% id. overall"
        except IOError:
            print "\nERROR: Mauve alignment failed"
示例#2
0
def mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run, max_size,
                   chop_mode, mauve_exec, mtype):
    """Set up and perform a pairwise alignment with Mauve."""
    aln_dir = r_root_dir + run + dirs['aln_segs']
    mauve_dir = r_root_dir + run + dirs['mauve']
    # set outputs
    mauve_outfile = mauve_dir + ref.name + "_" + query.name + ".mauve"
    segfile = aln_dir + ref.name + "_" + query.name + "_segs.txt"
    # check for existing alignment
    if path.exists(segfile):
        print "already done"
    else:
        # prep segments file
        open(segfile, 'w').write('')
        # purge any pre-existing sslist files
        sslist_files = from_dir(g_root_dir, re.compile(r'.*\.sslist.*'))
        for sslist in sslist_files:
            try:
                os.remove(g_root_dir + sslist)
            except Exception:
                raise
        # do Mauve alignment
        file_list = [ref.gbk, query.gbk]
        align_mauve(file_list, mauve_outfile, mauve_exec)
        try:
            # parse Mauve output (without initial clumping)
            coords = mauver_load2_k0(mauve_outfile + ".backbone", 0, mtype)
            print "\nSegment results:", len(coords), '->',
            # chop segments that are too long
            chop_array = chop_rows(coords, max_size, chop_mode, mtype)
            print len(chop_array), 'segments <', max_size, 'bp'
            # make detailed pairwise alignments of the segments
            print "Aligning segments ..."
            ref_rec = load_genbank(ref.gbk)
            query_rec = load_genbank(query.gbk)
            id = iter_align(chop_array, ref_rec, query_rec, aln_dir, segfile)
            print "Results:", id, "% id. overall"
        except IOError:
            print "\nERROR: Mauve alignment failed"
            raise
示例#3
0
def align_ctg2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes,
                  mauve_exec, max_size, chop_mode, mtype):
    """Align contigs pairwise to the reference contig."""
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir + run_id + "/"
    ref_ctg_file = run_ref.file
    mauve_root = run_root + run_dirs['mauve_out_dir'] + ref_n + "/contigs/"
    segments_root = run_root + run_dirs['aln_seg_dir'] + ref_n + "/contigs/"
    q_ctgs_root = run_root + run_dirs['match_out_dir'] + ref_n + "/"
    ensure_dir([segments_root])
    print " ", ref_n
    # log
    logstring = "".join(["\n\n# Align contigs to ref @", timestamp, "\n"])
    run_ref.log(logstring)
    # cycle through genomes
    for genome in genomes:
        # set inputs and outputs
        g_name = genome['name']
        ctgs_fas_dir = q_ctgs_root + g_name + "/"
        mauve_dir = mauve_root + g_name + "/"
        aln_segs_root = segments_root + g_name + "/"
        ensure_dir([mauve_dir])
        print "\t", g_name, "...",
        # log
        logstring = "".join(["\n", g_name])
        run_ref.log(logstring)
        # list genbank files in matches directory
        dir_contents = listdir(ctgs_fas_dir)
        for item in dir_contents:
            pattern = re.compile(r'.*_(\d*)\.fas$')
            match = pattern.match(item)
            if match:
                ctg_num = match.group(1)
                print ctg_num,
                logstring = "".join(["\t", ctg_num])
                run_ref.log(logstring)
                # set inputs and outputs
                q_contig = ctgs_fas_dir + item
                file_list = (ref_ctg_file, q_contig)
                mauve_outfile = mauve_dir + ctg_num + ".mauve"
                aln_segs_dir = aln_segs_root + ctg_num + "/"
                ensure_dir([aln_segs_dir])
                segfile = aln_segs_dir + ctg_num + "_" + ref_n + "_segs.txt"
                open(segfile, 'w').write('')
                # do Mauve alignment
                try:
                    open(ref_ctg_file, 'r')
                    open(q_contig, 'r')
                except IOError:
                    msg = "\nERROR: File missing, cannot align\n\t\t\t"
                    run_ref.log(msg)
                    print msg
                else:
                    align_mauve(file_list, mauve_outfile, mauve_exec)
                    try:
                        # parse Mauve output (without initial clumping)
                        coords = mauver_load2_k0(mauve_outfile + ".backbone",
                                                 0, mtype)
                        # chop segments that are too long
                        chop_array = chop_rows(coords, max_size, chop_mode,
                                               mtype)
                        # make detailed pairwise alignments of the segments
                        ref_rec = load_genbank(ref_ctg_file)
                        query_rec = load_fasta(q_contig)
                        iter_align(chop_array, ref_rec, query_rec,
                                   aln_segs_dir, segfile)
                    except IOError:
                        msg = "\nERROR: Mauve alignment failed\n\t\t\t"
                        run_ref.log(msg)
                        print msg
                    except Exception:
                        msg = "\nERROR: Iteration failed\n\t\t\t"
                        run_ref.log(msg)
                        print msg
        print ""
示例#4
0
def align_cstrct2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes,
                     max_size, chop_mode, mtype, mauve_exec):
    """Align constructs pairwise to the reference contig."""
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir + run_id + "/"
    ref_ctg_file = run_ref.file
    mauve_root = run_root + run_dirs['mauve_out_dir'] + ref_n + "/constructs/"
    segments_root = run_root + run_dirs['aln_seg_dir'] + ref_n + "/constructs/"
    scaff_root = run_root + run_dirs['scaffolds_dir'] + ref_n + "/"
    ensure_dir([segments_root])
    print " ", ref_n
    # log
    logstring = "".join(
        ["\n\n# Align scaffold constructs to reference @", timestamp, "\n"])
    run_ref.log(logstring)
    # cycle through genomes
    for genome in genomes:
        # set inputs
        g_name = genome['name']
        scaff_gbk = scaff_root + g_name + "_" + ref_n + "_scaffold.gbk"
        file_list = (ref_ctg_file, scaff_gbk)
        print "\t", g_name, "...",
        # log
        logstring = "".join(["\n", g_name])
        run_ref.log(logstring)
        # set outputs
        mauve_dir = mauve_root + g_name + "/"
        aln_segs_dir = segments_root + g_name + "/"
        ensure_dir([mauve_dir, aln_segs_dir])
        mauve_outfile = mauve_dir + g_name + "_" + ref_n + ".mauve"
        segfile = aln_segs_dir + g_name + "_" + ref_n + "_segs.txt"
        # abort if the reference file is not found
        try:
            open(ref_ctg_file, 'r')
        except IOError:
            msg = "ERROR: Reference file not found"
            print msg
            run_ref.log(msg)
            raise
        # abort if there is no scaffold construct
        try:
            open(scaff_gbk, 'r')
        except IOError:
            msg = "WARNING: No scaffold construct to align"
            print msg
            run_ref.log(msg)
        else:
            # prep segments file
            open(segfile, 'w').write('')
            # purge any pre-existing sslist file
            sslist_file = scaff_gbk + ".sslist"
            if os.path.isfile(sslist_file):
                try:
                    os.remove(sslist_file)
                except Exception:
                    raise
            # do Mauve alignment
            align_mauve(file_list, mauve_outfile, mauve_exec)
            try:
                # parse Mauve output (without initial clumping)
                coords = mauver_load2_k0(mauve_outfile + ".backbone", 0, mtype)
                print len(coords), '->',
                logstring = "".join(["\t", str(len(coords))])
                run_ref.log(logstring)
                # chop segments that are too long
                chop_array = chop_rows(coords, max_size, chop_mode, mtype)
                print len(chop_array), 'segments <', max_size, 'bp',
                logstring = "".join(["\t", str(len(chop_array))])
                run_ref.log(logstring)
                # make detailed pairwise alignments of the segments
                ref_rec = load_genbank(ref_ctg_file)
                query_rec = load_genbank(scaff_gbk)
                id = iter_align(chop_array, ref_rec, query_rec, aln_segs_dir,
                                segfile)
                print "@", id, "% id. overall"
                logstring = "".join(["\t", str(id)])
                run_ref.log(logstring)
            except IOError:
                msg = "\nERROR: Mauve alignment failed"
                run_ref.log(msg)
                print msg
示例#5
0
def align_ctg2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs,
                  genomes, mauve_exec, max_size, chop_mode, mtype):
    """Align contigs pairwise to the reference contig."""
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir+run_id+"/"
    ref_ctg_file = run_ref.file
    mauve_root = run_root+run_dirs['mauve_out_dir']+ref_n+"/contigs/"
    segments_root = run_root+run_dirs['aln_seg_dir']+ref_n+"/contigs/"
    q_ctgs_root = run_root+run_dirs['match_out_dir']+ref_n+"/"
    ensure_dir([segments_root])
    print " ", ref_n
    # log
    logstring = "".join(["\n\n# Align contigs to ref @", timestamp, "\n"])
    run_ref.log(logstring)
    # cycle through genomes
    for genome in genomes:
        # set inputs and outputs
        g_name = genome['name']
        ctgs_fas_dir = q_ctgs_root+g_name+"/"
        mauve_dir = mauve_root+g_name+"/"
        aln_segs_root = segments_root+g_name+"/"
        ensure_dir([mauve_dir])
        print "\t", g_name, "...",
        # log
        logstring = "".join(["\n", g_name])
        run_ref.log(logstring)
        # list genbank files in matches directory
        dir_contents = listdir(ctgs_fas_dir)
        for item in dir_contents:
            pattern = re.compile(r'.*_(\d*)\.fas$')
            match = pattern.match(item)
            if match:
                ctg_num = match.group(1)
                print ctg_num,
                logstring = "".join(["\t", ctg_num])
                run_ref.log(logstring)
                # set inputs and outputs
                q_contig = ctgs_fas_dir+item
                file_list = (ref_ctg_file, q_contig)
                mauve_outfile = mauve_dir+ctg_num+".mauve"
                aln_segs_dir = aln_segs_root+ctg_num+"/"
                ensure_dir([aln_segs_dir])
                segfile = aln_segs_dir+ctg_num+"_"+ref_n+"_segs.txt"
                open(segfile, 'w').write('')
                # do Mauve alignment
                try:
                    open(ref_ctg_file, 'r')
                    open(q_contig, 'r')
                except IOError:
                    msg = "\nERROR: File missing, cannot align\n\t\t\t"
                    run_ref.log(msg)
                    print msg
                else:
                    align_mauve(file_list, mauve_outfile, mauve_exec)
                    try:
                        # parse Mauve output (without initial clumping)
                        coords = mauver_load2_k0(mauve_outfile+".backbone",
                                                 0, mtype)
                        # chop segments that are too long
                        chop_array = chop_rows(coords, max_size, chop_mode,
                                               mtype)
                        # make detailed pairwise alignments of the segments
                        ref_rec = load_genbank(ref_ctg_file)
                        query_rec = load_fasta(q_contig)
                        iter_align(chop_array, ref_rec, query_rec,
                                   aln_segs_dir, segfile)
                    except IOError:
                        msg = "\nERROR: Mauve alignment failed\n\t\t\t"
                        run_ref.log(msg)
                        print msg
                    except Exception:
                        msg = "\nERROR: Iteration failed\n\t\t\t"
                        run_ref.log(msg)
                        print msg
        print ""
示例#6
0
def align_cstrct2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs,
                     genomes, max_size, chop_mode, mtype, mauve_exec):
    """Align constructs pairwise to the reference contig."""
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir+run_id+"/"
    ref_ctg_file = run_ref.file
    mauve_root = run_root+run_dirs['mauve_out_dir']+ref_n+"/constructs/"
    segments_root = run_root+run_dirs['aln_seg_dir']+ref_n+"/constructs/"
    scaff_root = run_root+run_dirs['scaffolds_dir']+ref_n+"/"
    ensure_dir([segments_root])
    print " ", ref_n
    # log
    logstring = "".join(["\n\n# Align scaffold constructs to reference @",
                         timestamp, "\n"])
    run_ref.log(logstring)
    # cycle through genomes
    for genome in genomes:
        # set inputs
        g_name = genome['name']
        scaff_gbk = scaff_root+g_name+"_"+ref_n+"_scaffold.gbk"
        file_list = (ref_ctg_file, scaff_gbk)
        print "\t", g_name, "...",
        # log
        logstring = "".join(["\n", g_name])
        run_ref.log(logstring)
        # set outputs
        mauve_dir = mauve_root+g_name+"/"
        aln_segs_dir = segments_root+g_name+"/"
        ensure_dir([mauve_dir, aln_segs_dir])
        mauve_outfile = mauve_dir+g_name+"_"+ref_n+".mauve"
        segfile = aln_segs_dir+g_name+"_"+ref_n+"_segs.txt"
        # abort if the reference file is not found
        try: open(ref_ctg_file, 'r')
        except IOError:
            msg = "ERROR: Reference file not found"
            print msg
            run_ref.log(msg)
            raise
        # abort if there is no scaffold construct
        try: open(scaff_gbk, 'r')
        except IOError:
            msg = "WARNING: No scaffold construct to align"
            print msg
            run_ref.log(msg)
        else:
            # prep segments file
            open(segfile, 'w').write('')
            # purge any pre-existing sslist file
            sslist_file = scaff_gbk+".sslist"
            if os.path.isfile(sslist_file):
                try: os.remove(sslist_file)
                except Exception: raise
            # do Mauve alignment
            align_mauve(file_list, mauve_outfile, mauve_exec)
            try:
                # parse Mauve output (without initial clumping)
                coords = mauver_load2_k0(mauve_outfile+".backbone", 0, mtype)
                print len(coords), '->',
                logstring = "".join(["\t", str(len(coords))])
                run_ref.log(logstring)
                # chop segments that are too long
                chop_array = chop_rows(coords, max_size, chop_mode, mtype)
                print len(chop_array), 'segments <', max_size, 'bp',
                logstring = "".join(["\t", str(len(chop_array))])
                run_ref.log(logstring)
                # make detailed pairwise alignments of the segments
                ref_rec = load_genbank(ref_ctg_file)
                query_rec = load_genbank(scaff_gbk)
                id = iter_align(chop_array, ref_rec, query_rec,
                                aln_segs_dir, segfile)
                print "@", id, "% id. overall"
                logstring = "".join(["\t", str(id)])
                run_ref.log(logstring)
            except IOError:
                msg = "\nERROR: Mauve alignment failed"
                run_ref.log(msg)
                print msg