def get_files(input_dir, input_format):
    """Return the unique paths in ``input_dir`` matching ``input_format``.

    A file matches when its name contains any of the strings returned by
    ``get_file_extensions(input_format)`` (the glob pattern is ``*ext*``).
    A leading ``~`` in ``input_dir`` is expanded.  The order of the returned
    list is unspecified because duplicates are removed through a set.
    """
    unique_paths = set()
    for suffix in get_file_extensions(input_format):
        pattern = os.path.join(
            os.path.expanduser(input_dir), '*{}*'.format(suffix))
        # a file may match more than one pattern; the set keeps one copy
        unique_paths.update(glob.glob(pattern))
    return list(unique_paths)
def main():
    """Trim every alignment found in ``args.input`` and write the results.

    Command-line arguments come from ``get_args()``.  Files in ``args.input``
    whose names end with an extension returned by
    ``get_file_extensions(args.input_format)`` are passed, together with the
    trimming parameters, to ``get_and_trim_alignments``; the results are then
    handed to ``write_alignments_to_outdir``.  Progress is reported through
    the logger returned by ``setup_logging``.
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args.verbosity, args.log_path)
    text = " Starting {} ".format(my_name)
    log.info(text.center(65, "="))
    alignments = []
    log.info("Getting aligned sequences for trimming")
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(): every work item is a
    # (shared parameter list, alignment path) pair
    package = [args.input_format, args.window, args.threshold, args.proportion, args.max_divergence, args.min_length]
    params = zip([package] * len(alignments), alignments)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # if more than one core was requested, use Pool.map(), else use map()
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        # NOTE(review): the pool is sized to cores - 1 -- presumably to leave
        # one core for the parent process; confirm this is intended.
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        finally:
            # always release the worker processes, even if trimming failed
            pool.close()
            pool.join()
    else:
        # list() forces the work to happen here; on Python 3 a bare map() is
        # lazy and would run only after "Alignment ends" had been logged
        alignments = list(map(get_and_trim_alignments, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
def get_files(input_dir, input_format):
    """Find the files in ``input_dir`` that belong to ``input_format``.

    Each string returned by ``get_file_extensions(input_format)`` is searched
    for anywhere in the file name (glob pattern ``*ext*``), with ``~`` in
    ``input_dir`` expanded first.  Paths matched more than once are reported
    a single time; the list order is not defined.
    """
    # a set comprehension collapses duplicate filenames as they are gathered
    matches = {
        path
        for ext in get_file_extensions(input_format)
        for path in glob.glob(
            os.path.join(os.path.expanduser(input_dir), '*{}*'.format(ext)))
    }
    return list(matches)
def write_alignments_to_outdir(outdir, alignments, format):
    """Write each trimmed alignment to its own file inside ``outdir``.

    Args:
        outdir: directory that receives the output files.
        alignments: iterable of ``(locus, aln)`` pairs.  ``aln`` must expose
            a ``trimmed_alignment`` attribute; when it is ``None`` the locus
            is skipped and a message is printed instead.
        format: output format name.  It is passed both to
            ``get_file_extensions`` (whose first entry becomes the file
            suffix) and to ``trimmed_alignment.format``.  The name shadows
            the builtin but is kept for caller compatibility.
    """
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print('\nWriting output files...')
    for locus, aln in alignments:
        if aln.trimmed_alignment is not None:
            outname = "{}{}".format(os.path.join(outdir, locus),
                                    get_file_extensions(format)[0])
            # the context manager closes the file even if formatting or
            # writing raises
            with open(outname, 'w') as outf:
                outf.write(aln.trimmed_alignment.format(format))
        else:
            print("\tSkipped writing {0}, there was no record".format(locus))
示例#5
0
def write_alignments_to_outdir(outdir, alignments, format):
    """Write each trimmed alignment to its own file inside ``outdir``.

    Args:
        outdir: directory that receives the output files.
        alignments: iterable of ``(locus, aln)`` pairs.  A falsy ``aln``
            (for example ``None``) is skipped with a printed message;
            otherwise ``aln.trimmed_alignment`` is written.
        format: output format name.  It is passed both to
            ``get_file_extensions`` (whose first entry becomes the file
            suffix) and to ``trimmed_alignment.format``.  The name shadows
            the builtin but is kept for caller compatibility.
    """
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print('\nWriting output files...')
    for locus, aln in alignments:
        if aln:
            outname = "{}{}".format(
                os.path.join(outdir, locus),
                get_file_extensions(format)[0]
            )
            # the context manager closes the file even if formatting or
            # writing raises
            with open(outname, 'w') as outf:
                outf.write(aln.trimmed_alignment.format(format))
        else:
            print("\tSkipped writing {0}, there was no record".format(locus))
示例#6
0
def main():
    """Extract one taxon's ungapped sequence from every alignment.

    Alignment files are located in ``args.alignments`` by the extensions
    returned from ``get_file_extensions(args.input_format)`` and read with
    ``AlignIO.read``.  For each record whose ``id`` equals ``args.taxon``,
    the gap characters (``-``) are removed and a ``>locus`` header line
    followed by the sequence is written to ``args.output``, where ``locus``
    is the alignment's file name without its extension.  When the sequence
    is empty after gap removal, the locus name is printed instead.
    """
    args = get_args()
    alignments = []
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.alignments, "*{}".format(ftype))))
    try:
        for f in alignments:
            aln = AlignIO.read(f, args.input_format)
            # the locus name depends only on the file, so compute it once
            locus = os.path.splitext(os.path.basename(f))[0]
            for taxon in aln:
                if taxon.id == args.taxon:
                    seq = str(taxon.seq).replace('-', '')
                    if seq:
                        args.output.write(">{0}\n{1}\n".format(locus, seq))
                    else:
                        # nothing but gaps for this taxon: report the locus
                        print(locus)
    finally:
        # close the output handle even if reading or writing failed
        args.output.close()
示例#7
0
def main():
    """Trim every alignment found in ``args.input`` and write the results.

    Command-line arguments come from ``get_args()``.  Files in ``args.input``
    whose names end with an extension returned by
    ``get_file_extensions(args.input_format)`` are passed, together with the
    trimming parameters, to ``get_and_trim_alignments``; the results are then
    handed to ``write_alignments_to_outdir``.
    """
    args = get_args()
    alignments = []
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(): every work item is a
    # (shared parameter list, alignment path) pair
    package = [args.input_format, args.window, args.threshold, args.proportion]
    params = zip([package] * len(alignments), alignments)
    # print some output for user
    sys.stdout.write('Trimming')
    sys.stdout.flush()
    # if --multiprocessing, use Pool.map(), else use map()
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.multiprocessing:
        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        finally:
            # always release the worker processes, even if trimming failed
            pool.close()
            pool.join()
    else:
        # list() keeps both branches returning a list and forces the work to
        # happen here on Python 3, where a bare map() is lazy
        alignments = list(map(get_and_trim_alignments, params))
    write_alignments_to_outdir(args.output, alignments, args.output_format)
def main():
    """Trim every alignment found in ``args.input`` and write the results.

    Command-line arguments come from ``get_args()``.  Files in ``args.input``
    whose names end with an extension returned by
    ``get_file_extensions(args.input_format)`` are passed, together with the
    trimming parameters, to ``get_and_trim_alignments``; the results are then
    handed to ``write_alignments_to_outdir``.
    """
    args = get_args()
    alignments = []
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(
            glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(): every work item is a
    # (shared parameter list, alignment path) pair
    package = [args.input_format, args.window, args.threshold, args.proportion]
    params = zip([package] * len(alignments), alignments)
    # print some output for user
    sys.stdout.write('Trimming')
    sys.stdout.flush()
    # if more than one core was requested, use Pool.map(), else use map()
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.cores > 1:
        # NOTE(review): the pool is sized to cores - 1 -- presumably to leave
        # one core for the parent process; confirm this is intended.
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        finally:
            # always release the worker processes, even if trimming failed
            pool.close()
            pool.join()
    else:
        # list() keeps both branches returning a list and forces the work to
        # happen here on Python 3, where a bare map() is lazy
        alignments = list(map(get_and_trim_alignments, params))
    write_alignments_to_outdir(args.output, alignments, args.output_format)
示例#9
0
def get_files(input_dir, input_format):
    """Return the paths in ``input_dir`` that end with a known extension.

    The extensions are whatever ``get_file_extensions(input_format)``
    returns; each one is globbed as ``*<ext>`` and the matches are
    concatenated in that order (duplicates are not removed).
    """
    return [
        path
        for suffix in get_file_extensions(input_format)
        for path in glob.glob(os.path.join(input_dir, "*{}".format(suffix)))
    ]
def get_files(input_dir, input_format):
    """List the files in ``input_dir`` whose names end with an extension
    returned by ``get_file_extensions(input_format)``.

    Matches are gathered one extension at a time, in the order the
    extensions are returned; duplicates are kept.
    """
    found = []
    for suffix in get_file_extensions(input_format):
        pattern = os.path.join(input_dir, "*{}".format(suffix))
        found += glob.glob(pattern)
    return found