def main(args):
    """Partition augmented reads into connected components and write them.

    Reads are parsed from ``args.infile`` and handed to ``partition`` along
    with the ``strict``, ``min_abund``, ``max_abund``, ``dedup``, ``gml`` and
    ``logfile`` settings taken from ``args``.

    Output mode depends on ``args.split``:

    * truthy -- ``args.split`` is used as a filename prefix and each
      component is written to its own ``<prefix>.cc<N>.augfastq.gz`` file;
    * falsy -- every read is written to ``args.out`` with a ``kvcc=<N>`` tag
      appended to its name, where ``N`` is the 1-based component number.

    A one-line summary (read count and component count) is printed to
    ``args.logfile`` when finished.
    """
    if args.split:
        # NOTE(review): presumably trim=True creates the parent directory of
        # the prefix rather than the prefix itself -- confirm in kevlar.mkdirp.
        kevlar.mkdirp(args.split, trim=True)
        outstream = None
    else:
        outstream = kevlar.open(args.out, 'w')

    infile = kevlar.open(args.infile, 'r')
    readstream = kevlar.parse_augmented_fastx(infile)
    components = partition(
        readstream,
        strict=args.strict,
        minabund=args.min_abund,
        maxabund=args.max_abund,
        dedup=args.dedup,
        gmlfile=args.gml,
        logstream=args.logfile,
    )

    # Both counters keep their zero defaults if no components are produced.
    cc_count = 0
    total_reads = 0
    for cc_count, component in enumerate(components, 1):
        total_reads += len(component)

        if args.split:
            # One output file per connected component.
            ofname = '{:s}.cc{:d}.augfastq.gz'.format(args.split, cc_count)
            with kevlar.open(ofname, 'w') as outfile:
                for read in component:
                    kevlar.print_augmented_fastx(read, outfile)
            continue

        # Single shared output: tag each read with its component number.
        for read in component:
            read.name += ' kvcc={:d}'.format(cc_count)
            kevlar.print_augmented_fastx(read, outstream)

    summary = ''.join([
        '[kevlar::partition] grouped {:d} reads'.format(total_reads),
        ' into {:d} connected components'.format(cc_count),
    ])
    print(summary, file=args.logfile)
def test_mkdirp():
    """Check the path returned by ``kevlar.mkdirp`` with and without trim.

    Three cases are exercised inside a fresh temporary directory:

    * no ``trim`` -- the return value is the path that was passed in;
    * ``trim=True`` with one trailing component -- the return value is the
      path with that last component removed;
    * ``trim=True`` on a deeply nested path -- again the return value is the
      path minus its final component.

    The temporary directory is removed in a ``finally`` clause so that a
    failing assertion (or an exception from ``mkdirp``) does not leave
    stray directories behind.
    """
    tempdir = tempfile.mkdtemp()
    try:
        path1 = os.path.join(tempdir, 'partitions')
        assert kevlar.mkdirp(path1) == path1

        path2 = os.path.join(tempdir, 'partitions2', 'part')
        path2test = os.path.join(tempdir, 'partitions2')
        assert kevlar.mkdirp(path2, trim=True) == path2test

        path3 = os.path.join(tempdir, 'partitions3', 'a', 'long', 'path',
                             'PART')
        path3test = os.path.join(tempdir, 'partitions3', 'a', 'long', 'path')
        assert kevlar.mkdirp(path3, trim=True) == path3test
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(tempdir)