Exemplo n.º 1
0
def _collapse(in_file):
    """Collapse ``in_file`` and return the path of the collapsed output.

    The output path is ``in_file`` with ".trimming" appended to its stem
    (via ``append_stem``) and every ".gz" substring removed. When that file
    already exists it is reused and nothing is recomputed.
    """
    stemmed = append_stem(in_file, ".trimming")
    target = stemmed.replace(".gz", "")
    # Only do the work when no previous result is present on disk.
    if not file_exists(target):
        write_output(target, collapse(in_file))
    return target
Exemplo n.º 2
0
def _collapse(in_file):
    """Return the collapsed counterpart of ``in_file``, creating it if needed.

    The result path comes from ``append_stem(in_file, ".trimming")`` with all
    ".gz" substrings stripped. An already existing result is returned as-is.
    """
    collapsed_path = append_stem(in_file, ".trimming").replace(".gz", "")
    already_done = file_exists(collapsed_path)
    if not already_done:
        unique_seqs = collapse(in_file)
        write_output(collapsed_path, unique_seqs)
    return collapsed_path
Exemplo n.º 3
0
def collapse_fastq(args):
    """Collapse a FASTQ file after adapter trimming and write the result.

    When ``_is_umi(args.fastq)`` is true, the input is collapsed first and
    written to ``<stem>_umi_trimmed.fastq`` inside ``args.out``; that
    intermediate file is then collapsed again. The final sequences are
    written to ``<stem>_trimmed.fastq`` inside ``args.out``.

    :param args: object exposing ``fastq`` (input path), ``out`` (output
        directory) and ``minimum`` (forwarded to ``write_output``).
    :returns: path of the ``_trimmed.fastq`` file that was written.
    :raises IOError: when an I/O error occurs while collapsing the input.
    """
    try:
        umi_fn = args.fastq
        # Input file name without directory and extension(s); shared by both
        # output names below.
        stem = splitext_plus(os.path.basename(args.fastq))[0]
        if _is_umi(args.fastq):
            umis = collapse(args.fastq)
            umi_fn = os.path.join(args.out, stem + "_umi_trimmed.fastq")
            write_output(umi_fn, umis, args.minimum)
        seqs = collapse(umi_fn)
        out_file = stem + "_trimmed.fastq"
    except IOError as e:
        logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
        # Raising a plain string is itself a TypeError; raise a real
        # exception so callers see an I/O failure.
        raise IOError("Can not read file")
    out_file = os.path.join(args.out, out_file)
    write_output(out_file, seqs, args.minimum)
    return out_file
Exemplo n.º 4
0
def _collapse(in_file):
    """
    Collapse reads into unique sequences with seqcluster.

    The output path is ``in_file`` with ".trimming" appended to its stem and
    every ".gz" substring removed; an existing output file is reused.
    Results are written with ``minimum=1`` and ``size=16``.
    """
    stemmed = append_stem(in_file, ".trimming")
    target = stemmed.replace(".gz", "")
    # Skip the work entirely when a previous run already produced the file.
    if not file_exists(target):
        write_output(target, collapse(in_file), minimum=1, size=16)
    return target
Exemplo n.º 5
0
def _collapse(in_file):
    """
    Collapse reads into unique sequences with seqcluster.

    Returns the path of the collapsed file, derived from ``in_file`` by
    appending ".trimming" to the stem and dropping any ".gz" substring.
    If that file is already present it is returned without recomputation;
    otherwise it is written with ``minimum=1`` and ``size=16``.
    """
    collapsed_path = append_stem(in_file, ".trimming").replace(".gz", "")
    already_done = file_exists(collapsed_path)
    if not already_done:
        unique_seqs = collapse(in_file)
        write_output(collapsed_path, unique_seqs, minimum=1, size=16)
    return collapsed_path
Exemplo n.º 6
0
 def test_umis(self):
     """Collapse the bundled UMI sample and write the result to a clean dir.

     Raises ValueError unless collapsing yields exactly two keys. The output
     directory is removed first when it exists, then recreated.
     """
     from seqcluster.libs.fastq import collapse, write_output
     sample = os.path.abspath("data/examples/umis/sample.fastq")
     umis = collapse(sample)
     n_unique = len(umis.keys())
     if n_unique != 2:
         raise ValueError("umis didn't detect two unique sequences")
     out_dir = "test/test_automated_output"
     # Start from an empty output directory on every run.
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     target = os.path.join(out_dir, "umis.fastq")
     write_output(target, umis)
Exemplo n.º 7
0
 def test_umis(self):
     """Check that the UMI sample collapses to two entries, then write them.

     A ValueError is raised when the number of keys returned by ``collapse``
     differs from two. Any previous output directory is deleted and a fresh
     one is created before writing ``umis.fastq`` into it.
     """
     from seqcluster.libs.fastq import collapse, write_output
     fastq_path = os.path.abspath("data/examples/umis/sample.fastq")
     umis = collapse(fastq_path)
     if not len(umis.keys()) == 2:
         raise ValueError("umis didn't detect two unique sequences")
     out_dir = "test/test_automated_output"
     stale = os.path.exists(out_dir)
     if stale:
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     write_output(os.path.join(out_dir, "umis.fastq"), umis)
Exemplo n.º 8
0
def collapse_fastq(args):
    """Collapse a FASTQ file after adapter trimming and write the result.

    When ``_is_umi(args.fastq)`` is true, the input is collapsed first and
    written to ``<stem>_umi_trimmed.fastq`` inside ``args.out``; that
    intermediate file is then collapsed again. The final sequences are
    written to ``<stem>_trimmed.fastq`` inside ``args.out``.

    :param args: object exposing ``fastq`` (input path), ``out`` (output
        directory) and ``minimum`` (forwarded to ``write_output``).
    :returns: path of the ``_trimmed.fastq`` file that was written.
    :raises IOError: when an I/O error occurs while collapsing the input.
    """
    try:
        umi_fn = args.fastq
        # Input file name without directory and extension(s); shared by both
        # output names below.
        stem = splitext_plus(os.path.basename(args.fastq))[0]
        if _is_umi(args.fastq):
            umis = collapse(args.fastq)
            umi_fn = os.path.join(args.out, stem + "_umi_trimmed.fastq")
            write_output(umi_fn, umis, args.minimum)
        seqs = collapse(umi_fn)
        out_file = stem + "_trimmed.fastq"
    except IOError as e:
        logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
        # Raising a plain string is itself a TypeError; raise a real
        # exception so callers see an I/O failure.
        raise IOError("Can not read file")
    out_file = os.path.join(args.out, out_file)
    write_output(out_file, seqs, args.minimum)
    return out_file
Exemplo n.º 9
0
def _collapse(in_file):
    """Collapse ``in_file`` and write the result next to it.

    The output path is ``in_file`` with ".trimming" appended to its stem and
    every ".gz" substring removed. The file is always (re)written; the path
    is returned.
    """
    collapsed = collapse(in_file)
    target = append_stem(in_file, ".trimming")
    target = target.replace(".gz", "")
    write_output(target, collapsed)
    return target
Exemplo n.º 10
0
def _collapse(in_file):
    """Write the collapsed form of ``in_file`` and return its path.

    No existence check is made: the output, named by appending ".trimming"
    to the stem of ``in_file`` and removing any ".gz" substring, is written
    on every call.
    """
    unique_seqs = collapse(in_file)
    stemmed = append_stem(in_file, ".trimming")
    collapsed_path = stemmed.replace(".gz", "")
    write_output(collapsed_path, unique_seqs)
    return collapsed_path
Exemplo n.º 11
0
from seqcluster.libs.fastq import collapse, splitext_plus, write_output
from bcbio.distributed.transaction import file_transaction, tx_tmpdir
from bcbio.utils import (file_exists, append_stem, replace_directory, symlink_plus, local_path_export)

from collections import Counter


# Script entry point: collapse the file given as first command-line argument,
# then scan the collapsed output record by record.
# NOTE(review): `sys` is used below but not imported in the visible part of
# the file - confirm it is imported elsewhere.
if __name__ == "__main__":
	in_file = sys.argv[1]

	"""
	Collpase reads into unique sequences with seqcluster
	"""
	# Output path: ".trimming" appended to the input stem, ".gz" removed.
	out_file = append_stem(in_file, ".trimming").replace(".gz", "")
	#out_file = splitext_plus(os.path.basename(fastq))[0] + ".fq"
	seqs = collapse(in_file)
	write_output(out_file, seqs, 1)

	"""
	Calculate size distribution after adapter removal
	"""
	data = Counter()
	out_stat_file = out_file + "_size_stats"

	# Each iteration consumes four lines: the line bound by the loop plus
	# three more pulled explicitly from the handle.
	# NOTE(review): file objects only provide .next() on Python 2; Python 3
	# needs next(in_handle) - confirm the target interpreter.
	with open(out_file) as in_handle:
		for line in in_handle:
			# Integer that follows "_x" on the first line of the record.
			counts = int(line.strip().split("_x")[1])
			line = in_handle.next()
			# Length of the second line of the record, whitespace stripped.
			l = len(line.strip())
			# Skip the two remaining lines of the record (presumably the
			# FASTQ "+" separator and quality lines - TODO confirm).
			in_handle.next()
			in_handle.next()