import os import sys import time import logging import argparse import tempfile import resource import subprocess import collections import distutils.spawn import parallel_tools import seqtools import shims # There can be problems with the submodules, but none are essential. # Try to load these modules, but if there's a problem, load a harmless dummy and continue. simplewrap = shims.get_module_or_shim('utillib.simplewrap') version = shims.get_module_or_shim('utillib.version') phone = shims.get_module_or_shim('ET.phone') #TODO: Warn if it looks like the two input FASTQ files are the same (i.e. the _1 file was given # twice). Can tell by whether the alpha and beta (first and last 12bp) portions of the barcodes # are always identical. This would be a good thing to warn about, since it's an easy mistake # to make, but it's not obvious that it happened. The pipeline won't fail, but will just # produce pretty weird results. USAGE = """$ %(prog)s [options] families.tsv > families.msa.tsv $ cat families.tsv | %(prog)s [options] > families.msa.tsv""" DESCRIPTION = """Read in sorted FASTQ data and do multiple sequence alignments of each family.""" def make_argparser():
#!/usr/bin/env python3 import argparse import gzip import logging import os import subprocess import sys import shims assert sys.version_info.major >= 3, 'Python 3 required' version = shims.get_module_or_shim('utillib.version') SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) DESCRIPTION = """Run the entire Du Novo pipeline.""" def make_argparser(): parser = argparse.ArgumentParser(description=DESCRIPTION, add_help=False) io = parser.add_argument_group('Inputs and outputs') io.add_argument('fastq1', metavar='reads_1.fq', type=open_as_text_or_gzip, help='Input reads (mate 1). Can be gzipped.') io.add_argument('fastq2', metavar='reads_2.fq', type=open_as_text_or_gzip, help='Input reads (mate 2). Can be gzipped.') io.add_argument( '-o', '--outdir', help= 'The directory to create the output (and intermediate) files in. Must exist already and '
#!/usr/bin/env python from __future__ import division import os import sys import time import logging import argparse import resource import collections import parallel_tools import consensus import swalign import shims # There can be problems with the submodules, but none are essential. # Try to load these modules, but if there's a problem, load a harmless dummy and continue. simplewrap = shims.get_module_or_shim('utillib.simplewrap') version = shims.get_module_or_shim('utillib.version') phone = shims.get_module_or_shim('ET.phone') # The ascii values that represent a 0 PHRED score. QUAL_OFFSETS = {'sanger':33, 'solexa':64} USAGE = """$ %(prog)s [options] families.msa.tsv -1 duplexes_1.fa -2 duplexes_2.fa $ cat families.msa.tsv | %(prog)s [options] -1 duplexes_1.fa -2 duplexes_2.fa""" DESCRIPTION = """Build consensus sequences from read aligned families. Prints duplex consensus \ sequences in FASTA to stdout. The sequence ids are BARCODE.MATE, e.g. "CTCAGATAACATACCTTATATGCA.1", \ where "BARCODE" is the input barcode, and "MATE" is "1" or "2" as an arbitrary designation of the \ two reads in the pair. The id is followed by the count of the number of reads in the two families \ (one from each strand) that make up the duplex, in the format READS1/READS2. If the duplex is \ actually a single-strand consensus because the matching strand is missing, only one number is \ listed. Rules for consensus building: Single-strand consensus sequences are made by counting how many of \
from __future__ import print_function import os import sys import gzip import time import logging import argparse import resource import subprocess import networkx import parallel_tools import swalign import shims # There can be problems with the submodules, but none are essential. # Try to load these modules, but if there's a problem, load a harmless dummy and continue. version = shims.get_module_or_shim('utillib.version') phone = shims.get_module_or_shim('ET.phone') VERBOSE = (logging.DEBUG+logging.INFO)//2 USAGE = '$ %(prog)s [options] families.tsv barcodes.fa barcodes.sam > families.corrected.tsv' DESCRIPTION = """Correct barcodes using an alignment of all barcodes to themselves. Reads the alignment in SAM format and corrects the barcodes in an input "families" file (the output of make-barcodes.awk). It will print the "families" file to stdout with barcodes (and orders) corrected.""" def make_argparser(): parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION) parser.add_argument('families', type=open_as_text_or_gzip, help='The sorted output of make-barcodes.awk. The important part is that it\'s a tab-delimited ' 'file with at least 2 columns: the barcode sequence and order, and it must be sorted in '