def main():
    """Minimize a failing censored FASTQ file with delta debugging.

    Builds a MicallDD over a hard-coded FASTQ/bad-cycles pair, reduces the
    read-index set to a minimal failing subset via ddmin, then reruns the
    test once with debug output enabled.
    """
    logger = init_logging_console_only(logging.INFO)
    try:
        logger.info('Starting.')
        fname = 'censored1.fastq'
        bad_cycles_filename = 'bad_cycles.csv'
        dd = MicallDD(fname, bad_cycles_filename)
        read_indexes = range(len(dd.reads))
        # Flip to False to rerun the full read set without minimizing.
        run_test = True
        min_indexes = dd.ddmin(read_indexes) if run_test else read_indexes
        dd._test(min_indexes, debug_file_prefix='micall_debug')
        logger.info('Done.')
    except Exception as ex:
        # Top-level boundary: record the failure instead of crashing.
        logger.error('Failed.', exc_info=ex)
from micall.utils.externals import Bowtie2, Bowtie2Build, LineCounter from micall.utils.translation import reverse_and_complement CONSENSUS_Q_CUTOFF = 20 # Min Q for base to contribute to conseq (pileup2conseq) MIN_MAPPING_EFFICIENCY = 0.95 # Fraction of fastq reads mapped needed MAX_REMAPS = 3 # Number of remapping attempts if mapping efficiency unsatisfied # SAM file format fieldnames = [ 'qname', 'flag', 'rname', 'pos', 'mapq', 'cigar', 'rnext', 'pnext', 'tlen', 'seq', 'qual' ] cigar_re = re.compile('[0-9]+[MIDNSHPX=]') # CIGAR token logger = miseq_logging.init_logging_console_only(logging.DEBUG) indel_re = re.compile('[+-][0-9]+') line_counter = LineCounter() def is_first_read(flag): """ Interpret bitwise flag from SAM field. Returns True or False indicating whether the read is the first read in a pair. """ IS_FIRST_SEGMENT = 0x40 return (int(flag) & IS_FIRST_SEGMENT) != 0 def is_unmapped_read(flag): """
simple_prefix, pssm, ruby_script, delete_results=False) if not txtfilename.endswith('.txt'): with open(simple_prefix + '.txt', 'w') as simplefile: for line in simple_remap_lines: simplefile.write(line) if __name__ == '__main__': parser = argparse.ArgumentParser( description='Find the simplest test failure by trimming SAM files.') parser.add_argument('workdir', help='path to folder holding SAM files') parser.add_argument('ruby_script', help='path to Ruby version of G2P') parser.add_argument('--pattern', default='*.remap.csv', help='File name pattern to match SAM files') args = parser.parse_args() logger = init_logging_console_only(logging.INFO) pssm = Pssm(path_to_lookup='../g2p/g2p_fpr.txt', path_to_matrix='../g2p/g2p.matrix') for txtfilename in sorted( glob.glob(os.path.join(args.workdir, args.pattern))): logger.info(os.path.basename(txtfilename)) compare_conseqs(txtfilename, args.ruby_script, pssm) logger.info('Done.')
parser.add_argument('coord_ins_csv', type=argparse.FileType('w'), help='CSV containing insertions relative to coordinate reference') parser.add_argument('conseq_csv', type=argparse.FileType('w'), help='CSV containing consensus sequences') parser.add_argument('failed_align_csv', type=argparse.FileType('w'), help='CSV containing any consensus that failed to align') parser.add_argument('nuc_variants_csv', type=argparse.FileType('w'), help='CSV containing top nucleotide variants') return parser.parse_args() logger = miseq_logging.init_logging_console_only(logging.DEBUG) MAX_CUTOFF = 'MAX' class SequenceReport(object): """ Hold the data for several reports related to a sample's genetic sequence. To use a report object, read a group of aligned reads that mapped to a single region, and then write out all the reports for that region. """ def __init__(self, insert_writer, projects, conseq_mixture_cutoffs): """ Create an object instance.
print 'Simplifying sample {}'.format(txtfilename) reads = defaultdict(list) read_fastq(txtfilename, reads) read_count = len(reads) read_fastq(get_reverse_filename(txtfilename), reads) added_count = len(reads) - read_count if added_count > 0: raise RuntimeError('Found {} new reads.'.format(added_count)) reads = reads.values() simple_filename = txtfilename.replace('censored1.fastq', 'simple_censored1.fastq') simple_fastq_lines = ddmin(reads, simple_filename) write_simple_fastq(simple_filename, simple_fastq_lines) if __name__ == '__main__': logger = init_logging_console_only(logging.INFO) test_file('/home/don/git/MiCall/micall/tests/working/61515A-HCV_S1_uncensored1.fastq') exit() parser = argparse.ArgumentParser( description='Find the simplest test failure by trimming FASTQ files.') parser.add_argument('workdir', help='path to folder holding FASTQ files') parser.add_argument('--pattern', default='*censored1.fastq', help='File name pattern to match FASTQ files') args = parser.parse_args() filenames = glob.glob(os.path.join(args.workdir, args.pattern)) filenames.sort()