def run(self):
    """Compare per-position evidence from two working directories.

    Reads the '<sequence>-evidence.txt' file of every reference sequence
    from both working directories in lockstep and writes one tab-separated
    row to ``self.prefix + '.txt'`` for each insertion or substitution
    whose significance is at or below ``self.cutoff``.

    Reads from self: title1, title2 (column titles; default to the
    working directory names when None), working_dir1, working_dir2,
    cutoff and prefix.

    Returns 0 on completion.

    Raises AssertionError if the two evidence files disagree on position
    or reference base at the same row.
    """
    title1 = self.title1
    title2 = self.title2
    working1 = working_directory.Working(self.working_dir1)
    working2 = working_directory.Working(self.working_dir2)
    cutoff = self.cutoff

    # Sequence names come from the first working directory's reference.
    sequence_names = [
        name for name, length in working1.get_reference().get_lengths()
    ]

    if title1 is None:
        title1 = working1.name
    if title2 is None:
        title2 = working2.name

    # Smallest depth n at which n reads of 'A' versus n reads of 'T'
    # reaches the cutoff; reported in the header of the output file.
    n = 1
    while significance([('A', n)], [('T', n)], 1.0) > cutoff:
        n += 1

    # 'with' guarantees the output file is closed even if reading or
    # testing the evidence raises part-way through.
    with open(self.prefix + '.txt', 'wb') as f:
        print >> f, '%g\tsignificance cutoff' % cutoff
        print >> f, '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n
        print >> f, 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % (
            title1, title2, title1, title2)

        for sequence_name in sequence_names:
            evidence_name = grace.filesystem_friendly_name(sequence_name) + '-evidence.txt'
            filename1 = working1 / evidence_name
            filename2 = working2 / evidence_name

            # NOTE(review): izip stops at the shorter input, so a truncated
            # evidence file would end the comparison early without an error.
            for (pos1, ins1, sub1, ref1, conins1, consub1), \
                (pos2, ins2, sub2, ref2, conins2, consub2) in itertools.izip(
                    read_file(filename1), read_file(filename2)):
                assert pos1 == pos2 and ref1 == ref2, \
                    'Evidence files %s and %s are out of step at position %s' % (
                        filename1, filename2, pos1)

                if pos1 % 1000 == 0:
                    grace.status('Testing %s %d' % (sequence_name, pos1))

                # Insertions: only tested when both samples have evidence.
                dec_ins1 = io.decode_evidence(ins1)
                dec_ins2 = io.decode_evidence(ins2)
                if dec_ins1 and dec_ins2:
                    # Reuse the decoded evidence rather than decoding again
                    # (assumes decode_evidence has no side effects).
                    sig = significance(dec_ins1, dec_ins2, cutoff)
                    if sig is not None and sig <= cutoff:
                        print >> f, '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % (
                            sequence_name, pos1, 'insertion-before',
                            ins1, ins2, sig, conins1, conins2)
                        f.flush()

                # Substitutions / deletions.
                dec_sub1 = io.decode_evidence(sub1)
                dec_sub2 = io.decode_evidence(sub2)
                if dec_sub1 and dec_sub2:
                    sig = significance(dec_sub1, dec_sub2, cutoff)
                    if sig is not None and sig <= cutoff:
                        # Classify by the first decoded entry of each sample.
                        if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-':
                            what = 'deletion'
                        elif dec_sub1[0][0] != dec_sub2[0][0]:
                            what = 'substitution'
                        else:
                            what = 'different mix'
                        print >> f, '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % (
                            sequence_name, pos1, what, ref1,
                            sub1, sub2, sig, consub1, consub2)
                        f.flush()

    # Clear the progress message.
    grace.status('')
    return 0
def main(args): title1, args = grace.get_option_value(args, "--title1", str, None) title2, args = grace.get_option_value(args, "--title2", str, None) grace.expect_no_further_options(args) if len(args) != 3: print >> sys.stderr, USAGE return 1 working_dir1 = args[0] working_dir2 = args[1] cutoff = float(args[2]) sequence_names = [name for name, sequence in io.read_sequences(os.path.join(working_dir1, "reference.fa"))] if title1 is None: title1 = working_dir1 if title2 is None: title2 = working_dir2 n = 1 while significance([("A", n)], [("T", n)], 1.0) > cutoff: n += 1 print "%g\tsignificance cutoff" % cutoff print "%d\tdepth required to call substitution (greater if there are errors in the reads)" % n print "Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s" % ( title1, title2, title1, title2, ) for sequence_name in sequence_names: filename1 = os.path.join(working_dir1, grace.filesystem_friendly_name(sequence_name) + "-evidence.txt") filename2 = os.path.join(working_dir2, grace.filesystem_friendly_name(sequence_name) + "-evidence.txt") for (pos1, ins1, sub1, ref1, conins1, consub1), (pos2, ins2, sub2, ref2, conins2, consub2) in itertools.izip( read_file(filename1), read_file(filename2) ): assert pos1 == pos2 and ref1 == ref2 if pos1 % 1000 == 0: grace.status("Testing %s %d" % (sequence_name, pos1)) dec_ins1 = io.decode_evidence(ins1) dec_ins2 = io.decode_evidence(ins2) if dec_ins1 and dec_ins2: sig = significance(io.decode_evidence(ins1), io.decode_evidence(ins2), cutoff) if sig is not None and sig <= cutoff: grace.status("") print "%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s" % ( sequence_name, pos1, "insertion-before", ins1, ins2, sig, conins1, conins2, ) dec_sub1 = io.decode_evidence(sub1) dec_sub2 = io.decode_evidence(sub2) if dec_sub1 and dec_sub2: sig = significance(dec_sub1, dec_sub2, cutoff) if sig is not None and sig <= cutoff: if dec_sub1[0][0] == "-" or dec_sub2[0][0] == "-": what = "deletion" elif 
dec_sub1[0][0] != dec_sub2[0][0]: what = "substitution" else: what = "different mix" grace.status("") print "%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s" % ( sequence_name, pos1, what, ref1, sub1, sub2, sig, consub1, consub2, ) grace.status("") return 0
def run(self):
    """Write significant differences between two samples' evidence to a file.

    For every sequence in the first working directory's reference, the
    matching '<sequence>-evidence.txt' files from both working directories
    are read row by row. Each insertion or substitution whose significance
    is at or below ``self.cutoff`` becomes one tab-separated row in
    ``self.prefix + '.txt'``.

    Reads from self: title1, title2 (fall back to the working directory
    names when None), working_dir1, working_dir2, cutoff and prefix.

    Returns 0 on completion.

    Raises AssertionError if the paired evidence rows do not share the
    same position and reference base.
    """
    title1 = self.title1
    title2 = self.title2
    working1 = working_directory.Working(self.working_dir1)
    working2 = working_directory.Working(self.working_dir2)
    cutoff = self.cutoff

    sequence_names = [ name for name, length in working1.get_reference().get_lengths() ]

    if title1 is None:
        title1 = working1.name
    if title2 is None:
        title2 = working2.name

    # Find the minimum depth at which a clean A-versus-T difference
    # reaches the cutoff; it is reported in the file header.
    n = 1
    while significance([('A',n)],[('T',n)],1.0) > cutoff:
        n += 1

    # Context manager so the file handle is released even when an
    # exception escapes the comparison loop.
    with open(self.prefix + '.txt','wb') as f:
        print >> f, '%g\tsignificance cutoff' % cutoff
        print >> f, '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n
        print >> f, 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % (title1, title2, title1, title2)

        for sequence_name in sequence_names:
            filename1 = working1/(grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')
            filename2 = working2/(grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')

            # NOTE(review): izip ends with the shorter input, so evidence
            # files of different length are compared only over their
            # common prefix.
            paired_rows = itertools.izip(read_file(filename1), read_file(filename2))
            for (pos1, ins1, sub1, ref1, conins1, consub1), (pos2, ins2, sub2, ref2, conins2, consub2) in paired_rows:
                assert pos1 == pos2 and ref1 == ref2, \
                    'Evidence files %s and %s are out of step at position %s' % (filename1, filename2, pos1)

                if pos1 % 1000 == 0:
                    grace.status('Testing %s %d' % (sequence_name, pos1))

                dec_ins1 = io.decode_evidence(ins1)
                dec_ins2 = io.decode_evidence(ins2)
                if dec_ins1 and dec_ins2:
                    # Pass the already-decoded evidence instead of decoding
                    # both strings a second time (assumes decode_evidence
                    # has no side effects).
                    sig = significance(dec_ins1, dec_ins2, cutoff)
                    if sig is not None and sig <= cutoff:
                        print >> f, '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % (sequence_name, pos1, 'insertion-before', ins1, ins2, sig, conins1, conins2)
                        f.flush()

                dec_sub1 = io.decode_evidence(sub1)
                dec_sub2 = io.decode_evidence(sub2)
                if dec_sub1 and dec_sub2:
                    sig = significance(dec_sub1, dec_sub2, cutoff)
                    if sig is not None and sig <= cutoff:
                        # The change type is decided from the first decoded
                        # entry of each sample.
                        if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-':
                            what = 'deletion'
                        elif dec_sub1[0][0] != dec_sub2[0][0]:
                            what = 'substitution'
                        else:
                            what = 'different mix'
                        print >> f, '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % (sequence_name, pos1, what, ref1, sub1, sub2, sig, consub1, consub2)
                        f.flush()

    # Clear the progress message.
    grace.status('')
    return 0
def main(args): title1, args = grace.get_option_value(args, '--title1', str, None) title2, args = grace.get_option_value(args, '--title2', str, None) grace.expect_no_further_options(args) if len(args) != 3: print >> sys.stderr, USAGE return 1 working_dir1 = args[0] working_dir2 = args[1] cutoff = float(args[2]) sequence_names = [ name for name, sequence in io.read_sequences( os.path.join(working_dir1, 'reference.fa')) ] if title1 is None: title1 = working_dir1 if title2 is None: title2 = working_dir2 n = 1 while significance([('A', n)], [('T', n)], 1.0) > cutoff: n += 1 print '%g\tsignificance cutoff' % cutoff print '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n print 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % ( title1, title2, title1, title2) for sequence_name in sequence_names: filename1 = os.path.join( working_dir1, grace.filesystem_friendly_name(sequence_name) + '-evidence.txt') filename2 = os.path.join( working_dir2, grace.filesystem_friendly_name(sequence_name) + '-evidence.txt') for (pos1, ins1, sub1, ref1, conins1, consub1), (pos2, ins2, sub2, ref2, conins2, consub2) in itertools.izip(read_file(filename1), read_file(filename2)): assert pos1 == pos2 and ref1 == ref2 if pos1 % 1000 == 0: grace.status('Testing %s %d' % (sequence_name, pos1)) dec_ins1 = io.decode_evidence(ins1) dec_ins2 = io.decode_evidence(ins2) if dec_ins1 and dec_ins2: sig = significance(io.decode_evidence(ins1), io.decode_evidence(ins2), cutoff) if sig is not None and sig <= cutoff: grace.status('') print '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % ( sequence_name, pos1, 'insertion-before', ins1, ins2, sig, conins1, conins2) dec_sub1 = io.decode_evidence(sub1) dec_sub2 = io.decode_evidence(sub2) if dec_sub1 and dec_sub2: sig = significance(dec_sub1, dec_sub2, cutoff) if sig is not None and sig <= cutoff: if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-': what = 'deletion' elif 
dec_sub1[0][0] != dec_sub2[0][0]: what = 'substitution' else: what = 'different mix' grace.status('') print '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % ( sequence_name, pos1, what, ref1, sub1, sub2, sig, consub1, consub2) grace.status('') return 0