def test_get_LEA_seq_consensus_seqs(self):
    """Check consensus strings returned by get_LEA_seq_consensus_seqs.

    Two calls are made with the same settings:

    * one on the ``fwd_read_fh`` / ``rev_read_fh`` / ``mapping_fp``
      fixtures, checking one entry of ``Sample1``;
    * one on the ``get_cons_*`` fixtures, checking one entry each of
      ``Sample1`` and ``Sample2``.

    Each expected value is a ``'<forward>^<reverse>'`` string.
    """
    barcode_type = 7
    barcode_len = 7
    barcode_correction_fn = None
    max_barcode_errors = 1.5
    min_consensus = 0.66
    max_cluster_ratio = 2.5
    min_difference_in_bcs = 0.86
    fwd_length = 19
    rev_length = 19
    min_reads_per_random_bc = 1
    min_diff_in_clusters = self.min_difference_in_clusters
    barcode_column = 'BarcodeSequence'
    reverse_primer_column = 'ReversePrimer'

    # Everything that follows the three input handles is identical for
    # both calls, so it is spelled out once (positional order matters).
    shared_args = (
        self.temp_dir, barcode_type, barcode_len, barcode_correction_fn,
        max_barcode_errors, min_consensus, max_cluster_ratio,
        min_difference_in_bcs, fwd_length, rev_length,
        min_reads_per_random_bc, min_diff_in_clusters, barcode_column,
        reverse_primer_column)

    function_call, _ = get_LEA_seq_consensus_seqs(
        self.fwd_read_fh, self.rev_read_fh, self.mapping_fp, *shared_args)
    actual = function_call['Sample1']['AGCTACGAGCTATTGC']
    expected = 'AAAAAAAAAAAAAAAAAAA^AAAAAAAAAAAAAAAAAA'
    self.assertEqual(actual, expected)

    fn_call, _ = get_LEA_seq_consensus_seqs(
        self.get_cons_fwd_read_fh, self.get_cons_rev_read_fh,
        self.get_cons_mapping_fp, *shared_args)

    # Checked before any per-sample lookup: with one sample or fewer in
    # the result, one of the lookups below would raise KeyError and this
    # diagnostic message would never be shown.
    self.assertGreater(len(fn_call), 1,
                       msg="The get_consensus_seqs_lookup function "
                       "has returned early, without completing "
                       "the three 'for' loops.")

    # Original author's note: this tests the second condition of the
    # 'if' in get_consensus_seq_lookup, i.e. select the majority
    # sequence, as the cluster ratio between
    # max_cluster/second_best_cluster in the fwd_read_data (and
    # rev_read_data) is 3/1 > 2.5, so get_consensus will not be called.
    get_cons_actual = fn_call['Sample1']['AGCTACGAGCTATTGC']
    get_cons_expected = 'AAAAAAAAAACAAAAAAAA^AAAAAAAAAATAAAAATA'
    self.assertEqual(get_cons_actual, get_cons_expected)

    # Original author's note: this tests the third condition of the
    # 'if' in get_consensus_seq_lookup, i.e. get_consensus is called,
    # as the cluster ratio between max_cluster/second_best_cluster in
    # the get_cons_fwd_read_data (and get_cons_rev_read_data) is
    # 2/1 (< 2.5), so the majority sequence will not be selected.
    get_cons_actual = fn_call['Sample2']['AGCTACGCATCAAGGG']
    get_cons_expected = 'AAAAAAAAAATAAAAAAAA^TTAAAAAAAAAAAAGAAAA'
    self.assertEqual(get_cons_actual, get_cons_expected)
def test_get_LEA_seq_consensus_seqs(self):
    """Check one Sample1 consensus from get_LEA_seq_consensus_seqs.

    Calls the function on the ``fwd_read_data`` / ``rev_read_data`` /
    ``mapping_fp`` fixtures and compares the entry stored under
    ``['Sample1']['AGCTACGAGCTATTGC']`` with the expected
    ``'<forward>^<reverse>'`` string.
    """
    # Settings, in the positional order used by the call below.
    settings = (
        7,                                  # barcode_type
        7,                                  # barcode_len
        None,                               # barcode_correction_fn
        1.5,                                # max_barcode_errors
        0.66,                               # min_consensus
        2.5,                                # max_cluster_ratio
        0.86,                               # min_difference_in_bcs
        19,                                 # fwd_length
        19,                                 # rev_length
        1,                                  # min_reads_per_random_bc
        self.min_difference_in_clusters,    # min_diff_in_clusters
        'BarcodeSequence',                  # barcode_column
        'ReversePrimer',                    # reverse_primer_column
    )
    inputs = (self.fwd_read_data, self.rev_read_data, self.mapping_fp,
              self.temp_dir)

    consensus_lookup, _ = get_LEA_seq_consensus_seqs(*(inputs + settings))

    self.assertEqual(consensus_lookup['Sample1']['AGCTACGAGCTATTGC'],
                     'AAAAAAAAAAAAAAAAAAA^AAAAAAAAAAAAAAAAAA')
def test_get_LEA_seq_consensus_seqs(self):
    """Check consensus strings returned by get_LEA_seq_consensus_seqs.

    The function is called twice with identical settings: first on the
    ``fwd_read_fh`` / ``rev_read_fh`` / ``mapping_fp`` fixtures (one
    ``Sample1`` entry is checked), then on the ``get_cons_*`` fixtures
    (one ``Sample1`` and one ``Sample2`` entry are checked).  Expected
    values are ``'<forward>^<reverse>'`` strings.
    """
    barcode_type = 7
    barcode_len = 7
    barcode_correction_fn = None
    max_barcode_errors = 1.5
    min_consensus = 0.66
    max_cluster_ratio = 2.5
    min_difference_in_bcs = 0.86
    fwd_length = 19
    rev_length = 19
    min_reads_per_random_bc = 1
    min_diff_in_clusters = self.min_difference_in_clusters
    barcode_column = 'BarcodeSequence'
    reverse_primer_column = 'ReversePrimer'

    def run(fwd_reads, rev_reads, mapping):
        # Both calls differ only in their three input handles; the
        # remaining positional arguments are fixed here.
        return get_LEA_seq_consensus_seqs(
            fwd_reads, rev_reads, mapping, self.temp_dir, barcode_type,
            barcode_len, barcode_correction_fn, max_barcode_errors,
            min_consensus, max_cluster_ratio, min_difference_in_bcs,
            fwd_length, rev_length, min_reads_per_random_bc,
            min_diff_in_clusters, barcode_column, reverse_primer_column)

    function_call, _ = run(self.fwd_read_fh, self.rev_read_fh,
                           self.mapping_fp)
    actual = function_call['Sample1']['AGCTACGAGCTATTGC']
    expected = 'AAAAAAAAAAAAAAAAAAA^AAAAAAAAAAAAAAAAAA'
    self.assertEqual(actual, expected)

    fn_call, _ = run(self.get_cons_fwd_read_fh, self.get_cons_rev_read_fh,
                     self.get_cons_mapping_fp)

    # Run this guard before indexing into the result: if at most one
    # sample came back, a lookup below would fail with KeyError and hide
    # this more descriptive message.
    self.assertGreater(len(fn_call), 1,
                       msg="The get_consensus_seqs_lookup function "
                       "has returned early, without completing "
                       "the three 'for' loops.")

    # Original author's note: this tests the second condition of the
    # 'if' in get_consensus_seq_lookup, i.e. select the majority
    # sequence, as the cluster ratio between
    # max_cluster/second_best_cluster in the fwd_read_data (and
    # rev_read_data) is 3/1 > 2.5, so get_consensus will not be called.
    get_cons_actual = fn_call['Sample1']['AGCTACGAGCTATTGC']
    get_cons_expected = 'AAAAAAAAAACAAAAAAAA^AAAAAAAAAATAAAAATA'
    self.assertEqual(get_cons_actual, get_cons_expected)

    # Original author's note: this tests the third condition of the
    # 'if' in get_consensus_seq_lookup, i.e. get_consensus is called,
    # as the cluster ratio between max_cluster/second_best_cluster in
    # the get_cons_fwd_read_data (and get_cons_rev_read_data) is
    # 2/1 (< 2.5), so the majority sequence will not be selected.
    get_cons_actual = fn_call['Sample2']['AGCTACGCATCAAGGG']
    get_cons_expected = 'AAAAAAAAAATAAAAAAAA^TTAAAAAAAAAAAAGAAAA'
    self.assertEqual(get_cons_actual, get_cons_expected)
def main():
    """Command-line entry point: build and write LEA-Seq consensus reads.

    Steps:

    1. Parse the options described by ``script_info``.
    2. Validate them; problems are reported via ``option_parser.error``.
    3. Create the output directory and call
       ``get_LEA_seq_consensus_seqs`` on the forward reads, reverse reads
       and mapping file.
    4. Write each consensus as a FASTA record to ``fwd.fna`` and
       ``rev.fna`` and the returned log text to ``log.txt`` (all inside
       the output directory).

    Validation happens before anything is created on disk, and every file
    is opened in a ``with`` block so handles are released even if the
    consensus step raises.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    # NOTE(review): the checks below rely on option_parser.error() not
    # returning (optparse-style behaviour) -- otherwise barcode_len would
    # be unbound after a failed int() conversion.
    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        barcode_correction_fn = None
        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length."
                                % barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f."
                            % max_barcode_errors)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # Only touch the filesystem once the options are known to be valid.
    create_dir(output_dir)
    fwd_out_fp = path.join(output_dir, "fwd.fna")
    rev_out_fp = path.join(output_dir, "rev.fna")
    log_fp = path.join(output_dir, "log.txt")

    with open(fwd_out_fp, "w") as fwd_consensus_outfile, \
            open(rev_out_fp, "w") as rev_consensus_outfile, \
            open(log_fp, "w") as log_file, \
            open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f:

        (consensus_seq_lookup, log_out) = get_LEA_seq_consensus_seqs(
            fwd_read_f, rev_read_f, map_f, output_dir, barcode_type,
            barcode_len, barcode_correction_fn, max_barcode_errors,
            min_consensus, max_cluster_ratio, min_difference_in_bcs,
            fwd_length, rev_length, min_reads_per_random_bc,
            min_diff_in_clusters, barcode_column, reverse_primer_column)

        for sample_id in consensus_seq_lookup:
            sample_seqs = consensus_seq_lookup[sample_id]
            # Records are labelled <sample_id>_<index>, where index is the
            # enumeration position of the random barcode for that sample.
            for bc_index, rand_bc in enumerate(sample_seqs):
                # Each value is '<forward>^<reverse>'.
                fwd_consensus, rev_consensus = sample_seqs[rand_bc].split('^')
                fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, fwd_consensus))
                rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, rev_consensus))

        log_file.write(log_out)
def main():
    """Command-line entry point: build and write LEA-Seq consensus reads.

    Parses the options described by ``script_info``, validates them
    (barcode type/length, ``--max_barcode_errors``,
    ``--min_difference_in_clusters``, ``--min_difference_in_bcs`` and the
    number of read filepaths), then calls ``get_LEA_seq_consensus_seqs``
    and writes its results to ``fwd.fna``, ``rev.fna`` and ``log.txt``
    inside the output directory.

    Invalid options are reported via ``option_parser.error``.  All
    validation runs before the output directory or any file is created,
    and every file is managed by a ``with`` block so handles are released
    even if the consensus step raises.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    # NOTE(review): the checks below rely on option_parser.error() not
    # returning (optparse-style behaviour) -- otherwise barcode_len would
    # be unbound after a failed int() conversion.
    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        barcode_correction_fn = None
        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length."
                                % barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f."
                            % max_barcode_errors)

    if min_diff_in_clusters < 0 or min_diff_in_clusters > 1:
        option_parser.error("--min_difference_in_clusters must be "
                            "between 0 to 1. You provided %.4f."
                            % min_diff_in_clusters)

    if min_difference_in_bcs < 0 or min_difference_in_bcs > 1:
        option_parser.error("--min_difference_in_bcs must be between 0 to 1."
                            " You provided %.4f." % min_difference_in_bcs)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # Only touch the filesystem once the options are known to be valid.
    create_dir(output_dir)
    fwd_out_fp = path.join(output_dir, "fwd.fna")
    rev_out_fp = path.join(output_dir, "rev.fna")
    log_fp = path.join(output_dir, "log.txt")

    with open(fwd_out_fp, "w") as fwd_consensus_outfile, \
            open(rev_out_fp, "w") as rev_consensus_outfile, \
            open(log_fp, "w") as log_file, \
            open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f:

        (consensus_seq_lookup, log_out) = get_LEA_seq_consensus_seqs(
            fwd_read_f, rev_read_f, map_f, output_dir, barcode_type,
            barcode_len, barcode_correction_fn, max_barcode_errors,
            min_consensus, max_cluster_ratio, min_difference_in_bcs,
            fwd_length, rev_length, min_reads_per_random_bc,
            min_diff_in_clusters, barcode_column, reverse_primer_column)

        for sample_id in consensus_seq_lookup:
            sample_seqs = consensus_seq_lookup[sample_id]
            # Records are labelled <sample_id>_<index>, where index is the
            # enumeration position of the random barcode for that sample.
            for bc_index, rand_bc in enumerate(sample_seqs):
                # Each value is '<forward>^<reverse>'.
                fwd_consensus, rev_consensus = sample_seqs[rand_bc].split('^')
                fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, fwd_consensus))
                rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, rev_consensus))

        log_file.write(log_out)