示例#1
0
    def testQualOffset(self):
        # Illumina 1.8 with under 59
        content = """@M70265:234:000000000-CCC3N:1:1101:21165:1697 1:N:0:13
ATGTCCTTGTGCACAATGCCCTGGCTATGCAGGTACTCCAGGCCGTCAATCAGCTGACAGAAGTACCTGCGGGCAGCACACACCCGTCCTGGGGCCGAGGCCTCCCTGCCCCTCTCAGGGGCGAATTTCGACGATCGTTGCATTAACTCGC
+
-A-A@EFF9E,C9C,,C,CEEF,,6C9E,@,,,C<EEEE,,,:B7@:,,CC,,CE,,;,,,,,<9@CE,C+++@+,@,C,C,B@>+BBFE,,,+87++++8ABA=FE,B?BFDC==,,,,+6+++@BFD,+8++>>7@D,<,@@,,@7>*>
@M70265:234:000000000-CCC3N:1:1101:14142:1764 1:N:0:13
TGTCAATCAATATCAGGACAAGCAGTGTGTCCTCACGGAAAGGAGCCTGCCCTGCCTGGCCCCCGGCCCCCGCCCCACCCTGGCCCCTGCCCCGCGCACCCACCCGTTGGCCTTGCCCCCTCGGAAACGCTTCTCCCGCACCCTTGCGAAT
+
B<CB9-CF9,F9FDC,,,C8<,C8,C,C,,<CEE9C,,+,6,<,<CBF@<@EE,CFD,,@ADCF7::C@B@+@>CC,:FFC,,4CDC<,,CED+@+>+6+8+?CC+8,+,4:>,,:,:+83++++++8+83>:,33,+3+5*68,,,**1*
@M70265:234:000000000-CCC3N:1:1101:9715:1775 1:N:0:13
TCCAGGGCTTTTGTCTTCTTCCCTTTAGATTCTCTTCTTCTGTACTGCCTGTGCTTTTGCATTCTCTACACTCATCTGTGCCACCGTTTGGAAAGCTAGTGGTTCAGAGTTCTATATATTCTCGAATTTCGCCGATCGTTTCATTAACTCT
+
-8A----8FFGG,E@E@EEF<@6CFF9,,<,;C6C,6CE@C,C,CF,@C,,;,,,<,;,;,,,6,;C,,,6;;,<<E,,,6C,C<+CBA,,,,,,6C,,:,,CC@,,,,<E@F,C,,,<EA,C,,,9?E,,,8++8>+BE+559E,,5=E,"""
        with open(self.tmp_out, "w") as FH_out:
            FH_out.write(content)
        self.assertEqual(FastqIO.qualOffset(self.tmp_out), 33)
        # Illumina 1.8
        content = """@M70265:234:000000000-CCC3N:1:1102:19767:8584 1:N:0:35
TCATGACTGATATGGTAGACAGAGCCTAAACATCCCCTTAAATTGGATTAAAAAGAAATATACCTTTGTTGTTACCTTTAAATGCAAAGTTAAAATAGGCAGAAGTCTTGCCCACATCGTTGTAGGCCTTACATTCAACCGGCGAATTTCG
+
CCCCCFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGFGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGEGGGGFGF
@M70265:234:000000000-CCC3N:1:1102:17014:8587 1:N:0:35
TCCATAACTTCTTGCTAAGTCCTGAGCCTGTTTTGTGTCTACTGTTCTAGAAGGCAAATCACATTTATTTCCTACTAGGACCACAGGTACATCTTCAGAGTCCTTAACTCTTTTAATTTGTTCTCTGGGAAAGAGCGAATTTCGACGATCG
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
@M70265:234:000000000-CCC3N:1:1102:16174:8588 1:N:0:35
CTTGAGTGAAGGACTGAGAAAATCCCTGTTCCCACTCATACAGGACTTGGGAGGTATCCACATCCTCTTCCTCAGGATTGCCTTTACCACTCTGAGAAGGAGCTGTGGTAGTGGCACCAGAATGGATTCCAGAGTCCAGGTAAGACTGCGC
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"""
        with open(self.tmp_out, "w") as FH_out:
            FH_out.write(content)
        self.assertEqual(FastqIO.qualOffset(self.tmp_out), 33)
        # Solexa
        content = """@SRR1296011.1 1 length=107
CGGCAAGTTAACAAAAAGAAAAATGGTGAATGATACCCGGTGCTGGCAATCTCGTTTAAACTACATGCAGGAACAGCAAAGGAAATCCGGCAAATTTGCGCAGTCAT
+SRR1296011.1 1 length=107
dddddaa]aafffc`c_ccc`cccf_^cddf_fcddd`ddWdd^a]daadf[fdcffaafcffcfcff]fcfffW^I^a^^KZdaffc_cWbc[cN[[X^]`a``ca
@SRR1296011.2 2 length=107
AAATTTGCCGGATTTCCTTTGCTGTTCCTGCATGTAGTTTAAACGAGATTGCCAGCACCGGGTATCATTCACCATTTTTCTTTTTGTTAACTTGCCGTCAGCCTTTT
+SRR1296011.2 2 length=107
gggggggaggggggggggfgfgggggggcgggggggc_geggfggggggggggaggggggggggdggffggfgggaggeegcgggeggggeffgac]dbcaggeab_
@SRR1296011.3 3 length=107
CTTTCTGTTCATGTGTATCTGCTGTCTCTTAGCCCAGACTTCCCGTGTCCTTTCCACTGGGCCTTTGGGAGGTCACAGGGTCTTGATGCTGTGGTCTTGATCTGCAG
+SRR1296011.3 3 length=107
fffdffgggggc_aggaggggfe_afffffgggggfgggggggggddgge_aWdaggggg]]cfffffedfeUeaacff_Wcfcc`bb]d__b^Zacaa[]\```_b"""
        with open(self.tmp_out, "w") as FH_out:
            FH_out.write(content)
        self.assertEqual(FastqIO.qualOffset(self.tmp_out), 64)
                        help="The maximun quality. [Default: No maximun]")
    group_input = parser.add_argument_group('Inputs')  # Inputs
    group_input.add_argument(
        '-i',
        '--input-file',
        required=True,
        help='Path to the sequences file (format: fastq).')
    group_output = parser.add_argument_group('Outputs')  # Outputs
    group_output.add_argument('-o',
                              '--output-file',
                              required=True,
                              help='Path to the output (format: fastq).')
    args = parser.parse_args()

    # Process
    old_offset = args.old_offset if args.old_offset is not None else FastqIO.qualOffset(
        args.input_file)
    if old_offset is None:
        raise Exception(
            "The quality offset in {} cannot be determined.".format(
                args.input_file))
    offset_modifier = args.new_offset - old_offset
    with FastqIO(args.output_file, "w") as FH_out:
        with FastqIO(args.input_file) as FH_in:
            for record in FH_in:
                new_qual = ""
                for curr_qual in record.quality:
                    new_qual_numer = ord(curr_qual) + offset_modifier
                    if args.min_qual is not None:
                        new_qual_numer = max(args.new_offset + args.min_qual,
                                             new_qual_numer)
                    if args.max_qual is not None: