Пример #1
0
    def testUpdate2(self):
        '''Issue 135: in-place update of sequence and quality scores.

        Assigning a new query sequence discards the stored qualities, so
        they have to be captured first and assigned again afterwards.
        '''
        # Case 1: replacing the sequence alone leaves no qualities behind.
        read = self.buildRead()
        read.query_sequence = read.query_sequence[5:10]
        leftover = pysam.qualities_to_qualitystring(read.query_qualities)
        self.assertEqual(leftover, None)

        # Case 2: save the quality string, trim the sequence, then restore
        # the matching slice of qualities.
        read = self.buildRead()
        saved = pysam.qualities_to_qualitystring(read.query_qualities)
        read.query_sequence = read.query_sequence[5:10]
        read.query_qualities = pysam.qualitystring_to_array(saved[5:10])

        restored = pysam.qualities_to_qualitystring(read.query_qualities)
        self.assertEqual(restored, saved[5:10])
Пример #2
0
    def testUpdate2(self):
        '''Issue 135: in-place update of sequence and quality scores.

        Setting the query sequence erases the quality scores, which is why
        the qualities must be re-assigned after the sequence is changed.
        '''
        # Sequence-only update: the qualities are gone afterwards.
        segment = self.buildRead()
        segment.query_sequence = segment.query_sequence[5:10]
        self.assertEqual(
            pysam.qualities_to_qualitystring(segment.query_qualities),
            None)

        # Sequence update followed by an explicit quality update.
        segment = self.buildRead()
        original_qualities = pysam.qualities_to_qualitystring(
            segment.query_qualities)
        expected = original_qualities[5:10]
        segment.query_sequence = segment.query_sequence[5:10]
        segment.query_qualities = pysam.qualitystring_to_array(
            original_qualities[5:10])

        self.assertEqual(
            pysam.qualities_to_qualitystring(segment.query_qualities),
            expected)
Пример #3
0
def saveread(alignedRead, outfile1, outfile2):
    """Write a read together with its mate, or cache it until the mate shows up.

    The read is rendered as a FASTQ record; reads flagged as reverse are
    reverse-complemented via ``strRevComp`` and their qualities reversed.
    If the mate is already cached (``r2``/``dr2`` for a first read,
    ``r1``/``dr1`` for a second read) both records are written -- the first
    read to ``outfile1`` and the second read to ``outfile2`` -- and the cache
    entry is deleted. Otherwise the zlib-compressed record is stored in
    ``r1`` (first read) or ``r2`` (second read). Reads that are neither
    read1 nor read2 are ignored.
    """
    global r1
    global r2
    global dr1
    global dr2

    if alignedRead.is_read1:
        is_first = True
        own_cache = r1
        mate_caches = (r2, dr2)
    elif alignedRead.is_read2:
        is_first = False
        own_cache = r2
        mate_caches = (r1, dr1)
    else:
        return

    name = alignedRead.qname
    if alignedRead.is_reverse:
        bases = strRevComp(alignedRead.seq)
        quality = pysam.qualities_to_qualitystring(
            alignedRead.query_qualities[::-1])
    else:
        bases = alignedRead.seq
        quality = pysam.qualities_to_qualitystring(
            alignedRead.query_qualities)
    record = "@" + name + '\n' + bases + '\n' + "+" + '\n' + quality + '\n'

    for cache in mate_caches:
        if name not in cache:
            continue
        # outfile1 is always written before outfile2.
        if is_first:
            outfile1.write(record.encode())
            outfile2.write(zlib.decompress(cache[name]))
        else:
            outfile1.write(zlib.decompress(cache[name]))
            outfile2.write(record.encode())
        del cache[name]
        return

    # Mate not seen yet: keep this record (compression level 1).
    own_cache[name] = zlib.compress(record.encode(), 1)
Пример #4
0
Файл: seq.py Проект: xtmgah/wub
def quality_array_to_string(quality_list):
    """Encode a list of phred quality values as a quality string.

    Delegates to ``pysam.qualities_to_qualitystring``.

    :param quality_list: List of phred quality scores.
    :returns: Quality string.
    :rtype: str
    """
    quality_string = pysam.qualities_to_qualitystring(quality_list)
    return quality_string
Пример #5
0
def writepair(ar1, ar2, outfile1, outfile2, n):
    """Write a read pair as FASTQ records named ``n``.

    ``ar1`` goes to ``outfile1`` and ``ar2`` to ``outfile2``, both encoded to
    bytes. Only the strand of ``ar1`` is inspected: when ``ar1.is_reverse``
    is set, ``ar1`` is reverse-complemented (qualities reversed) and ``ar2``
    is written as stored; otherwise ``ar1`` is written as stored and ``ar2``
    is reverse-complemented.
    """

    def as_stored(read):
        # Record with sequence and qualities exactly as held by the read.
        return "@" + n + '\n' + read.seq + '\n' + "+" + '\n' + \
            pysam.qualities_to_qualitystring(read.query_qualities) + '\n'

    def flipped(read):
        # Record with reverse-complemented sequence and reversed qualities.
        return "@" + n + '\n' + strRevComp(read.seq) + '\n' + "+" + '\n' + \
            pysam.qualities_to_qualitystring(
                read.query_qualities[::-1]) + '\n'

    if ar1.is_reverse:
        render_first, render_second = flipped, as_stored
    else:
        render_first, render_second = as_stored, flipped

    outfile1.write(render_first(ar1).encode())
    outfile2.write(render_second(ar2).encode())
Пример #6
0
 def ParseReadDictionary(self, read_dictionary):
     """
     Walk the read dictionary and yield one [header, sequence, quality]
     list per entry.

     The header is the dictionary key, the sequence is the entry's 'seq'
     items joined into one string, and the quality is the entry's 'qual'
     value converted with qualities_to_qualitystring.
     """
     for header, record in read_dictionary.items():
         bases = ''.join(record['seq'])
         quality_string = ps.qualities_to_qualitystring(record['qual'])
         yield [header, bases, quality_string]
Пример #7
0
def write_reads(in_bam, out_dir, ctg):
	"""Write the reads aligned to contig ``ctg`` as FASTQ.

	Records fetched from ``in_bam`` for ``ctg`` are written to
	``<out_dir>/<ctg>.fq``. Reads with a mapping quality of 0 or 255 are
	skipped.

	:param in_bam: Path of the BAM file to read (opened in 'rb' mode).
	:param out_dir: Directory receiving the FASTQ file.
	:param ctg: Contig name; used for both the fetch and the file name.
	"""
	with open(os.path.join(out_dir, ctg+".fq"), 'w') as fout:
		with pysam.AlignmentFile(in_bam, 'rb') as bam:
			for line in bam.fetch(contig=ctg):
				# Filter first so no quality string is built for reads
				# that are discarded anyway.
				if line.mapping_quality in (0, 255):
					continue
				rn = line.query_name
				seq = line.query_sequence
				qual = pysam.qualities_to_qualitystring(line.query_qualities)
				fout.write("@%s\n%s\n+\n%s\n"%(rn, seq, qual))
Пример #8
0
def make_read(seq, cigar, mdtag=None, name="dummy", mapq=10, baseq=30):
    """Build a ``pysam.AlignedSegment`` from a few basic fields.

    Every base receives the same quality ``baseq``. The MD tag is only set
    when ``mdtag`` is truthy.
    """
    segment = pysam.AlignedSegment()
    segment.seq = seq
    segment.cigarstring = cigar
    if mdtag:
        segment.set_tag("MD", mdtag)
    segment.qname = name
    segment.mapq = mapq

    # One identical quality value per base, assigned as ASCII bytes.
    uniform_qualities = [baseq] * len(seq)
    segment.qual = pysam.qualities_to_qualitystring(
        uniform_qualities).encode("ascii")
    return segment
Пример #9
0
def make_read(seq, cigar, mdtag=None, name="dummy", mapq=10, baseq=30):
    """Create an aligned segment with a uniform base quality.

    Sets sequence, CIGAR string, query name and mapping quality on a new
    ``pysam.AlignedSegment``; an MD tag is attached only if ``mdtag`` is
    truthy. All ``len(seq)`` bases get quality ``baseq``.
    """
    aligned = pysam.AlignedSegment()
    aligned.seq = seq
    aligned.cigarstring = cigar
    if mdtag:
        aligned.set_tag("MD", mdtag)
    aligned.qname = name
    aligned.mapq = mapq

    quality_text = pysam.qualities_to_qualitystring([baseq] * len(seq))
    aligned.qual = quality_text.encode("ascii")
    return aligned
Пример #10
0
 def testEmpty(self):
     """Check the field values of a freshly constructed AlignedSegment."""
     segment = pysam.AlignedSegment()

     # Name, sequence and qualities are all unset.
     for attribute in ("query_name", "query_sequence"):
         self.assertEqual(getattr(segment, attribute), None)
     self.assertEqual(
         pysam.qualities_to_qualitystring(segment.query_qualities), None)

     # Remaining fields, checked in the same order as before.
     expected_values = (
         ("flag", 0),
         ("reference_id", -1),
         ("mapping_quality", 0),
         ("cigartuples", None),
         ("tags", []),
         ("next_reference_id", -1),
         ("next_reference_start", -1),
         ("template_length", 0),
     )
     for attribute, expected in expected_values:
         self.assertEqual(getattr(segment, attribute), expected)
Пример #11
0
 def testEmpty(self):
     """Check the field values of a freshly constructed AlignedSegment.

     This variant expects 0 for the reference id, next reference id and
     next reference start.
     """
     segment = pysam.AlignedSegment()

     # Name, sequence and qualities are all unset.
     for attribute in ("query_name", "query_sequence"):
         self.assertEqual(getattr(segment, attribute), None)
     self.assertEqual(
         pysam.qualities_to_qualitystring(segment.query_qualities), None)

     # Remaining fields, checked in the same order as before.
     expected_values = (
         ("flag", 0),
         ("reference_id", 0),
         ("mapping_quality", 0),
         ("cigartuples", None),
         ("tags", []),
         ("next_reference_id", 0),
         ("next_reference_start", 0),
         ("template_length", 0),
     )
     for attribute, expected in expected_values:
         self.assertEqual(getattr(segment, attribute), expected)
Пример #12
0
def make_pysam_read(seq,
                    cigar,
                    mdtag=None,
                    name="dummy",
                    mapq=10,
                    baseq=30,
                    reference_start=0,
                    reference_id=0):
    """Build a ``pysam.AlignedSegment`` positioned on a reference.

    Sets sequence, CIGAR string, query name, mapping quality, reference
    start and reference id. The MD tag is set only when ``mdtag`` is truthy,
    and every base is given the quality ``baseq``.
    """
    segment = pysam.AlignedSegment()
    segment.seq = seq
    segment.cigarstring = cigar
    if mdtag:
        segment.set_tag("MD", mdtag)
    segment.qname = name
    segment.mapq = mapq
    segment.reference_start = reference_start
    segment.reference_id = reference_id

    # Uniform per-base quality, assigned as ASCII bytes.
    per_base = [baseq] * len(seq)
    quality_bytes = pysam.qualities_to_qualitystring(per_base).encode("ascii")
    segment.qual = quality_bytes
    return segment
Пример #13
0
def test_realign_rc(genome_source):
    """Realign a minus-strand sequence and check position and qualities.

    The read sequence is taken from chr1:30-50 on the "-" strand. The
    alignment must come back as a single reverse-strand 21M hit, and the
    qualities of the realigned read must equal the input qualities reversed.
    """
    import warnings

    read = pysam.AlignedSegment()
    read.query_sequence = genome_source.get_seq("chr1", 30, 50, "-")

    hits = genome_source.align(Alignment(read))
    assert len(hits) == 1
    only_hit = hits[0]
    assert only_hit.cigarstring == "21M"
    assert only_hit.reference_start == 30
    assert only_hit.reference_end == 51
    assert only_hit.is_reverse

    qs = "<<<<<<<:<9/,&,22;;<<<"
    read.query_qualities = pysam.qualitystring_to_array(qs)
    hits = genome_source.align(Alignment(read))

    with warnings.catch_warnings():
        # Probably a python 2/3 incompatibility: the warning says
        # array.tostring() is deprecated, but array.tobytes() only exists
        # in py3, so the warning is silenced here.
        warnings.simplefilter("ignore")
        realigned_qualities = pysam.qualities_to_qualitystring(
            hits[0].query_qualities)
        assert realigned_qualities == qs[::-1]
Пример #14
0
def make_read_info(source_align_file, art_aligned_mapped_reads,
                   art_aligned_unmapped_reads):
    """Collect per-read information from the source alignment file.

    Secondary and supplementary records are skipped. For every mapped
    record whose query name is in ``art_aligned_mapped_reads`` the tuple
    (reference_id, reference_start, reference_end, mapping_quality,
    is_spliced) is stored, where ``is_spliced`` is true when the CIGAR
    string contains "N". For every unmapped record whose query name is in
    ``art_aligned_unmapped_reads`` the tuple (sequence, quality string) is
    stored.

    Returns the pair ``(mapped_reads_info, unmapped_reads_info)``, both
    dictionaries keyed by query name.
    """
    LOGGER.info("Extracting info from source SAM file (%s)..." %
                source_align_file)

    mapped_reads_info, unmapped_reads_info = {}, {}

    with pysam.AlignmentFile(source_align_file) as alignments:
        for record in alignments:
            if record.is_secondary or record.is_supplementary:
                continue

            name = record.query_name
            bases = record.query_sequence

            if record.is_unmapped:
                if name in art_aligned_unmapped_reads:
                    quality = pysam.qualities_to_qualitystring(
                        record.query_qualities)
                    unmapped_reads_info[name] = (bases, quality)
                continue

            if name not in art_aligned_mapped_reads:
                continue

            spliced = "N" in record.cigarstring
            mapped_reads_info[name] = (record.reference_id,
                                       record.reference_start,
                                       record.reference_end,
                                       record.mapping_quality,
                                       spliced)

    LOGGER.info("Completed info extraction")

    return mapped_reads_info, unmapped_reads_info
Пример #15
0
def to_unmapped(segment, unal_read1, unal_read2):
    """Write ``segment`` as a FASTQ record to the matching output handle.

    First reads, and reads that are neither read1 nor read2, go to
    ``unal_read1`` and look their sequence up in ``fwd_read_dict``; second
    reads go to ``unal_read2`` and use ``rev_read_dict``. A sequence found
    in the dictionary is written as stored there; otherwise the segment's
    own sequence is used and reverse-complemented when the segment is
    flagged as reverse. The quality string is reversed for reverse-flagged
    segments in both cases.
    """
    if segment.is_read1:
        stored_sequences, sink = fwd_read_dict, unal_read1
    elif segment.is_read2:
        stored_sequences, sink = rev_read_dict, unal_read2
    else:  # single end and fully unmapped
        stored_sequences, sink = fwd_read_dict, unal_read1

    read_name = segment.query_name
    has_stored_sequence = read_name in stored_sequences
    if has_stored_sequence:
        bases = stored_sequences[read_name]
    else:
        bases = segment.query_sequence
    quality = qualities_to_qualitystring(segment.query_qualities)

    if segment.is_reverse:
        # Stored sequences are written unchanged; only the segment's own
        # sequence is reverse-complemented.
        if not has_stored_sequence:
            bases = reverse_complement(bases)
        quality = quality[::-1]

    sink.write("".join(
        ["@", read_name, "\n", bases, '\n+\n', quality, '\n']))
Пример #16
0
    def _init_vardict(self):
        """Fill the variable dictionary (context for eval/code exec).

        ``self.vardict`` is bound to ``self.init_vardict`` and then populated
        from ``self.alignment``. A variable is only initialised when its name
        is found in ``self.possible_vars``, so attributes of the alignment
        are not read for variables that were not requested.
        """

        self.vardict = self.init_vardict

        aln = self.alignment
        requested = self.possible_vars
        target = self.vardict

        # Getters are lazy so each attribute is read only if requested.
        plain_fields = (
            ('a', lambda: aln),
            ('QNAME', lambda: aln.query_name),
            ('FLAG', lambda: aln.flag),
            ('POS', lambda: aln.reference_start + 1),
            ('MAPQ', lambda: aln.mapping_quality),
            ('CIGAR', lambda: aln.cigarstring),
            ('PNEXT', lambda: aln.next_reference_start + 1),
            ('TLEN', lambda: aln.template_length),
            ('SEQ', lambda: aln.query_sequence),
            ('RNAMEi', lambda: aln.reference_id),
            ('RNEXTi', lambda: aln.next_reference_id),
        )
        for var_name, getter in plain_fields:
            if var_name in requested:
                target[var_name] = getter()

        # The type of the quality attributes depends on the PySam version;
        # both branches expose the same set of variables.
        if isinstance(aln.qual, str):
            quality_fields = (
                ('QUAL', lambda: aln.qual),
                ('QUALa', lambda: [ord(x) for x in aln.qual]),
                ('QUALs', lambda: aln.qqual),
                ('QUALsa', lambda: [ord(x) for x in aln.qqual]),
            )
        else:
            quality_fields = (
                ('QUAL', lambda: pysam.qualities_to_qualitystring(
                    aln.qual, offset=0)),
                ('QUALa', lambda: aln.qual),
                ('QUALs', lambda: pysam.qualities_to_qualitystring(
                    aln.qqual, offset=0)),
                ('QUALsa', lambda: aln.qqual),
            )
        for var_name, getter in quality_fields:
            if var_name in requested:
                target[var_name] = getter()

        # A reference id of -1 is rendered as '*'.
        if 'RNAME' in requested:
            target['RNAME'] = (
                '*' if aln.reference_id == -1
                else self.in_sam.get_reference_name(aln.reference_id))

        if 'RNEXT' in requested:
            target['RNEXT'] = (
                '*' if aln.next_reference_id == -1
                else self.in_sam.get_reference_name(aln.next_reference_id))
Пример #17
0
def main(argv=None):
    """Script entry point: pile up a BAM file and write per-position output.

    The first positional argument is the BAM file. Pileup columns come from
    ``generate_from_region`` (when ``--region-string`` is given),
    ``generate_from_bed`` (when ``--input-bed`` is given) or
    ``generate_from_bam`` otherwise. ``--method`` selects the output:

    * ``read-variant``: positions with more than one observed base, with
      the read names supporting each base as JSON.
    * ``depth-vcf`` / ``coverage-vcf``: a VCF whose DP field holds either
      the number of pileup reads that are neither deletions nor reference
      skips (depth) or ``pileupcolumn.n`` (coverage).
    * ``read-list``: one line per read and position.
    * ``barcode``: a per-position table of base counts and consensus.

    :param argv: Command line arguments; defaults to ``sys.argv``.
    :raises OSError: if the given bed file does not exist.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-i",
                      "--input-fastq-file",
                      dest="input_fastq_file",
                      type="string",
                      help="input fastq file. "
                      "[%default]")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("read-variant", "depth-vcf", "read-list",
                               "coverage-vcf", "barcode"),
                      help="method to apply [%default]")

    parser.add_option(
        "-e",
        "--input-bed",
        dest="input_bed_file",
        type="string",
        help="input file with intervals. Tab-delimited file of intervals "
        "in bed format to restrict analysis to. [%default]")

    parser.add_option(
        "-r",
        "--region-string",
        dest="region_string",
        type="string",
        help="region string. Only apply method in specified region. "
        "[%default]")

    parser.add_option("-f",
                      "--reference-fasta-file",
                      dest="reference_fasta_file",
                      help="reference genomic sequence in fasta format. "
                      "[%default]")

    parser.add_option("--min-base-quality",
                      dest="min_base_quality",
                      type="int",
                      help="minimum base quality for barcode analysis. "
                      "[%default]")

    parser.add_option("-s",
                      "--stepper",
                      dest="stepper",
                      type="choice",
                      choices=("nofilter", "samtools", "all"))

    parser.set_defaults(method="read-variant",
                        reference_fasta_file=None,
                        input_bed_file=None,
                        regex_sample_name="([^/]+).bam",
                        stepper="nofilter",
                        min_base_quality=13,
                        region_string=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    pysam_in = pysam.AlignmentFile(args[0], "rb")

    if options.input_bed_file:
        if not os.path.exists(options.input_bed_file):
            raise OSError("input bed file {} does not exist".format(
                options.input_bed_file))
        bed_in = pysam.TabixFile(options.input_bed_file)
    else:
        bed_in = None

    if options.region_string is not None:
        # The option is stored under dest="region_string"; there is no
        # "region" attribute on the parsed options.
        itr = generate_from_region(pysam_in,
                                   options.region_string,
                                   stepper=options.stepper,
                                   min_base_quality=options.min_base_quality)
    elif bed_in is not None:
        itr = generate_from_bed(pysam_in,
                                bed_in,
                                stepper=options.stepper,
                                min_base_quality=options.min_base_quality)
    else:
        itr = generate_from_bam(pysam_in,
                                stepper=options.stepper,
                                min_base_quality=options.min_base_quality)

    reference_fasta = pysam.FastaFile(options.reference_fasta_file)

    outf = options.stdout
    counter = E.Counter()

    if options.method == "read-variant":
        outf.write("chromosome\tposition\tref\ttypes\n")

        for pileupcolumn in itr:
            counter.positions_pileup += 1
            reference_base = reference_fasta.fetch(
                pileupcolumn.reference_name, pileupcolumn.reference_pos,
                pileupcolumn.reference_pos + 1)
            matches = []
            bases = set()
            for read in pileupcolumn.pileups:
                qpos = read.query_position
                if qpos is not None:
                    base = read.alignment.query_sequence[qpos]
                else:
                    # no query position at this column
                    base = "-"

                matches.append((base, read.alignment.query_name))
                bases.add(base)

            bases = list(bases)
            if len(bases) == 1:
                # a single observed base carries no variant information
                counter.position_noninformative += 1
                if bases[0] == reference_base:
                    counter.position_reference += 1
                continue

            counter.position_informative += 1

            d = {}
            for base in bases:
                d[base] = ",".join([x[1] for x in matches if x[0] == base])

            outf.write("{}\t{}\t{}\t{}\n".format(pileupcolumn.reference_name,
                                                 pileupcolumn.reference_pos,
                                                 reference_base,
                                                 json.dumps(d)))

    elif options.method in ("depth-vcf", "coverage-vcf"):
        if options.regex_sample_name:
            sample_name = re.search(options.regex_sample_name,
                                    args[0]).groups()[0]
        else:
            sample_name = "unknown"

        outf.write("##fileformat=VCFv4.1\n")
        outf.write("##FORMAT=<ID=GT,Number=1,Type=String,"
                   "Description=\"Genotype\">\n")
        # NOTE(review): the DP description below also reads "Genotype";
        # left untouched in case downstream consumers compare output.
        outf.write("##FORMAT=<ID=DP,Number=1,Type=Integer,"
                   "Description=\"Genotype\">\n")
        outf.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\t"
                   "FILTER\tINFO\tFORMAT\t{}\n".format(sample_name))

        is_depth = options.method == "depth-vcf"

        for idx, pileupcolumn in enumerate(itr):

            if idx % 1000 == 0:
                E.info("processed {} positions".format(idx))

            reference_base = reference_fasta.fetch(
                pileupcolumn.reference_name, pileupcolumn.reference_pos,
                pileupcolumn.reference_pos + 1).upper()

            # the ALT allele is a placeholder that differs from REF
            if reference_base == 'A':
                alt_base = 'C'
            else:
                alt_base = 'A'

            if is_depth:
                n = sum([
                    1 for x in pileupcolumn.pileups
                    if not (x.is_del or x.is_refskip)
                ])
            else:
                n = pileupcolumn.n

            outf.write("{}\t{}\t.\t{}\t{}\t.\tPASS\t.\tGT:DP\t0/1:{}\n".format(
                pileupcolumn.reference_name, pileupcolumn.reference_pos,
                reference_base, alt_base, n))

    elif options.method == "read-list":
        outf.write(
            "chromosome\tposition\treference_base\tbase\tquality\tquery_name\n"
        )

        for pileupcolumn in itr:
            reference_base = reference_fasta.fetch(
                pileupcolumn.reference_name, pileupcolumn.reference_pos,
                pileupcolumn.reference_pos + 1)
            for read in pileupcolumn.pileups:
                qpos = read.query_position
                if qpos is not None:
                    base = read.alignment.query_sequence[qpos]
                    quality = read.alignment.query_qualities[qpos]
                else:
                    base = "-"
                    quality = ""

                outf.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(
                    pileupcolumn.reference_name, pileupcolumn.reference_pos,
                    reference_base, base, quality, read.alignment.query_name))

    elif options.method == "barcode":

        rows = []
        for c in itr:
            rows.append(
                (c.reference_pos, c.n, "".join(c.get_query_sequences()),
                 pysam.qualities_to_qualitystring(c.get_query_qualities())))
        df = pandas.DataFrame.from_records(
            rows, columns=["pos", "gapped_depth", "bases", "qualities"])

        df["depth"] = df.bases.str.len()
        bases = ["A", "C", "G", "T"]
        for b in bases:
            df[b] = df.bases.str.upper().str.count(b)
        df["consensus"] = df[bases].idxmax(axis=1)
        # The count of the consensus base is the row-wise maximum over the
        # base columns. This replaces DataFrame.lookup, which was removed
        # in pandas 2.0, and yields the same values.
        df["consensus_counts"] = df[bases].max(axis=1)
        df["consensus_support"] = df.consensus_counts / df.depth
        df["offconsensus_counts"] = df.depth - df.consensus_counts
        df.loc[df.consensus_counts == 0, "consensus"] = "N"

        df.to_csv(outf, sep="\t", index=False)

    E.info(counter)
    # write footer and output benchmark information.
    E.stop()
Пример #18
0
    def generate_barcode(self, split_fq_sam_file_List, outdir, samtools_path,
                         split_modify_fq_file_List, file_sign, maxsize,
                         barcode_sequence):
        """Merge the split BAM files, name-sort them and write barcoded FASTQ.

        Steps performed:

        1. Read the BAM paths (second tab-separated column) from
           ``split_fq_sam_file_List``.
        2. Write and run ``<outdir>/shell/merge_fqbam.sh``, which merges (or
           symlinks, for a single input) the BAMs, sorts the result by name
           and indexes it with ``samtools_path``.
        3. Iterate over the sorted BAM. Query names are split on "_" into
           (barcode, read id). The first record of a query name is written
           as "/1", a directly following record with the same name as "/2".
           Each time the barcode part changes, a new barcode is built from
           ``barcode_sequence`` plus the suffix returned by
           ``self.get_new_barcode``.
        4. Records go to gzip files named ``<prefix><s>.BX.modified.fq.gz``.
           A new file is started once the line count (4 per record) would
           exceed ``maxsize``. Every output path is listed in
           ``split_modify_fq_file_List``.
        5. Write "done!" to ``file_sign``.

        :returns: ``split_modify_fq_file_List`` (the path passed in).
        """
        fqsamlist = list()
        # Context manager so the list file is closed after reading.
        with open(split_fq_sam_file_List, 'r') as rsplit_fq_sam_file_List:
            for split_fq_sam_file_List_info in rsplit_fq_sam_file_List:
                sam_path = re.split(
                    "\t", split_fq_sam_file_List_info.strip())[1]
                fqsamlist.append(sam_path)
                # NOTE(review): otherfiletmplist is not defined in this
                # method -- presumably a module-level list of temporary
                # files; confirm against the rest of the file.
                otherfiletmplist.append(sam_path)
        mergedFqBam = fqsamlist[0].replace("0.fq.sam.bam", "fq.merged.bam")
        shelldir = outdir + "/shell"
        check_info(shelldir, "dir")
        shell = shelldir + "/merge_fqbam.sh"
        sortedFqBamprefix = mergedFqBam.replace("fq.merged.bam",
                                                "fq.merged.sorted")
        sortedFqBam = sortedFqBamprefix + ".bam"
        with open(shell, 'w') as wshell:
            if len(fqsamlist) > 1:
                allfqsam = " ".join(fqsamlist)
                shell_line = " ".join(
                    [samtools_path, "merge -f", mergedFqBam, allfqsam]) + "\n"
            else:
                shell_line = " ".join(
                    ["ln -sf", fqsamlist[0], mergedFqBam]) + "\n"
            wshell.write(shell_line)
            sv = find_samtools_version(samtools_path, shelldir)
            # The sort command line differs by the value reported by
            # find_samtools_version: prefix argument for 0, "-o" otherwise.
            if sv == 0:
                shell_line = " ".join([
                    samtools_path, "sort -n -m 1G", mergedFqBam,
                    sortedFqBamprefix
                ]) + "\n"
            else:
                shell_line = " ".join([
                    samtools_path, "sort -n -m 1G -o",
                    sortedFqBamprefix + ".bam", mergedFqBam
                ]) + "\n"
            wshell.write(shell_line)
            shell_line = " ".join([samtools_path, "index", sortedFqBam]) + "\n"
            wshell.write(shell_line)
        subprocess.call(["sh", shell])
        otherfiletmplist.append(mergedFqBam)
        otherfiletmplist.append(sortedFqBam)
        otherfiletmplist.append(sortedFqBam + ".bai")

        wsplit_modify_fq_file_List = open(split_modify_fq_file_List, 'w')
        SplitSize = 0
        rsortedFqBam = pysam.AlignmentFile(sortedFqBam, 'rb')
        readid = "N"
        barcodeid = 0
        barcode_marker = "N"
        new_barcode_sequence = "N"
        start = 11

        s = 0
        split_modify_fq_file = fqsamlist[0].replace(
            "0.fq.sam.bam", "") + str(s) + ".BX.modified.fq.gz"
        wsplit_modify_fq_file_List.write(split_modify_fq_file + "\n")
        wsplit_modify_fq_file = gzip.open(split_modify_fq_file, 'wb')
        for FqBaminfo in rsortedFqBam:
            (real_barcode, real_readid) = re.split("_", FqBaminfo.query_name)
            if FqBaminfo.query_name != readid:
                # first record of this query name -> read 1
                readid = FqBaminfo.query_name
                if barcode_marker != real_barcode:
                    barcode_marker = real_barcode
                    (start, new_barcode_suffix) = self.get_new_barcode(
                        barcodeid, start)
                    barcodeid += 1
                    new_barcode_sequence = barcode_sequence + new_barcode_suffix
                real_readid = '@' + real_readid + "/1\tBC:Z:" + new_barcode_sequence
            else:
                # same query name as the previous record -> read 2
                real_readid = '@' + real_readid + "/2\tBC:Z:" + new_barcode_sequence
            SplitSize += 4
            complete_read_info = "\n".join([
                real_readid, FqBaminfo.query_sequence, "+",
                pysam.qualities_to_qualitystring(FqBaminfo.query_qualities)
            ]) + "\n"
            if SplitSize > maxsize:
                # current chunk is full: start the next output file
                wsplit_modify_fq_file.close()
                s += 1
                SplitSize = 4
                split_modify_fq_file = fqsamlist[0].replace(
                    "0.fq.sam.bam", "") + str(s) + ".BX.modified.fq.gz"
                wsplit_modify_fq_file_List.write(split_modify_fq_file + "\n")
                wsplit_modify_fq_file = gzip.open(split_modify_fq_file, 'wb')
            wsplit_modify_fq_file.write(complete_read_info.encode())
        rsortedFqBam.close()
        wsplit_modify_fq_file_List.close()
        wsplit_modify_fq_file.close()

        # The original referenced ``close`` without calling it, so the sign
        # file was never explicitly closed; the with-block closes it.
        with open(file_sign, 'w') as wfile_sign:
            wfile_sign.write("done!\n")

        return (split_modify_fq_file_List)