Exemplo n.º 1
0
def mean_emoji(filename):
    """Print one emoji per read position encoding the mean PHRED quality.

    Every record of the FASTQ input is parsed with ``SeqIO.parse``; the
    per-position quality scores are summed, divided by the number of records,
    rounded to integers and wrapped in a placeholder ``SeqRecord`` so that
    ``QualityIO._get_sanger_quality_str`` can turn them into Sanger quality
    characters. Each character is then looked up in ``fastq_emoji_map`` and
    rendered with ``emojify``.

    Note: every position is divided by the total record count, so for
    variable-length input the positions past the end of shorter reads are
    effectively averaged with zeros.

    Args:
        filename: FASTQ source handed straight to ``SeqIO.parse``.

    Returns:
        None. The emoji string is printed to standard output.
    """
    # Start with room for 500 positions and grow on demand, so reads longer
    # than 500 bp no longer overflow the accumulator.
    totals = np.zeros(500)
    longest = 0
    seq_count = 0

    for r in SeqIO.parse(filename, "fastq"):
        scores = r.letter_annotations["phred_quality"]
        read_len = len(scores)
        if read_len > totals.size:
            new_size = max(read_len, 2 * totals.size)
            totals = np.concatenate(
                [totals, np.zeros(new_size - totals.size)])
        totals[:read_len] += scores
        longest = max(longest, read_len)
        seq_count += 1

    # Slice by the longest read seen instead of np.trim_zeros: trim_zeros also
    # strips *leading* zero sums (shifting every position) and drops genuine
    # trailing positions whose scores are all zero.
    # max(..., 1) keeps the empty-input case a plain empty array.
    means_fp = totals[:longest] / max(seq_count, 1)
    rounded = means_fp.round().astype(int)

    # Placeholder bases: only the quality annotation matters here.
    fake_seq = "a" * len(rounded)

    record = SeqRecord(Seq(fake_seq),
                       id="test",
                       name="mean scores",
                       description="example with mean fastq socres",
                       letter_annotations={
                           'phred_quality': list(rounded)
                       })

    print("".join(
        emojify(fastq_emoji_map[s])
        for s in QualityIO._get_sanger_quality_str(record)
    ))
Exemplo n.º 2
0
def get_vcf_qual(quality):
    '''Map a single quality value to an emoji string.

    Arguments:
       quality: the quality value, or None when no value is available.
           Non-None values are converted with ``int()``.
    Result:
       ``emojify(":question:")`` when ``quality`` is None; otherwise the emoji
       obtained by encoding the value as a Sanger quality character and looking
       it up in ``emaps.fastq_emoji_map_binned`` (falling back to
       ":heart_eyes:" for characters missing from the map).
    '''
    # Identity test: `== None` would dispatch to an arbitrary __eq__.
    if quality is None:
        return emojify(":question:")

    # Hack to do this quickly - use same trick as FASTQE: wrap the value in a
    # one-base placeholder record, convert it to its PHRED (Sanger) encoding,
    # then map the resulting character.
    # TODO make this better
    fake_seq = 'N'
    record_qual = SeqRecord(Seq(fake_seq), id="test", name="lookup",
                            description="example",
                            letter_annotations={'phred_quality': [int(quality)]})
    mapping_dict_qual_use = emaps.fastq_emoji_map_binned
    original_qual = QualityIO._get_sanger_quality_str(record_qual)
    return "".join(
        emojify(mapping_dict_qual_use.get(s, ":heart_eyes:"))
        for s in original_qual
    )
Exemplo n.º 3
0
def map_scores(sequence,
               mapping_dict=emaps.fastq_emoji_map,
               default_value=":heart_eyes:",
               mapping_function=emojify,
               spacer=" "):
    '''Render the quality string of ``sequence`` through a lookup table.

    :param sequence: record passed to ``QualityIO._get_sanger_quality_str``
        (presumably a Biopython SeqRecord carrying PHRED qualities - confirm
        against callers)
    :param mapping_dict: lookup table keyed by Sanger quality character
    :param default_value: value used for characters absent from
        ``mapping_dict``
    :param mapping_function: callable applied to every looked-up value
    :param spacer: string placed between the mapped values
    :return: the mapped values joined with ``spacer``
    '''
    quality_chars = QualityIO._get_sanger_quality_str(sequence)

    rendered = []
    for quality_char in quality_chars:
        looked_up = mapping_dict.get(quality_char, default_value)
        rendered.append(mapping_function(looked_up))

    return spacer.join(rendered)
Exemplo n.º 4
0
def _print_fastq_as_emoji(fastq_handle, seq_map, qual_map):
    '''Print every FASTQ record read from fastq_handle as emoji.

    For each record a header line (arrow emoji plus record id) is printed,
    followed by the emoji for the bases and, on the next line, the emoji for
    the Sanger quality characters. Symbols missing from a map are rendered as
    ":heart_eyes:".
    '''
    for seq in SeqIO.parse(fastq_handle, "fastq"):
        print(emojify(":arrow_forward:") + "  " + seq.id)
        bioemojify = "".join(
            emojify(seq_map.get(s, ":heart_eyes:")) for s in seq.seq)
        bioemojify_qual = "".join(
            emojify(qual_map.get(s, ":heart_eyes:"))
            for s in QualityIO._get_sanger_quality_str(seq))
        print(bioemojify + "\n" + bioemojify_qual)


def convert_fastq(options):
    '''Convert FASTQ file to emoji. If no FASTQ files are specified on the command line then
    read from the standard input (stdin).

    The base map comes from the file named by options.custom (parsed with
    ast.literal_eval) or else local_seq_emoji_map. The quality map comes from
    options.custom_qual, else the binned map when options.bin is set, else the
    full map. File names ending in ".gz" and gzip-compressed stdin are
    decompressed transparently.

    Arguments:
       options: the command line options of the program
    Result:
       None
    '''

    if options.custom:
        with open(options.custom) as f:
            mapping_dict_use = ast.literal_eval(f.read())
    else:
        mapping_dict_use = local_seq_emoji_map

    if options.custom_qual:
        with open(options.custom_qual) as f:
            mapping_dict_qual_use = ast.literal_eval(f.read())
    elif options.bin:
        mapping_dict_qual_use = emaps.fastq_emoji_map_binned
    else:
        mapping_dict_qual_use = emaps.fastq_emoji_map

    if options.fastq_files:
        for fastq_filename in options.fastq_files:
            # The input is FASTQ; the message previously said "FASTA".
            logging.info("Processing FASTQ file from %s", fastq_filename)
            try:
                if fastq_filename.endswith(".gz"):
                    fastq_file = gzip.open(fastq_filename, 'rt')
                else:
                    fastq_file = open(fastq_filename)
            except IOError as exception:
                exit_with_error(str(exception), EXIT_FILE_IO_ERROR)
            else:
                with fastq_file:
                    _print_fastq_as_emoji(fastq_file, mapping_dict_use,
                                          mapping_dict_qual_use)
    else:
        logging.info("Processing FASTQ file from stdin")
        # Peek at the buffered bytes without consuming them: 1f 8b is the gzip
        # magic number, so compressed input can be piped in directly.
        if binascii.hexlify(sys.stdin.buffer.peek(1)[:2]) == b'1f8b':
            stdin_file = gzip.open(sys.stdin.buffer, 'rt')
        else:
            stdin_file = sys.stdin

        _print_fastq_as_emoji(stdin_file, mapping_dict_use,
                              mapping_dict_qual_use)
Exemplo n.º 5
0
def _print_quality_row(stats, source_name, label, scores, emoji_map):
    '''Print one tab-separated row: stats summary, label, emoji for scores.

    scores is converted with QualityIO._get_sanger_quality_str; each resulting
    character is looked up in emoji_map (":heart_eyes:" when missing) and the
    emoji are joined with single spaces.
    '''
    print(stats.pretty(source_name),
          label,
          " ".join(
              emojify(emoji_map.get(s, ':heart_eyes:'))
              for s in QualityIO._get_sanger_quality_str(scores)),
          sep='\t')


def process_files(options):
    '''Compute and print FastaStats for each input FASTA file specified on the
    command line. If no FASTA files are specified on the command line then
    read from the standard input (stdin).

    For each named file the mean quality row is always printed; the max row
    precedes it when options.max is set and the min row follows it when
    options.min is set. With options.bin the binned emoji map is used and the
    row labels carry a " (binned)" suffix. With options.scale the scale is
    printed first via print_scale. For stdin only the plain stats summary is
    printed.

    Arguments:
       options: the command line options of the program
    Result:
       None
    '''
    if options.fasta_files:
        for fasta_filename in options.fasta_files:
            logging.info("Processing FASTA file from %s", fasta_filename)
            try:
                fasta_file = open(fasta_filename)
            except IOError as exception:
                exit_with_error(str(exception), EXIT_FILE_IO_ERROR)
            else:
                with fasta_file:
                    stats = FastaStats().from_file(fasta_file, options.minlen)

                    if options.scale:
                        print_scale(emaps.all_qualities, options.bin)

                    # The binned and unbinned outputs differ only in the
                    # lookup table and the label suffix, so pick both once.
                    if options.bin:
                        logging.info("Binned calculations")
                        emoji_map = emaps.fastq_emoji_map_binned
                        suffix = " (binned)"
                    else:
                        emoji_map = emaps.fastq_emoji_map
                        suffix = ""

                    if options.max:
                        logging.info("Calculate max quality per position")
                        _print_quality_row(stats, fasta_filename,
                                           "max" + suffix,
                                           stats.quality_scores_maxs,
                                           emoji_map)

                    logging.info("Calculate mean quality per position")
                    _print_quality_row(stats, fasta_filename,
                                       "mean" + suffix,
                                       stats.quality_scores_mean,
                                       emoji_map)

                    if options.min:
                        logging.info("Calculate min quality per position")
                        _print_quality_row(stats, fasta_filename,
                                           "min" + suffix,
                                           stats.quality_scores_mins,
                                           emoji_map)

    else:
        logging.info("Processing FASTA file from stdin")
        stats = FastaStats().from_file(sys.stdin, options.minlen)
        print(stats.pretty("stdin"))
Exemplo n.º 6
0
        return read


# First step: walk both read streams in lockstep and filter every pair.
# Pairs where both mates pass are written to stdout as one tab-separated line
# (id, seq1, seq2, qual1, qual2); a lone surviving mate is written to `out`
# in FASTQ format.
totreads = 0
passing_reads = 0
while True:
    try:
        # The builtin next() works with both the Python 2 `.next()` and the
        # Python 3 `__next__` iterator protocols.
        read = next(R1)
        read2 = next(R2)
        totreads += 1
    except StopIteration:
        # Stop as soon as either stream is exhausted.
        break
    fil1 = filterSeq(read, 0.1, 10, 100)
    fil2 = filterSeq(read2, 0.1, 10, 100)
    # Identity tests on purpose: `!= None` calls the record's own __ne__, and
    # Biopython SeqRecord objects (which these appear to be - confirm) raise
    # NotImplementedError on comparison.
    if fil1 is not None and fil2 is not None:
        sys.stdout.write(fil1.id + "\t" + str(fil1.seq) + "\t" +
                         str(fil2.seq) + "\t" +
                         QualityIO._get_sanger_quality_str(fil1) + "\t" +
                         QualityIO._get_sanger_quality_str(fil2) + "\n")
        passing_reads += 1
    elif fil1 is not None:
        SeqIO.write(fil1, out, "fastq")
    elif fil2 is not None:
        SeqIO.write(fil2, out, "fastq")

sys.stderr.write("\t" + str(passing_reads) + " out of " + str(totreads) +
                 " fragments passed the filtering" + "\n")
# NOTE(review): the `commands` module exists only in Python 2; on Python 3 the
# equivalent call is subprocess.getstatusoutput.
data = commands.getstatusoutput('date')
sys.stderr.write("2nd step: filtering out duplicated fragments at " + data[1] +
                 "\n")