Python BtLog.progress примеры, lib.BtLog.progress Python примеры использования

Пример #1

0

Показать файл

Файл: BtIO.py Проект: hyphaltip/blobtools

def readCas(infile, order_of_blobs):
    """Collect per-contig coverage and read counts from `clc_mapping_info -n`.

    Args:
        infile: Path of the CAS file; appended verbatim to the command line.
        order_of_blobs: Indexable collection of contig names. The first
            number on each matching output line, minus one, is used as the
            index into it.

    Returns:
        A tuple `(cov_dict, reads_total, reads_mapped, read_cov_dict)`.
        `cov_dict` maps contig name to the float captured in the sixth
        regex group, `read_cov_dict` maps contig name to the int captured
        in the third group; `reads_total` and `reads_mapped` are passed
        through unchanged from `checkCas`.
    """
    seqs_total, reads_total, reads_mapped = checkCas(infile)
    progress_unit = int(len(order_of_blobs)/100)
    # NOTE(review): the "." in "\d+.\d{2}" is unescaped and therefore matches
    # any character, not only a decimal point.
    cas_line_re = re.compile(r"\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+.\d{2})\s+(\d+)\s+(\d+.\d{2})")
    command = "clc_mapping_info -n " + infile
    cov_dict = {}
    read_cov_dict = {}
    seqs_parsed = 0
    # Run the command once and reuse the result; previously runCmd was called
    # a second time only to test the truthiness of its return value.
    output = runCmd(command)
    if output:
        for line in output:
            cas_line_match = cas_line_re.search(line)
            if cas_line_match:
                # -1 because index of contig list starts with zero
                idx = int(cas_line_match.group(1)) - 1
                try:
                    name = order_of_blobs[idx]
                    reads = int(cas_line_match.group(3))
                    cov = float(cas_line_match.group(6))
                except (IndexError, ValueError):
                    # Best effort: skip lines whose contig number is out of
                    # range or whose coverage column is not a valid float.
                    pass
                else:
                    cov_dict[name] = cov
                    read_cov_dict[name] = reads
                    seqs_parsed += 1
            BtLog.progress(seqs_parsed, progress_unit, seqs_total)
        BtLog.progress(seqs_total, progress_unit, seqs_total)
    return cov_dict, reads_total, reads_mapped, read_cov_dict

Пример #2

0

Показать файл

Файл: BtIO.py Проект: hyphaltip/blobtools

def readBam(infile, set_of_blobs):
    """Sum matched bases and count reads per sequence from `samtools view`.

    Args:
        infile: Path of the BAM file; appended verbatim to the command line.
        set_of_blobs: Collection of known sequence names; reads on any other
            sequence trigger warning '2' and are not added to the dicts.

    Returns:
        A tuple `(base_cov_dict, reads_total, parsed_reads, read_cov_dict)`.
        `base_cov_dict` maps sequence name to the summed lengths of all
        `<n>M` CIGAR runs, `read_cov_dict` maps sequence name to the number
        of reads counted, `reads_total` comes from `checkBam`, and
        `parsed_reads` is the number of reads with at least one matched base.
    """
    reads_total, reads_mapped = checkBam(infile)
    # "+ 1" keeps the unit non-zero when fewer than 1000 reads are mapped
    progress_unit = int(int(reads_mapped)/1000) + 1 # lazy fix
    base_cov_dict = {}
    read_cov_dict = {}
    cigar_match_re = re.compile(r"(\d+)M") # only gets digits before M's
    # execute samtools to get only mapped reads
    command = "samtools view -F 4 " + infile
    # only one counter since only yields mapped reads
    parsed_reads = 0
    for line in runCmd(command):
        match = line.split("\t")
        # Compare the field COUNT: the previous `match >= 11` compared the
        # list itself with an int, which never rejects a short line.
        if len(match) >= 11:
            seq_name = match[2]
            base_cov = sum(int(matching) for matching in cigar_match_re.findall(match[5]))
            if base_cov:
                parsed_reads += 1
                if seq_name not in set_of_blobs:
                    print(BtLog.warn_d['2'] % (seq_name, infile))
                else:
                    base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
                    read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
        BtLog.progress(parsed_reads, progress_unit, reads_total)
    BtLog.progress(reads_total, progress_unit, reads_total)
    if int(reads_mapped) != int(parsed_reads):
        # warn_d lives in BtLog; the unqualified name used before is not
        # defined in this function.
        print(BtLog.warn_d['3'] % (reads_mapped, parsed_reads))
    return base_cov_dict, reads_total, parsed_reads, read_cov_dict

Пример #3

0

Показать файл

Файл: BtIO.py Проект: mc-assemblage/blobtools

def readBam(infile, set_of_blobs):
    """Accumulate per-sequence base and read coverage from a BAM file.

    The file is streamed through `samtools view -F 4` and each tab-separated
    line is inspected: column 3 is the sequence name, column 6 the CIGAR.

    Args:
        infile: Path of the BAM file; appended verbatim to the command line.
        set_of_blobs: Collection of known sequence names. Reads mapped to a
            name outside it trigger warning '2' and are left out of the
            returned dicts (they still count towards `parsed_reads`).

    Returns:
        A tuple `(base_cov_dict, reads_total, parsed_reads, read_cov_dict)`:
        summed `<n>M` CIGAR lengths per sequence, the total from `checkBam`,
        the number of reads with a non-zero matched length, and the read
        count per sequence.
    """
    reads_total, reads_mapped = checkBam(infile)
    # "+ 1" keeps the unit non-zero when fewer than 1000 reads are mapped
    progress_unit = int(int(reads_mapped) / 1000) + 1  # lazy fix
    base_cov_dict = {}
    read_cov_dict = {}
    cigar_match_re = re.compile(r"(\d+)M")  # only gets digits before M's
    # execute samtools to get only mapped reads
    command = "samtools view -F 4 " + infile
    # only one counter since only yields mapped reads
    parsed_reads = 0
    for line in runCmd(command):
        match = line.split("\t")
        # Fixed guard: test the number of columns. `match >= 11` compared a
        # list with an int and so never filtered out truncated lines.
        if len(match) >= 11:
            seq_name = match[2]
            base_cov = sum(
                int(matching) for matching in cigar_match_re.findall(match[5])
            )
            if base_cov:
                parsed_reads += 1
                if seq_name not in set_of_blobs:
                    print(BtLog.warn_d['2'] % (seq_name, infile))
                else:
                    base_cov_dict[seq_name] = base_cov_dict.get(seq_name,
                                                                0) + base_cov
                    read_cov_dict[seq_name] = read_cov_dict.get(seq_name,
                                                                0) + 1
        BtLog.progress(parsed_reads, progress_unit, reads_total)
    BtLog.progress(reads_total, progress_unit, reads_total)
    if int(reads_mapped) != int(parsed_reads):
        # Fixed: the warning table is BtLog.warn_d, not a bare `warn_d`.
        print(BtLog.warn_d['3'] % (reads_mapped, parsed_reads))
    return base_cov_dict, reads_total, parsed_reads, read_cov_dict

Пример #4

0

Показать файл

def readBam(infile, fasta_headers):
    """Tally reads and matched bases per sequence from `samtools view`.

    Relies on two names that are not parameters: `mq` (passed to
    `samtools view -q`) and `no_base_cov_flag` (when true, only reads are
    counted and the CIGAR is not inspected).

    Args:
        infile: Path of the BAM file, appended to the command line.
        fasta_headers: Collection of known sequence names; reads on other
            sequences only produce warning '2'.

    Returns:
        `(base_cov_dict, read_cov_dict, reads_total, parsed_reads)` where
        the dicts are keyed by sequence name, `reads_total` comes from
        `checkBam` and `parsed_reads` counts reads on known sequences.
    """
    reads_total, _reads_mapped = checkBam(infile)
    progress_unit = int(int(reads_total) / 1000)
    m_run_re = re.compile(r"(\d+)M")  # digits directly before an M

    # mapped reads of the primary alignment only
    command = " ".join(
        ["samtools view -q", str(mq), "-F 256 -F 4", infile])

    base_cov_dict = {}
    read_cov_dict = {}
    parsed_reads = 0  # single counter: the command yields mapped reads only
    for sam_line in runCmd(command):
        fields = sam_line.split("\t")
        seq_name = fields[2]
        if seq_name in fasta_headers:
            read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
            if not no_base_cov_flag:
                matched = sum(
                    int(run) for run in m_run_re.findall(fields[5]))
                if matched:
                    base_cov_dict[seq_name] = (
                        base_cov_dict.get(seq_name, 0) + matched)
            parsed_reads += 1
        else:
            print(BtLog.warn_d['2'] % (seq_name, infile))
        BtLog.progress(parsed_reads, progress_unit, reads_total)
    BtLog.progress(reads_total, progress_unit, reads_total)
    return base_cov_dict, read_cov_dict, reads_total, parsed_reads

Пример #5

0

Показать файл

Файл: bam2cov.py Проект: evolgenomology/blobtools

def readBam(infile, fasta_headers):
    """Count reads (and optionally matched bases) per sequence in a BAM file.

    Uses the names `mq` and `no_base_cov_flag`, which are not parameters of
    this function: `mq` is handed to `samtools view -q`, and a true
    `no_base_cov_flag` disables the CIGAR-based base counting.

    Args:
        infile: Path of the BAM file, appended to the command line.
        fasta_headers: Collection of accepted sequence names; a read on any
            other sequence is reported with warning '2' and not counted.

    Returns:
        `(base_cov_dict, read_cov_dict, reads_total, parsed_reads)`; both
        dicts are keyed by sequence name.
    """
    reads_total, _unused_mapped = checkBam(infile)
    progress_unit = int(int(reads_total)/1000)
    cigar_m_re = re.compile(r"(\d+)M") # only the digits in front of an M
    base_cov_dict, read_cov_dict = {}, {}
    # samtools restricted to mapped reads from the primary alignment
    command = " ".join(("samtools view -q", str(mq), "-F 256 -F 4", infile))
    parsed_reads = 0
    for record in runCmd(command):
        columns = record.split("\t")
        seq_name = columns[2]
        known = seq_name in fasta_headers
        if not known:
            print(BtLog.warn_d['2'] % (seq_name, infile))
        else:
            read_cov_dict[seq_name] = 1 + read_cov_dict.get(seq_name, 0)
            if not no_base_cov_flag:
                run_lengths = [int(digits) for digits in cigar_m_re.findall(columns[5])]
                base_cov = sum(run_lengths)
                if base_cov:
                    base_cov_dict[seq_name] = base_cov + base_cov_dict.get(seq_name, 0)
            parsed_reads += 1
        BtLog.progress(parsed_reads, progress_unit, reads_total)
    BtLog.progress(reads_total, progress_unit, reads_total)
    return base_cov_dict, read_cov_dict, reads_total, parsed_reads

Пример #6

0

Показать файл

def parseCas(infile, order_of_blobs):
    """Parse per-contig coverage from the output of `clc_mapping_info -n`.

    Args:
        infile: Path of the CAS file. `BtLog.error('0', infile)` is called
            when it is not an existing file.
        order_of_blobs: Indexable collection of contig names; the first
            number of each matching line, minus one, indexes into it.

    Returns:
        `(cov_dict, reads_total, reads_mapped, read_cov_dict)`. `cov_dict`
        maps contig name to the float in the sixth captured column and
        `read_cov_dict` maps contig name to the int in the third captured
        column; the two read totals are those returned by `checkCas`.
    """
    if not isfile(infile):
        BtLog.error('0', infile)
    seqs_total, reads_total, reads_mapped = checkCas(infile)
    progress_unit = int(len(order_of_blobs) / 100)
    # NOTE(review): the "." in "\d+.\d{2}" is unescaped and matches any
    # character, not only a decimal point.
    cas_line_re = re.compile(
        r"\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+.\d{2})\s+(\d+)\s+(\d+.\d{2})")
    command = "clc_mapping_info -n " + infile
    cov_dict = {}
    read_cov_dict = {}
    seqs_parsed = 0
    # Invoke the command a single time; it used to be invoked once for the
    # truthiness check and again for the iteration.
    output = runCmd(command=command)
    if output:
        for line in output:
            cas_line_match = cas_line_re.search(line)
            if cas_line_match:
                # -1 because index of contig list starts with zero
                idx = int(cas_line_match.group(1)) - 1
                try:
                    name = order_of_blobs[idx]
                    reads = int(cas_line_match.group(3))
                    cov = float(cas_line_match.group(6))
                except (IndexError, ValueError):
                    # Best effort: ignore contig numbers outside the list
                    # and coverage columns that are not valid floats.
                    pass
                else:
                    cov_dict[name] = cov
                    read_cov_dict[name] = reads
                    seqs_parsed += 1
                BtLog.progress(seqs_parsed, progress_unit, seqs_total)
    return cov_dict, reads_total, reads_mapped, read_cov_dict

Пример #7

0

Показать файл

Файл: BtIO.py Проект: hyphaltip/blobtools

def writeNodesDB(nodesDB, nodesDB_f):
    """Write `nodesDB` to `nodesDB_f` as a tab-separated text file.

    The first line is `# nodes_count = <n>`; every key other than
    "nodes_count" then becomes one `node<TAB>rank<TAB>name<TAB>parent` line.

    Args:
        nodesDB: Dict holding the key "nodes_count" plus one entry per node,
            each entry being a dict with 'rank', 'name' and 'parent'.
        nodesDB_f: Path of the file to (over)write.
    """
    total = nodesDB['nodes_count']
    written = 0
    with open(nodesDB_f, 'w') as out:
        out.write("# nodes_count = %s\n" % total)
        for node_id in nodesDB:
            if node_id == "nodes_count":
                continue  # bookkeeping key, not a node
            written += 1
            BtLog.progress(written, 1000, total)
            entry = nodesDB[node_id]
            row = (node_id, entry['rank'], entry['name'], entry['parent'])
            out.write("%s\t%s\t%s\t%s\n" % row)

Пример #8

0

Показать файл

def parseBam(infile, set_of_blobs, no_base_cov_flag):
    '''
    Count reads (and, unless disabled, aligned bases) per contig of a BAM file.

    checkBam returns reads_total and reads_mapped.
    While parsing, base_cov_dict holds a list of per-read base counts for
    each contig (list appending should be faster); the lists are summed into
    one number per contig before returning.

    Args:
        infile: Path of the BAM file. BtLog.error('0', infile) is called
            when it is not an existing file.
        set_of_blobs: Iterable of contig names; both result dicts are
            pre-populated with exactly these keys.
        no_base_cov_flag: When true, only reads are counted and the CIGAR
            column is never inspected (every base coverage stays 0).

    Returns:
        (base_cov_dict, reads_total, reads_mapped, read_cov_dict). If the
        number of lines seen differs from the reads_mapped reported by
        checkBam, warning '3' is printed and the seen count is returned as
        reads_mapped instead.
    '''
    if not isfile(infile):
        BtLog.error('0', infile)
    reads_total, reads_mapped = checkBam(infile)
    progress_unit = int(reads_mapped / 1000)
    base_cov_dict = {blob: [] for blob in set_of_blobs}
    read_cov_dict = {blob: 0 for blob in set_of_blobs}
    # Digits directly before M, X or =. The previous pattern "(\d+)M|X|="
    # parsed as three alternatives, so a bare "X" or "=" matched with an
    # empty group; int('') then raised and the read was dropped with a
    # misleading "unknown sequence" warning.
    cigar_match_re = re.compile(r"(\d+)[MX=]")
    # execute samtools to get only mapped reads
    # (no optical duplicates, no secondary alignments)
    command = blobtools.SAMTOOLS + " view -F 1024 -F 4 -F 256 " + infile
    seen_reads = 0
    if not (no_base_cov_flag):
        for line in runCmd(command=command):
            seen_reads += 1
            match = line.split()
            try:
                base_cov_dict[match[2]].append(
                    sum(
                        int(matching)
                        for matching in cigar_match_re.findall(match[5])
                    ))
                read_cov_dict[match[2]] += 1
            except (KeyError, IndexError):
                # contig not in set_of_blobs, or line without a CIGAR column
                print(BtLog.warn_d['2'] % (match[2]))
            BtLog.progress(seen_reads, progress_unit, reads_mapped)
    else:
        for line in runCmd(command=command):
            seen_reads += 1
            match = line.split()
            try:
                read_cov_dict[match[2]] += 1
            except KeyError:
                # contig not in set_of_blobs
                print(BtLog.warn_d['2'] % (match[2]))
            BtLog.progress(seen_reads, progress_unit, reads_mapped)
    if int(reads_mapped) != int(seen_reads):
        print(BtLog.warn_d['3'] % (reads_mapped, seen_reads))
        reads_mapped = seen_reads
    # collapse the per-read lists into one total per contig
    base_cov_dict = {
        seq_name: sum(base_covs)
        for seq_name, base_covs in base_cov_dict.items()
    }
    return base_cov_dict, reads_total, reads_mapped, read_cov_dict

Пример #9

0

Показать файл

Файл: BtIO.py Проект: mc-assemblage/blobtools

def writeNodesDB(nodesDB, nodesDB_f):
    """Dump the node table `nodesDB` into the text file `nodesDB_f`.

    Output layout: a `# nodes_count = <n>` header followed by one
    tab-separated `node, rank, name, parent` line per key of `nodesDB`
    (the "nodes_count" key itself is skipped).

    Args:
        nodesDB: Dict with a "nodes_count" entry and, for every node, a dict
            providing 'rank', 'name' and 'parent'.
        nodesDB_f: Destination path; the file is opened in write mode.
    """
    line_template = "%s\t%s\t%s\t%s\n"
    nodes_count = nodesDB['nodes_count']
    with open(nodesDB_f, 'w') as handle:
        handle.write("# nodes_count = %s\n" % nodes_count)
        seen = 0
        for key in nodesDB:
            if key != "nodes_count":
                seen += 1
                BtLog.progress(seen, 1000, nodes_count)
                info = nodesDB[key]
                handle.write(line_template % (key, info['rank'],
                                              info['name'], info['parent']))

Пример #10

0

Показать файл

def parseCov(infile, set_of_blobs):
    """Read a coverage file in either the headerless or the '#'-header layout.

    Until the first line starting with "#" is seen, lines are parsed as
    `name<TAB>base_cov`. From that line on, "## Total Reads", "## Mapped
    Reads" and "## Unmapped Reads" header lines supply the read totals and
    data lines are parsed as `name<TAB>read_cov<TAB>base_cov`.

    Args:
        infile: Path of the coverage file; `BtLog.error('0', infile)` is
            called when it is not an existing file.
        set_of_blobs: Collection of known sequence names. Entries for other
            names trigger warning '2' and are not stored.

    Returns:
        `(base_cov_dict, reads_total, reads_mapped, reads_unmapped,
        read_cov_dict)`; the dicts are keyed by sequence name.
    """
    if not isfile(infile):
        BtLog.error('0', infile)
    two_column_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
    three_column_re = re.compile(r"^(\S+)\t(\d+\.*\d*)\t(\d+\.*\d*)")

    base_cov_dict = {}
    read_cov_dict = {}
    reads_total = 0
    reads_mapped = 0
    reads_unmapped = 0

    seqs_parsed = 0
    progress_unit = 1
    headerless = True  # flips permanently on the first '#' line
    with open(infile) as handle:
        for line in handle:
            if line.startswith("#"):
                headerless = False
                if line.startswith("## Total Reads"):
                    reads_total = int(line.split(" = ")[1])
                elif line.startswith("## Mapped Reads"):
                    reads_mapped = int(line.split(" = ")[1])
                elif line.startswith("## Unmapped Reads"):
                    reads_unmapped = int(line.split(" = ")[1])
            elif headerless:
                hit = two_column_re.search(line)
                if hit:
                    seqs_parsed += 1
                    name = hit.group(1)
                    base_cov = float(hit.group(2))
                    if name in set_of_blobs:
                        base_cov_dict[name] = base_cov
                    else:
                        print(BtLog.warn_d['2'] % (name))
            else:
                hit = three_column_re.search(line)
                if hit:
                    seqs_parsed += 1
                    name = hit.group(1)
                    read_cov = int(hit.group(2))
                    base_cov = float(hit.group(3))
                    if name in set_of_blobs:
                        read_cov_dict[name] = read_cov
                        base_cov_dict[name] = base_cov
                    else:
                        print(BtLog.warn_d['2'] % (name))
            BtLog.progress(seqs_parsed, progress_unit, len(set_of_blobs))
    return base_cov_dict, reads_total, reads_mapped, reads_unmapped, read_cov_dict

Пример #11

0

Показать файл

 def computeTaxonomy(self, taxrules, nodesDB):
     """Assign a taxonomy to every blob for each requested taxrule.

     Stores the lineages derived from `self.set_of_taxIds` and `nodesDB` in
     `self.lineages`, records `taxrules` on the instance, and fills
     `blob.taxonomy[taxrule]` for every blob in `self.dict_of_blobs`: the
     result of `BtTax.taxRule` when the blob has hits, `BtTax.noHit()`
     otherwise.

     Args:
         taxrules: Iterable of taxrule identifiers.
         nodesDB: Node table handed to the `BtTax` helpers.
     """
     self.lineages = BtTax.getLineages(
         BtTax.getTreeList(self.set_of_taxIds, nodesDB), nodesDB)
     self.taxrules = taxrules
     for done, blob in enumerate(self.dict_of_blobs.values(), 1):
         BtLog.progress(done, 100, self.seqs)
         for rule in taxrules:
             if not blob.hits:
                 blob.taxonomy[rule] = BtTax.noHit()
             else:
                 blob.taxonomy[rule] = BtTax.taxRule(rule, blob.hits, self.lineages)

Пример #12

0

Показать файл

Файл: BtIO.py Проект: mc-assemblage/blobtools

def readNodesDB(nodesDB_f):
    """Load a node table written as `node<TAB>rank<TAB>name<TAB>parent` lines.

    A `# nodes_count = <n>` comment line supplies the expected number of
    nodes, which is only used for progress reporting. Other comment lines
    and blank lines are ignored.

    Args:
        nodesDB_f: Path of the nodesDB text file.

    Returns:
        Dict mapping node id to `{'rank': ..., 'name': ..., 'parent': ...}`
        (all values are the strings read from the file).

    Raises:
        ValueError: If a data line does not have exactly four tab-separated
            fields, or the nodes_count value is not an integer.
    """
    nodesDB = {}
    nodes_count = 0
    i = 0
    with open(nodesDB_f) as fh:
        for line in fh:
            if line.startswith("#"):
                # Parse "key = value" explicitly. The former
                # lstrip("# nodes_count = ") strips a SET of characters, not
                # a prefix, and crashed on any other comment line.
                key, sep, value = line.lstrip("#").partition("=")
                if sep and key.strip() == "nodes_count":
                    nodes_count = int(value.strip())
            elif line.strip():
                i += 1
                node, rank, name, parent = line.rstrip("\n").split("\t")
                nodesDB[node] = {'rank': rank, 'name': name, 'parent': parent}
                BtLog.progress(i, 1000, nodes_count)
    return nodesDB

Пример #13

0

Показать файл

Файл: BtIO.py Проект: hyphaltip/blobtools

def readNodesDB(nodesDB_f):
    """Read a tab-separated node table into a dict keyed by node id.

    Data lines are `node<TAB>rank<TAB>name<TAB>parent`. The header line
    `# nodes_count = <n>` provides the total used for progress reporting;
    any other line starting with "#" and any blank line is skipped.

    Args:
        nodesDB_f: Path of the nodesDB text file.

    Returns:
        Dict mapping node id to a dict with the keys 'rank', 'name' and
        'parent' (values are strings taken from the file).

    Raises:
        ValueError: If a data line does not split into exactly four fields,
            or the nodes_count value is not an integer.
    """
    nodesDB = {}
    nodes_count = 0
    i = 0
    with open(nodesDB_f) as fh:
        for line in fh:
            if line.startswith("#"):
                # str.lstrip takes a character set, not a prefix, so the old
                # lstrip("# nodes_count = ") only worked by accident and
                # raised on unrelated comments. Split on "=" instead.
                key, sep, value = line.lstrip("#").partition("=")
                if sep and key.strip() == "nodes_count":
                    nodes_count = int(value.strip())
            elif line.strip():
                i += 1
                node, rank, name, parent = line.rstrip("\n").split("\t")
                nodesDB[node] = {'rank' : rank, 'name' : name, 'parent' : parent}
                BtLog.progress(i, 1000, nodes_count)
    return nodesDB

Пример #14

0

Показать файл

Файл: BtIO.py Проект: evolgenomology/blobtools

def readCov(infile, set_of_blobs):
    """Read a coverage file with or without a '#'-prefixed header.

    Until a line starting with "#" is seen, lines are parsed as
    `name<TAB>base_cov`. Afterwards "# Total Reads" / "# Mapped Reads" lines
    supply the read totals, the "# Unmapped Reads", "# Parameters" and
    "# contig_id" lines are skipped, and remaining lines are parsed as
    `name<TAB>read_cov<TAB>base_cov`.

    Names missing from `set_of_blobs` trigger warning '2' but their values
    are stored regardless.

    Args:
        infile: Path of the coverage file.
        set_of_blobs: Collection of known sequence names.

    Returns:
        `(base_cov_dict, reads_total, reads_mapped, read_cov_dict)`.
    """
    legacy_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
    table_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)\t(\d+\.*\d*)")
    skipped_headers = ("# Unmapped Reads", "# Parameters", "# contig_id")

    base_cov_dict = {}
    read_cov_dict = {}
    reads_total = 0
    reads_mapped = 0

    seqs_parsed = 0
    progress_unit = 1
    headerless = True  # switched off for good by the first '#' line
    with open(infile) as handle:
        for line in handle:
            if line.startswith("#"):
                headerless = False
            if headerless:
                hit = legacy_line_re.search(line)
                if hit:
                    seqs_parsed += 1
                    name = hit.group(1)
                    base_cov = float(hit.group(2))
                    if name not in set_of_blobs:
                        print(BtLog.warn_d['2'] % (name, infile))
                    base_cov_dict[name] = base_cov
            elif line.startswith("# Total Reads"):
                reads_total = int(line.split(" = ")[1])
            elif line.startswith("# Mapped Reads"):
                reads_mapped = int(line.split(" = ")[1])
            elif not line.startswith(skipped_headers):
                hit = table_line_re.search(line)
                if hit:
                    seqs_parsed += 1
                    name = hit.group(1)
                    read_cov = int(hit.group(2))
                    base_cov = float(hit.group(3))
                    if name not in set_of_blobs:
                        print(BtLog.warn_d['2'] % (name, infile))
                    read_cov_dict[name] = read_cov
                    base_cov_dict[name] = base_cov
            BtLog.progress(seqs_parsed, progress_unit, len(set_of_blobs))
    return base_cov_dict, reads_total, reads_mapped, read_cov_dict

Пример #15

0

Показать файл

 def computeTaxonomy(self, taxrules, nodesDB, min_score, min_bitscore_diff,
                     tax_collision_random):
     """Compute the taxonomy of every blob under each taxrule.

     Prints status '6' with the comma-joined taxrules, derives
     `self.lineages` from `self.set_of_taxIds` and `nodesDB`, records the
     settings on the instance, fills `blob.taxonomy[taxrule]` for every blob
     in `self.dict_of_blobs` (`BtTax.noHit()` for blobs without hits) and
     finally resets `self.set_of_taxIds` to an empty set.

     Args:
         taxrules: Iterable of taxrule identifiers.
         nodesDB: Node table handed to the `BtTax` helpers.
         min_score: Stored as `self.min_score` and passed to `BtTax.taxRule`.
         min_bitscore_diff: Stored as `self.min_diff` and passed to
             `BtTax.taxRule`.
         tax_collision_random: Stored on the instance and passed to
             `BtTax.taxRule`.
     """
     print(BtLog.status_d['6'] % ",".join(taxrules))
     self.lineages = BtTax.getLineages(
         BtTax.getTreeList(self.set_of_taxIds, nodesDB), nodesDB)
     self.taxrules = taxrules
     self.min_score = min_score
     self.min_diff = min_bitscore_diff
     self.tax_collision_random = tax_collision_random
     for done, blob in enumerate(self.dict_of_blobs.values(), 1):
         BtLog.progress(done, 100, self.seqs)
         for rule in taxrules:
             if not blob.hits:
                 blob.taxonomy[rule] = BtTax.noHit()
                 continue
             blob.taxonomy[rule] = BtTax.taxRule(
                 rule, blob.hits, self.lineages, min_score,
                 min_bitscore_diff, tax_collision_random)
     self.set_of_taxIds = set()

Пример #16

0

Показать файл

Файл: seqfilter.py Проект: cschu/blobtools

def main():
    """Filter a FASTA file by a list of sequence headers.

    Reads `--infile`, `--list`, `--invert` and `--out` from the docopt
    arguments. Sequences whose header is in the list are kept, or, with
    `--invert`, those whose header is not. The kept records are written to
    the file named by `BtIO.getOutFile(fasta_f, prefix, "filtered.fna")`.
    Warning '8' lists headers that were emitted more than once.
    """
    args = docopt(__doc__)
    fasta_f = args['--infile']
    list_f = args['--list']
    invert = args['--invert']
    prefix = args['--out']

    output = []
    out_f = BtIO.getOutFile(fasta_f, prefix, "filtered.fna")

    print(BtLog.status_d['1'] % ("list", list_f))
    items = BtIO.parseSet(list_f)
    items_count = len(items)
    print(BtLog.status_d['22'] % fasta_f)
    items_parsed = []
    sequences = 0
    for header, sequence in BtIO.readFasta(fasta_f):
        sequences += 1
        # keep listed headers normally, unlisted ones when inverting
        if (header in items) != bool(invert):
            items_parsed.append(header)
            output.append(">%s\n%s\n" % (header, sequence))
        BtLog.progress(len(output), 10, items_count, no_limit=True)
    BtLog.progress(items_count, 10, items_count)

    items_parsed_count = len(items_parsed)
    # Explicit float division, guarded against an empty FASTA file
    # (previously a ZeroDivisionError).
    fraction = float(items_parsed_count) / sequences if sequences else 0.0
    print(BtLog.status_d['23'] % ('{:.2%}'.format(fraction), "{:,}".format(items_count), "{:,}".format(items_parsed_count), "{:,}".format(sequences)))

    # Single pass to find headers emitted more than once; the former
    # list.count() inside a comprehension was quadratic in the header count.
    seen = set()
    duplicates = set()
    for header in items_parsed:
        if header in seen:
            duplicates.add(header)
        else:
            seen.add(header)
    if duplicates:
        print(BtLog.warn_d['8'] % "\n\t\t\t".join(sorted(duplicates)))

    with open(out_f, "w") as fh:
        print(BtLog.status_d['24'] % out_f)
        fh.write("".join(output))

Пример #17

0

Показать файл

Файл: BtIO.py Проект: mc-assemblage/blobtools

def readCov(infile, set_of_blobs):
    """Read `name<TAB>coverage` lines from `infile` into a dict.

    Lines that do not match the pattern are ignored. A name missing from
    `set_of_blobs` triggers warning '2' but its coverage is stored anyway.

    Args:
        infile: Path of the coverage file.
        set_of_blobs: Collection of known sequence names; its size is the
            maximum passed to the progress reporter.

    Returns:
        Dict mapping sequence name to coverage (float).
    """
    line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
    total = len(set_of_blobs)
    progress_unit = total // 100
    cov_dict = {}
    seqs_parsed = 0
    with open(infile) as handle:
        for line in handle:
            BtLog.progress(seqs_parsed, 10, total)
            hit = line_re.search(line)
            if hit:
                seqs_parsed += 1
                name = hit.group(1)
                cov = float(hit.group(2))
                if name not in set_of_blobs:
                    print(BtLog.warn_d['2'] % (name, infile))
                cov_dict[name] = cov
            BtLog.progress(seqs_parsed, progress_unit, total)
        BtLog.progress(total, progress_unit, total)
    return cov_dict

Пример #18

0

Показать файл

Файл: BtIO.py Проект: hyphaltip/blobtools

def readCov(infile, set_of_blobs):
    """Parse a two-column `name<TAB>coverage` file.

    Every line matching the pattern contributes one entry; other lines are
    skipped. Names outside `set_of_blobs` are reported with warning '2' and
    stored all the same.

    Args:
        infile: Path of the coverage file.
        set_of_blobs: Collection of known sequence names; `len()` of it is
            used as the progress maximum.

    Returns:
        Dict of sequence name -> coverage as float.
    """
    expected = len(set_of_blobs)
    progress_unit = expected // 100
    pattern = re.compile(r"^(\S+)\t(\d+\.*\d*)")
    cov_dict = {}
    seqs_parsed = 0
    with open(infile) as cov_fh:
        for raw_line in cov_fh:
            BtLog.progress(seqs_parsed, 10, expected)
            found = pattern.search(raw_line)
            if found is not None:
                seqs_parsed += 1
                name, cov = found.group(1), float(found.group(2))
                if not (name in set_of_blobs):
                    print(BtLog.warn_d['2'] % (name, infile))
                cov_dict[name] = cov
            BtLog.progress(seqs_parsed, progress_unit, expected)
        BtLog.progress(expected, progress_unit, expected)
    return cov_dict

Пример #19

0

Показать файл

def parseBamForFilter(infile, include_unmapped, outfile, include, exclude,
                      gzip, do_sort, keep_sorted, sort_threads):
    '''
    parse BAM to extract readpairs

    Each read is classified by its reference name as 'In' (included),
    'Ex' (excluded) or 'Un' (reference "*"); a pair's type is the two sorted
    labels joined. Pairs whose type is a key of the structure returned by
    init_read_pairs are collected and written with write_read_pair_seqs, and
    a summary table goes to the "info.txt" file named by getOutFile.

    Args:
        infile: Path of the BAM file; BtLog.error('0', infile) is called
            when it is not an existing file.
        include_unmapped: Forwarded to init_read_pairs.
        outfile: Output prefix, forwarded to init_read_pairs and getOutFile.
        include: Reference names labelled 'In'; when given, all other names
            default to 'Ex'.
        exclude: Reference names labelled 'Ex' (only used when `include` is
            empty); all other names default to 'In'.
        gzip: Not referenced in this function.
        do_sort: When true, the BAM is first name-sorted into
            "<infile>.readsorted.bam" and that file is parsed instead.
        keep_sorted: When false, the name-sorted file is removed at the end.
        sort_threads: Value passed to `samtools sort -@`.

    Returns:
        1
    '''
    if not isfile(infile):
        BtLog.error('0', infile)
    if do_sort:
        # Interpolate the thread count; the literal text "sort_threads" used
        # to be passed to samtools instead of the parameter's value.
        command = blobtools.SAMTOOLS + ' sort -@ %s -n -O bam -T temp -o %s.readsorted.bam %s' % (
            sort_threads, infile, infile)
        runCmd(command=command, wait=True)
        infile = "%s.readsorted.bam" % infile

    progress_unit = int(100000)
    command = blobtools.SAMTOOLS + " view -f 1 -F 256 -F 2048 %s" % infile

    pair_count_by_type, pair_seqs_by_type, out_fs_by_type = init_read_pairs(
        outfile, include_unmapped, include, exclude)
    if include:
        sequence_to_type_dict = defaultdict(lambda: 'Ex')
        for incl in include:
            sequence_to_type_dict[incl] = 'In'
    else:
        sequence_to_type_dict = defaultdict(lambda: 'In')
        for excl in exclude or ():
            sequence_to_type_dict[excl] = 'Ex'
    sequence_to_type_dict['*'] = 'Un'

    read_pair_type = None
    iterator = runCmd(command=command)
    seen_reads = 0
    print(BtLog.status_d['26'] % infile)
    sam_lines = list(iterator)
    print(BtLog.status_d['22'] % infile)
    reads_total = len(sam_lines)
    for i in xrange(0, len(sam_lines), 2):
        read1 = sam_lines[i].split()
        try:
            seen_reads += 2
            read2 = sam_lines[i + 1].split()
            read_pair_type = "".join(
                sorted([
                    sequence_to_type_dict[read1[2]],
                    sequence_to_type_dict[read2[2]]
                ]))
            BtLog.progress(seen_reads, progress_unit, reads_total)
            if read_pair_type in pair_seqs_by_type:
                pair_seqs_by_type[read_pair_type].append(
                    get_read_pair_seqs(read1, read2))
                pair_count_by_type[read_pair_type] += 1
        except IndexError:
            # e.g. an unpaired last line or a line with too few columns
            print(BtLog.warn_d['11'])
    if not seen_reads == reads_total:
        BtLog.progress(reads_total, progress_unit, reads_total)
    write_read_pair_seqs(pair_count_by_type, pair_seqs_by_type, out_fs_by_type)
    # info log
    total_pairs = int(seen_reads / 2)
    info_string = []
    info_string.append(('Total pairs', "{:,}".format(total_pairs),
                        '{0:.1%}'.format(1.00)))
    for read_pair_type, count in pair_count_by_type.items():
        # explicit float division; 0.0 when the BAM yielded no pairs at all
        # (previously a ZeroDivisionError)
        fraction = float(count) / total_pairs if total_pairs else 0.0
        info_string.append((read_pair_type + ' pairs', "{:,}".format(count),
                            '{0:.1%}'.format(fraction)))
    info_out_f = getOutFile(outfile, None, "info.txt")
    with open(info_out_f, 'w') as info_fh:
        print(BtLog.status_d['24'] % info_out_f)
        info_fh.write(get_table(info_string))
    if do_sort and not keep_sorted:
        os.remove(infile)
    return 1

Пример #20

0

Показать файл

 def view(self, **kwargs):
     """Populate every view object with headers and per-sequence data, then
     call its `output()`.

     Keyword Args:
         viewObjs: View objects; each is handled according to its `name`
             ('table', 'concoct_cov', 'covlib', 'experimental',
             'concoct_tax').
         ranks: Ranks used for the table header/lines and concoct_tax.
         taxrule: Taxrule used for the table and concoct_tax views.
         hits_flag: Forwarded to the table header and table lines.
         seqs: Sequence names to process; falls back to
             `self.order_of_blobs` when empty.
         cov_libs: Coverage library names; falls back to all of
             `self.covLibs` when empty.
         progressbar: When true, progress is reported via `BtLog.progress`.
     """
     # arguments
     viewObjs = kwargs['viewObjs']
     ranks = kwargs['ranks']
     taxrule = kwargs['taxrule']
     hits_flag = kwargs['hits_flag']
     seqs = kwargs['seqs']
     cov_libs = kwargs['cov_libs']
     progress_bar = kwargs['progressbar']
     # Default sequences if no subset
     if not seqs:
         seqs = self.order_of_blobs
     # Default cov_libs if no subset
     cov_lib_names = cov_libs
     if not cov_libs:
         cov_lib_names = list(self.covLibs)
     tax_lib_names = sorted(self.hitLibs)
     lineages = self.lineages
     # setup
     for viewObj in viewObjs:
         if viewObj.name == 'table':
             viewObj.header = self.getTableHeader(taxrule, ranks, hits_flag,
                                                  cov_lib_names)
         if viewObj.name == 'concoct_cov':
             viewObj.header = self.getConcoctCovHeader(cov_lib_names)
         if viewObj.name == 'covlib':
             viewObj.header = self.getCovHeader(cov_lib_names)
         if viewObj.name == 'experimental':
             viewObj.covs = {cov_lib: [] for cov_lib in cov_lib_names}
             viewObj.covs["covsum"] = []
             # Distinct loop name: reusing `taxrule` here overwrote the
             # requested taxrule used by the table / concoct_tax views.
             for known_taxrule in self.taxrules:
                 viewObj.tax[known_taxrule] = {
                     rank: [] for rank in BtTax.RANKS}
     # bodies
     for i, seq in enumerate(seqs):
         if progress_bar:
             BtLog.progress(i, 1000, len(seqs))
         blob = self.dict_of_blobs[seq]
         for viewObj in viewObjs:
             if viewObj.name == 'table':
                 viewObj.body.append(
                     self.getTableLine(blob, taxrule, ranks, hits_flag,
                                       cov_lib_names, tax_lib_names,
                                       lineages))
             if viewObj.name == 'concoct_cov':
                 viewObj.body.append(
                     self.getConcoctCovLine(blob, cov_lib_names))
             if viewObj.name == 'experimental':
                 viewObj.names.append(blob['name'])
                 viewObj.gc.append(blob['gc'])
                 viewObj.length.append(blob['length'])
                 cov_sum = 0.0
                 for cov_lib in blob['covs']:
                     viewObj.covs[cov_lib].append(blob['covs'][cov_lib])
                     cov_sum += blob['covs'][cov_lib]
                 viewObj.covs['covsum'].append(cov_sum)
                 # Separate names here as well, so the `taxrule` argument
                 # stays intact for the views handled after this one.
                 for blob_taxrule in blob['taxonomy']:
                     for blob_rank in blob['taxonomy'][blob_taxrule]:
                         viewObj.tax[blob_taxrule][blob_rank].append(
                             blob['taxonomy'][blob_taxrule][blob_rank]['tax'])
             if viewObj.name == 'concoct_tax':
                 for rank in ranks:
                     if rank not in viewObj.body:
                         viewObj.body[rank] = []
                     viewObj.body[rank].append(
                         self.getConcoctTaxLine(blob, rank, taxrule))
             if viewObj.name == 'covlib':
                 viewObj.body.append(self.getCovLine(blob, cov_lib_names))
     if progress_bar:
         BtLog.progress(len(seqs), 1000, len(seqs))
     for viewObj in viewObjs:
         viewObj.output()

Python BtLog.progress примеры использования