Пример #1
0
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] 
               
    # Create BlobDB object              
    blobDb = bt.BlobDb(title)

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)
    # Parse coverage
    blobDb.parseCovs(cov_libs)

    # Parse Tax
    hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]
    blobDb.parseHits(hitLibs)
    
    # Parse nodesDB
    nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f)
    blobDb.nodesDB_f = nodesDB_f
        
    if not os.path.isfile(nodesDB_f):
        print BtLog.status_d['5'] % nodesDB_f
        BtIO.writeNodesDB(nodesDB, nodesDB_f)

    # Computing taxonomy based on taxrules
    print BtLog.status_d['6'] % ",".join(taxrules)
    blobDb.computeTaxonomy(taxrules, nodesDB)

    # Generating BlobDB and writing to file
    print BtLog.status_d['7'] % out_f
    BtIO.writeJson(blobDb.dump(), out_f)
Пример #2
0
def main():
    """Build a BlobDB from the command-line inputs and write it as JSON.

    Options are read with docopt from the module docstring. The FASTA file
    is parsed first, then the taxonomy database, then the similarity hits
    (only when hit files were given) and finally the coverage libraries.
    The resulting ``BtCore.BlobDb`` is dumped through ``BtIO.writeJson``.
    """
    args = docopt(__doc__)
    fasta_f = args['--infile']
    fasta_type = args['--type']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']
    # Coverage is estimated unless --calculate_cov was requested.
    estimate_cov_flag = not args['--calculate_cov']

    try:
        min_bitscore_diff = float(args['--min_diff'])
        min_score = float(args['--min_score'])
    except ValueError:
        # The class (not an instance) must be named here, otherwise a
        # non-numeric option value is never routed to the error handler.
        # NOTE(review): BtLog.error presumably terminates the program;
        # if it returns, the two thresholds above stay unbound - confirm.
        BtLog.error('45')

    # outfile; the output path doubles as title when none was supplied
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not title:
        title = out_f

    # coverage
    if not fasta_type and not bam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    # Libraries are labelled <type><index> and kept in bam, cas, cov order.
    cov_sources = (('bam', bam_fs), ('cas', cas_fs), ('cov', cov_fs))
    cov_libs = [
        BtCore.CovLibObj(lib_type + str(idx), lib_type, lib_f)
        for lib_type, lib_fs in cov_sources
        for idx, lib_f in enumerate(lib_fs)
    ]

    # taxonomy
    hit_libs = [
        BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = interface.__version__
    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(dirname(abspath(__file__)), "../data/nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(nodes=nodes_f,
                                           names=names_f,
                                           nodesDB=nodesDB_f,
                                           nodesDBdefault=nodesDB_default)
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if hit_libs:
        blobDb.parseHits(hit_libs)
        if not taxrules:
            # Default rules: 'bestsumorder' is added only when more than
            # one hit library is present.
            if len(hit_libs) > 1:
                taxrules = ['bestsum', 'bestsumorder']
            else:
                taxrules = ['bestsum']
        blobDb.computeTaxonomy(taxrules, nodesDB, min_score, min_bitscore_diff,
                               tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs,
                         estimate_cov=estimate_cov_flag,
                         prefix=prefix)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)
Пример #3
0
 def output(self):
     """Write this view's data as JSON files below ``self.view_dir``.

     Produces ``meta.json``, ``gc.json``, ``length.json`` and
     ``names.json`` directly in the view directory, one
     ``covs/<cov_lib>.json`` per entry of ``self.covs`` and one
     ``taxrule/<taxrule>/<rank>.json`` per taxrule and rank in
     ``self.tax``. A status line is printed for every file except
     ``meta.json`` and the taxonomy files.
     """
     # print(...) with a single argument behaves the same on Python 2 and
     # Python 3, unlike the bare print statement.
     # meta
     meta = self.get_meta()
     BtIO.writeJson(meta, join(self.view_dir, "meta.json"), indent=2)

     # gc
     gc_f = join(self.view_dir, "gc.json")
     print(BtLog.status_d['13'] % gc_f)
     BtIO.writeJson(self.gc, gc_f, indent=1)

     # length
     length_f = join(self.view_dir, "length.json")
     print(BtLog.status_d['13'] % length_f)
     BtIO.writeJson(self.length, length_f, indent=1)

     # names
     names_f = join(self.view_dir, "names.json")
     print(BtLog.status_d['13'] % names_f)
     BtIO.writeJson(self.names, names_f, indent=1)

     # cov: one file per coverage library inside the "covs" directory
     cov_d = join(self.view_dir, "covs")
     BtIO.create_dir(directory=cov_d)
     for cov_lib, cov in self.covs.items():
         cov_f = join(cov_d, "%s.json" % cov_lib)
         print(BtLog.status_d['13'] % cov_f)
         BtIO.writeJson(cov, cov_f, indent=1)

     # tax: one directory per taxrule, one file per rank
     taxrule_d = join(self.view_dir, "taxrule")
     BtIO.create_dir(directory=taxrule_d)
     for taxrule in self.tax:
         tax_d = join(taxrule_d, taxrule)
         BtIO.create_dir(directory=tax_d)
         for rank in self.tax[taxrule]:
             rank_f = join(tax_d, "%s.json" % rank)
             BtIO.writeJson(self.tax[taxrule][rank], rank_f, indent=1)
Пример #4
0
 def output(self):
     """Write this view's data as flat JSON files in ``self.view_dir``.

     Files written: ``meta.json``, ``gc.json``, ``length.json``,
     ``ncount.json`` (only when some entry of ``self.n_count`` is above
     zero), ``identifiers.json``, ``<name>_cov.json`` and
     ``<name>_read_cov.json`` per coverage library, and for every
     taxrule/rank pair ``<taxrule>_<rank>.json``,
     ``<taxrule>_<rank>_score.json`` and ``<taxrule>_<rank>_cindex.json``.
     Numeric series are wrapped as ``{"values": [...]}``.
     """
     # print(...) with a single argument behaves the same on Python 2 and
     # Python 3. The list comprehensions below replace map(), which on
     # Python 3 returns an iterator that cannot be serialised to JSON.
     # meta
     meta = self.get_meta()
     BtIO.writeJson(meta, join(self.view_dir, "meta.json"), indent=2)

     # gc
     gc_f = join(self.view_dir, "gc.json")
     print(BtLog.status_d['13'] % gc_f)
     BtIO.writeJson({"values": self._format_float(self.gc)}, gc_f, indent=1)

     # length
     length_f = join(self.view_dir, "length.json")
     print(BtLog.status_d['13'] % length_f)
     BtIO.writeJson({"values": self.length}, length_f, indent=1)

     # Ns: written only when at least one count is positive
     if max(self.n_count) > 0:
         n_f = join(self.view_dir, "ncount.json")
         print(BtLog.status_d['13'] % n_f)
         # Values below 0.2 are raised to 0.2.
         # NOTE(review): presumably to keep values positive for
         # log-scaled display - confirm.
         BtIO.writeJson(
             {"values": [max(x, 0.2) for x in self.n_count]},
             n_f,
             indent=1)

     # identifiers
     ids_f = join(self.view_dir, "identifiers.json")
     print(BtLog.status_d['13'] % ids_f)
     BtIO.writeJson(self.names, ids_f, indent=1)

     # cov
     for cov_name, cov in self.covs.items():
         name = self._remove_cov_suffix(cov_name, self.blobDb.covLibs)
         cov_f = join(self.view_dir, "%s_cov.json" % name)
         print(BtLog.status_d['13'] % cov_f)
         BtIO.writeJson({"values": self._format_float(cov, 0.02)},
                        cov_f,
                        indent=1)

     # read_cov: same 0.2 floor as the N counts
     for cov_name, cov in self.read_covs.items():
         name = self._remove_cov_suffix(cov_name, self.blobDb.covLibs)
         cov_f = join(self.view_dir, "%s_read_cov.json" % name)
         print(BtLog.status_d['13'] % cov_f)
         BtIO.writeJson({"values": [max(x, 0.2) for x in cov]},
                        cov_f,
                        indent=1)

     # tax: assignment, score and c-index files per taxrule and rank
     for taxrule in self.tax:
         for rank in self.tax[taxrule]:
             stem = "%s_%s" % (taxrule, rank)

             tax = self._keyed_list(self.tax[taxrule][rank])
             BtIO.writeJson(tax,
                            join(self.view_dir, "%s.json" % stem),
                            indent=1)

             score = self.tax_scores[taxrule][rank]['score']
             BtIO.writeJson({"values": [max(x, 0.2) for x in score]},
                            join(self.view_dir, "%s_score.json" % stem),
                            indent=1)

             cindex = self.tax_scores[taxrule][rank]['c_index']
             BtIO.writeJson({"values": cindex},
                            join(self.view_dir, "%s_cindex.json" % stem),
                            indent=1)