Пример #1
0
def validate_input_create(main_dir, args):
    '''
    Validate the docopt arguments of the "create" step and build the
    coverage/hit library objects from the given input files.

    Accepts:
        - main_dir : directory the default "data/nodesDB.txt" is joined onto
        - args     : docopt args; reads --infile, --type, --sam, --bam, --cov,
                     --cas, --taxfile, --out, --db, --names, --nodes,
                     --taxrule and --title
    Returns a 10-tuple, in this order:
        - title      : --title, or out_f when no title was given
        - fasta_f
        - fasta_type
        - cov_libs   : bt.CovLibObj list, ordered bam, sam, cas, cov
        - hit_libs   : bt.hitLibObj list, one per --taxfile
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f      : "<basename of --out>.BlobDB.json" or "BlobDB.json"
    Problems are reported through BtLog.error(), checked in this order:
        - '0'  : an input file does not exist (once per missing file)
        - '3'  : nodesDB_f is not a file and --names/--nodes are not both set
        - '18' : no --taxfile was given
        - '2'  : --type is not one of the supported assembly types
        - '1'  : neither an assembly type nor any coverage file was given
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']

    # Only the basename of --out is kept as the prefix of the output file.
    out_f = args['--out']
    if out_f:
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"
    title = args['--title'] or out_f

    # Do files exist ? (optional single-file arguments may be None)
    candidates = ([fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs +
                  [names_f, nodes_f] + hit_fs)
    for f in candidates:
        if f is not None and not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        # the default path is relative to main_dir
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not (names_f and nodes_f):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided? It is only required when no assembly type is set.
    if not (fasta_type or bam_fs or sam_fs or cov_fs or cas_fs):
        BtLog.error('1')

    # Libraries are named "<format><index>", numbered separately per format.
    cov_libs = []
    for fmt, lib_fs in (('bam', bam_fs), ('sam', sam_fs),
                        ('cas', cas_fs), ('cov', cov_fs)):
        cov_libs.extend(
            bt.CovLibObj(fmt + str(idx), fmt, lib_f)
            for idx, lib_f in enumerate(lib_fs)
        )

    hit_libs = [
        bt.hitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    return (title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules,
            nodesDB_f, nodes_f, names_f, out_f)
Пример #2
0
    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] 
               
    # Create BlobDB object              
    blobDb = bt.BlobDb(title)

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)
    # Parse coverage
    blobDb.parseCovs(cov_libs)

    # Parse Tax
    hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]
    blobDb.parseHits(hitLibs)
    
    # Parse nodesDB
    nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f)
    blobDb.nodesDB_f = nodesDB_f
        
    if not os.path.isfile(nodesDB_f):
        print BtLog.status_d['5'] % nodesDB_f
        BtIO.writeNodesDB(nodesDB, nodesDB_f)

    # Computing taxonomy based on taxrules
    print BtLog.status_d['6'] % ",".join(taxrules)
    blobDb.computeTaxonomy(taxrules, nodesDB)

    # Generating BlobDB and writing to file
Пример #3
0
def validate_input_create(main_dir, args):
    '''
    Validate the docopt arguments of the "create" step and build the
    coverage/hit library objects from the given input files.

    Accepts:
        - main_dir : directory the default "data/nodesDB.txt" is joined onto
        - args     : docopt args; reads --infile, --type, --sam, --bam, --cov,
                     --cas, --taxfile, --out, --db, --names, --nodes,
                     --taxrule and --title
    Returns a 10-tuple, in this order:
        - title      : --title, or out_f when no title was given
        - fasta_f
        - fasta_type
        - cov_libs   : bt.CovLibObj list, ordered bam, sam, cas, cov
        - hit_libs   : bt.hitLibObj list, one per --taxfile
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f      : "<basename of --out>.BlobDB.json" or "BlobDB.json"
    Problems are reported through BtLog.error(), checked in this order:
        - '0'  : an input file does not exist (once per missing file)
        - '3'  : nodesDB_f is not a file and --names/--nodes are not both set
        - '18' : no --taxfile was given
        - '2'  : --type is not one of the supported assembly types
        - '1'  : neither an assembly type nor any coverage file was given
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']

    # Only the basename of --out is kept as the prefix of the output file.
    out_f = args['--out']
    if out_f:
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"
    title = args['--title'] or out_f

    # Do files exist ? (optional single-file arguments may be None)
    candidates = ([fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs +
                  [names_f, nodes_f] + hit_fs)
    for f in candidates:
        if f is not None and not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        # the default path is relative to main_dir
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not (names_f and nodes_f):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided? It is only required when no assembly type is set.
    if not (fasta_type or bam_fs or sam_fs or cov_fs or cas_fs):
        BtLog.error('1')

    # Libraries are named "<format><index>", numbered separately per format.
    cov_libs = []
    for fmt, lib_fs in (('bam', bam_fs), ('sam', sam_fs),
                        ('cas', cas_fs), ('cov', cov_fs)):
        cov_libs.extend(
            bt.CovLibObj(fmt + str(idx), fmt, lib_f)
            for idx, lib_f in enumerate(lib_fs)
        )

    hit_libs = [
        bt.hitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    return (title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules,
            nodesDB_f, nodes_f, names_f, out_f)