Exemplo n.º 1
0
def tell_running_mode(fn):
    """Choose the tree-building program for ``fn`` from its file size.

    The value returned by ``cmn.filesize(fn)`` is converted to float and
    divided by 1024 twice (presumably bytes -> megabytes; confirm against
    ``cmn.filesize``). Returns 'RAxML' when the result is below 25,
    otherwise 'ExaML'.
    """
    megabytes = float(cmn.filesize(fn)) / 1024 / 1024
    return 'RAxML' if megabytes < 25 else 'ExaML'
Exemplo n.º 2
0
def filter_best_fastq(fns):
    """Keep the files of a single directory for every sample ID.

    Files are grouped by sample ID, taken as the part of
    ``cmn.lastName(fn)`` before the first underscore. If all files of a
    sample share one parent directory they are all kept. Otherwise only
    the files of the directory whose files have the largest combined
    ``cmn.filesize`` are kept; on a tie the directory seen first wins.

    Args:
        fns: iterable of file paths that use '/' as the separator.

    Returns:
        A new list with the selected paths, sample by sample in order of
        first appearance.
    """
    # group the paths by sample ID
    by_sample = {}
    for fn in fns:
        sample = cmn.lastName(fn).split('_')[0]
        by_sample.setdefault(sample, []).append(fn)

    selected = []
    for sample_fns in by_sample.values():
        # group this sample's paths by parent directory (text before the
        # last '/', or '' when the path has no '/')
        by_dir = {}
        for fn in sample_fns:
            by_dir.setdefault(fn.rpartition('/')[0], []).append(fn)

        if len(by_dir) == 1:
            selected += sample_fns
            continue

        # Several copies of the data: take the directory with the biggest
        # total size. max() returns the first maximal candidate and always
        # yields a real list, so a sample whose files are all empty
        # (every total == 0) no longer ends in `+= None` / TypeError.
        selected += max(
            by_dir.values(),
            key=lambda group: sum(cmn.filesize(each) for each in group),
        )
    return selected
Exemplo n.º 3
0
def compute_fileSize(alist):
    """Return the summed size of all paths in ``alist``.

    Paths containing 'archive/butterfly' are measured by running a helper
    script through ssh and parsing its stripped output as an integer.
    Every other path is measured with ``cmn.filesize`` and divided by
    1024 twice.

    NOTE(review): the two branches only add up meaningfully if the remote
    script reports the same unit as the local branch - confirm.
    """
    total = 0
    for path in alist:
        if 'archive/butterfly' not in path:
            total += cmn.filesize(path) / 1024 / 1024
            continue
        remote_cmd = 'ssh [email protected] "python /home/wenlin/my_programs/filesize.py %s"' % path
        total += int(cmn.cmd2info(remote_cmd).strip())
    return total
Exemplo n.º 4
0

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build one fasta2fastmeTree.py command per file found in the directory
# given as the first command-line argument.
wdir = os.path.abspath(sys.argv[1].rstrip('/'))

project = cmn.lastName(os.getcwd()).split('_')[0]

# list every entry of wdir into the file 'falist', then read it back
f_list = 'falist'

cmd = 'ls %s/* > %s' % (wdir, f_list)
cmn.run(cmd)

falist = [os.path.abspath(fn) for fn in cmn.file2lines(f_list)]

# The job/core counts are scaled from the size of the first listed file.
# Floor division keeps them whole numbers: with true division Ncores
# became a fraction such as 1.44, which was then passed on the command
# line as the core count.
Njob = 3
fa_size = int(cmn.filesize(falist[0])) // (1024 * 1024)

Njob = max(Njob, 50 * fa_size // 5000 + 1)

Ncores = 48 * Njob // 100
print('number of cores:', Ncores)
print('number of jobs:', Njob)

# one shell command per input file, each run from inside outdir
cmds = []
outdir = 'making_fastme_trees'
cmn.mkdir(outdir)
for fa in falist:
    cmd = 'cd %s; python /project/biophysics/Nick_lab/wli/sequencing/scripts/fasta2fastmeTree.py %s %s' % (
        outdir, fa, Ncores)
    cmds.append(cmd)
Exemplo n.º 5
0
    except:
        print("Usage: *.py", file=sys.stderr)
        sys.exit()

    # Cut every sequence read from `fn` into per-gene slices and write one
    # FASTA file per gene into the directory '<lastName(fn)>_gene_fasta'.
    geneRange = read_gene_range(frange)

    seqDict, order_list = read_fa(fn)

    # one "<gene>\t<j - i>\n" line per gene, written to a file at the end
    stat = []
    outdir = '%s_gene_fasta' % cmn.lastName(fn)
    cmn.mkdir(outdir)
    for gene in geneRange:
        # each value unpacks to two slice bounds used as seq[i:j] below
        i, j = geneRange[gene]
        print(gene, i, j)
        stat.append('%s\t%s\n' % (gene, j - i))

        dn = '%s/%s.fa' % (outdir, gene)
        with open(dn, 'w') as dp:
            # keep the record order given by order_list
            for name in order_list:
                seq = seqDict[name][i:j]
                # Skip slices that are empty after stripping '-' from both
                # ends and then 'N' from both ends.
                # NOTE(review): the strips run one after the other, so some
                # slices made only of '-' and 'N' (e.g. 'N-N') are still
                # written - confirm whether strip('-N') was intended.
                if seq.strip('-').strip('N') == '':
                    continue
                fasta = '>%s\n%s\n' % (name, seq)
                dp.write(fasta)

        # report genes for which nothing was written
        if cmn.filesize(dn) == 0:
            print('fileSize0', dn)

    # save the per-gene lengths next to the outputs
    dn = cmn.lastName(fn) + '_takenRange.info'
    cmn.write_file(''.join(stat), dn)
Exemplo n.º 6
0
    todo_jobs = []
    for sp in rdict:
        records = rdict[sp]
        #print 'processing lib %s' % sp

        for record in records:
            fastq, ref = record
            reflabel = cmn.lastName(ref).replace('.fa', '')
            outlabel = '%s_%s' % (sp, reflabel)
            outdir = '%s/%s/%s' % (cwd, sp, reflabel)

            tmpcheck = cmn.cmd2lines(('ls %s/*sam 2> /dev/null' % outdir))
            if len(tmpcheck) > 0:
                total = 0
                for fn in tmpcheck:
                    total += cmn.filesize(fn)

                if total != 0:
                    print('skip finished mapping %s' % outdir)
                    continue

            cmn.mkdir(outdir)
            os.chdir(outdir)

            #paired is a dict
            paired, unpaired = separate_by_pair(fastq.split(','))

            cmd = 'cd %s;\n' % (refdir)
            for key in paired:
                lib1, lib2 = paired[key]
                cmd += '/home2/wli/local/bwa-0.7.12/bwa mem -t 32 -M %s %s %s > %s/%s_paired.sam;\n' % (