#!/usr/bin/env python
"""Queue one run_rainbowtron.py job per finished merged video, then block
until all jobs complete via LSF.

A video qualifies when its viz config file exists and contains an 'end'
marker, and submit_summarize_runs reports a successful analysis dir for it.
"""

from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

os.chdir('/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/')
logfile = '../rbt-logs/log'
currjobs = submit_summarize_runs.get_currjobs()

# Collect analysis dirs for eligible videos.  Previously this was a
# filter(None, [listcomp]) that computed cfg_fn(vid) twice per video and
# leaked the open file handle; compute once and close the handle here.
analysis_dirs = []
for vid in sorted(glob('*/merge6mbit_720_*.mp4')):
    cfg = viz_vidtools.cfg_fn(vid)
    if not os.path.exists(cfg):
        continue
    with open(cfg) as cfg_fh:
        if 'end' not in cfg_fh.read():
            continue
    analysis_dir = submit_summarize_runs.get_successful_analysis_dir(
        vid,
        submit_summarize_runs.RERUN_COEFFS,
        currjobs=currjobs,
        **submit_summarize_runs.DEFAULT_PARAMS)
    if analysis_dir:  # drop falsy results, as filter(None, ...) did
        analysis_dirs.append(analysis_dir)

# One command per analysis dir; 'rainbowtron' is the done-marker base.
trd = {}
for analysis_dir in analysis_dirs:
    rbtdone = os.path.join(analysis_dir, 'rainbowtron')
    cmd = 'run_rainbowtron.py %s' % analysis_dir
    run_safe.add_cmd(trd, rbtdone, cmd, force_write=True)

LSF.lsf_run_until_done(trd, logfile, 'normal_serial', '', 'rainbow', 100, 3)
    compress_cmd = ' | %s -c ' % compress
else:
    compress_ext = ''
    compress_cmd = ''
    

#First off, get read group files
# NOTE(review): bamfile, outroot, ext, compress_ext, compress_cmd,
# samtools_cmd, SLURM, MAX_DUR, MAX_RAM and PARTITION are all defined
# earlier in this file (outside this chunk) -- confirm before editing.
rgcmd = 'get_bam_rg_by_sample.py %s %s' % (bamfile, outroot)
ret = os.system(rgcmd)

# Abort immediately if the read-group extraction helper failed.
if ret != 0:
    print >> sys.stderr, 'failed:\n%s' % rgcmd
    raise OSError

# The helper wrote one <sample>.rgids.txt file per sample into outroot.
sample_files = glob(os.path.join(outroot,'*.rgids.txt'))
bambase = os.path.basename(bamfile)
stcmdstr = samtools_cmd.replace(' ','_')  # filename-safe form of the samtools subcommand

trd = {}

# Build one shell pipeline per sample: pull that sample's read groups out of
# the source BAM, re-emit as BAM, run the requested samtools subcommand, and
# (optionally) pipe through the compressor into the per-sample output file.
for sf in sample_files:
    sm = os.path.basename(sf).rsplit('.',2)[0]  # sample name from <sm>.rgids.txt
    outfile = os.path.join(outroot, '%s-%s-%s%s%s' % (bambase, sm, stcmdstr, ext, compress_ext))
    cmd = 'samtools view -hR %s %s | samtools view -bS - | samtools %s /dev/stdin %s> %s' % (sf, bamfile, samtools_cmd, compress_cmd, outfile)
    slurmbase = os.path.join(outroot,'%s-%s-%s' % (bambase, sm, stcmdstr))
    run_safe.add_cmd(trd, slurmbase, cmd,force_write=True)


# Submit every per-sample job through SLURM; retry failures up to 3 times.
logfile = os.path.join(outroot,'%s-%s_SLURMwd/log' % (bambase,stcmdstr))
SLURM.run_until_done(trd,'samtools_by_indiv',logfile,MAX_DUR,MAX_RAM,100,PARTITION,MAX_RETRY=3)
                
                expected_fq_d[glob_key] = 1
        else:
            errstr = 'no fastq for %s' % d
            raise ValueError, errstr

    # Deduplicate the ((r1, r2), (flowcell, lane, index, cutsite)) targets
    # collected above.  NOTE(review): preprocess_targets and opts come from
    # earlier in this file, outside this chunk.
    preprocess_targets = list(set(preprocess_targets))

    # Two separate queues: "pp" = single-read lanes (preprocess only),
    # "ol" = paired lanes that get overlap-merged before preprocessing.
    ol_to_run_dict = {}
    pp_to_run_dict = {}

    for (r1,r2),(fc,l,idx,cs) in preprocess_targets:
        if r2 is None: #single read; preprocess only
            cmd = 'preprocess_radtag_lane.py -w -u -s %s -fc %s -l %s -idx %s %s %s' % (cs,fc,l,idx,opts.force_db_id and '--force_db_id' or '',r1)
            ss_base = os.path.join(os.path.dirname(r1),'sr_preprocess_lane%s_index%s_DBID%s' % (l,idx,opts.force_db_id))
            run_safe.add_cmd(pp_to_run_dict, ss_base, cmd, force_write=True)
        else:
            # Paired reads: overlap_preprocess.py wraps the same preprocess
            # flags (-w -u -s ...) via its -pp argument.
            cmd = 'overlap_preprocess.py -fc %s -l %s -idx %s -pp "-w -u -s %s %s" %s %s' % (fc,l,idx,cs,opts.force_db_id and '--force_db_id' or '',r1,r2)
            ss_base = os.path.join(os.path.dirname(r1),'ol_preprocess_lane%s_index%s_DBID%s' % (l,idx,opts.force_db_id))
            run_safe.add_cmd(ol_to_run_dict, ss_base, cmd, force_write=True)

    # Debug dump of both queues (Python 2 print statements).
    print pp_to_run_dict
    print
    print ol_to_run_dict

    # Merge both queues into a single submission dict.
    to_run_dict = {}
    to_run_dict.update(pp_to_run_dict)
    to_run_dict.update(ol_to_run_dict)

    jobname_base = 'preprocess'
    logbase = os.path.join(opts.outroot,'slurmlog','preprocess')
    # MAPPING COMPLETE


    # Genotyping steps follow
    #  IF --reduced_reads, then realign and reduce before merging to single bam
    #  otherwise merge into 1 bam, then realign (if --realign) then perform relevant GATK/samtools/whatever steps
    import subprocess
    # run reduced reads (and single-sample realignment)
    if opts.reduce_reads:
        # Queue one realign_reduce_bam.py job per read-group BAM and record
        # the expected '.realigned.reduced.bam' outputs for checking below.
        # NOTE(review): rg_ref_bams, reference_fasta, outroot, njobs,
        # schedule_jobs, JOB_MEM_OVERHEAD and MAX_RETRY are defined earlier
        # in this file, outside this chunk.
        to_run_dict = {}
        rr_rg_ref_bams = []
        for bam in rg_ref_bams:
            rr_bam = os.path.splitext(bam)[0] + '.realigned.reduced.bam'
            rr_done = os.path.splitext(bam)[0] + '.realigned.reduced'
            rr_cmd = 'realign_reduce_bam.py %s %s' % (bam,reference_fasta)
            run_safe.add_cmd(to_run_dict,rr_done,rr_cmd,force_write=True)
            rr_rg_ref_bams.append(rr_bam)

        #SLURM here
        # Resubmit until every reduced BAM completes, giving up after 3
        # rounds.  NOTE(review): the loop body continues past this chunk;
        # presumably to_run is pruned and runs incremented by the BAM check
        # below -- confirm against the full file.
        to_run = rr_rg_ref_bams
        runs = 0
        while to_run:
            if runs >= 3:
                print >> sys.stderr, '3 attempts made without success; the following bams did not complete:\n%s' % '\n'.join(to_run)
                raise ValueError

            logfile = os.path.join(outroot,'%slog' % opts.scheduler,'realign-reduce-log')
            schedule_jobs(to_run_dict,opts.scheduler,'realign-reduce',logfile,opts.lsf_queue,requeue=opts.fallback_queue,njobs=njobs,duration=opts.max_job_duration,mem=(opts.gatk_ram*1024)+JOB_MEM_OVERHEAD,flags='-R "select[mem>20000]"',MAX_RETRY=MAX_RETRY)
            #LSF.lsf_run_until_done(to_run_dict,logfile,opts.lsf_queue,'-R "select[mem>20000]"','realign-reduce',njobs,MAX_RETRY)

            #CHECK BAMS WITH samtools
#!/usr/bin/env python
"""Submit <runs> random_wigs.py permutation jobs for one phenotype via SLURM.

Usage: script.py <geno> <pheno> <runs>
"""

# SLURM resource settings.
queue = "general"
max_job_duration = 1440       # minutes
job_ram = (2 + 1) * 1024      # MB: 2 GB job + 1 GB overhead
job_batches = 500

import os, sys, SLURM, run_safe

geno, pheno, runs = sys.argv[1:]

basedir, basename = os.path.split(pheno)
phenoroot = os.path.splitext(basename)[0]
donedir = os.path.join(basedir, phenoroot + "-permute-donedir/")
logdir = os.path.join(basedir, phenoroot + "-permute-logs/")
logfile = os.path.join(logdir, "log-")

# Create both working directories up front.  Previously only donedir was
# created, so SLURM logging into the fresh "-permute-logs/" dir could fail.
for d in (donedir, logdir):
    if not os.path.exists(d):
        os.makedirs(d)

# One job per permutation index; done-marker file is donedir/<i>.
trd = {}
for i in range(int(runs)):
    run_safe.add_cmd(trd, donedir + str(i), "random_wigs.py %s %s %s" % (geno, pheno, i), force_write=True)


# LSF.lsf_run_until_done(trd,logfile,queue,'','random-wigs',1000,3)
SLURM.run_until_done(trd, "random-wigs", logfile, max_job_duration, job_ram, job_batches, queue, MAX_RETRY=3)
#!/usr/bin/env python
"""Queue rainbowtron post-processing for every finished merged video and
wait for the LSF jobs to complete."""

from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

os.chdir("/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/")
logfile = "../rbt-logs/log"
currjobs = submit_summarize_runs.get_currjobs()

# Walk the merged 720p videos in sorted order.  A video is eligible once its
# config file exists and records an 'end' marker; keep only the truthy
# analysis dirs (this mirrors the original filter(None, [...]) exactly).
analysis_dirs = []
for vid in sorted(glob("*/merge6mbit_720_*.mp4")):
    if not os.path.exists(viz_vidtools.cfg_fn(vid)):
        continue
    if "end" not in open(viz_vidtools.cfg_fn(vid)).read():
        continue
    result = submit_summarize_runs.get_successful_analysis_dir(
        vid, submit_summarize_runs.RERUN_COEFFS, currjobs=currjobs, **submit_summarize_runs.DEFAULT_PARAMS
    )
    if result:
        analysis_dirs.append(result)

# One run_rainbowtron.py command per eligible dir; the 'rainbowtron' file
# inside each dir serves as the done marker.
trd = {}
for analysis_dir in analysis_dirs:
    run_safe.add_cmd(
        trd,
        os.path.join(analysis_dir, "rainbowtron"),
        "run_rainbowtron.py %s" % analysis_dir,
        force_write=True,
    )


LSF.lsf_run_until_done(trd, logfile, "normal_serial", "", "rainbow", 100, 3)
#!/usr/bin/env python
"""Fan out one 'wigs simevo' SLURM job per per-contig genotype file.

Usage: script.py <genodir> <pheno>
"""

# Per-job SLURM resource limits.
queue = 'general'
max_job_duration = 1440      # minutes
job_ram = (4 + 1) * 1024     # MB: 4 GB + 1 GB overhead
job_batches = 500

import os, sys, SLURM, run_safe
from glob import glob

genodir, pheno = sys.argv[1:]

basedir = os.path.dirname(pheno)
logfile = os.path.join(basedir, 'logs/SLURM/')

# One '<contig>-geno.txt' genotype file per contig.
genos = glob(os.path.join(genodir, '*-geno.txt'))

sys.stderr.write('wigs on %s contigs\n' % len(genos))

to_run_dict = {}
for geno_fn in genos:
    out_prefix = geno_fn.replace('-geno.txt', '_output-')
    job_cmd = 'wigs simevo -g %s -s %s -f %s -x 40000 -b 500 -t 100 -d 1' % (geno_fn, pheno, out_prefix)
    # The done-marker base is the output prefix minus its trailing '-'.
    run_safe.add_cmd(to_run_dict, out_prefix[:-1], job_cmd, force_write=True)

SLURM.run_until_done(to_run_dict, 'wigs-by-chrom', logfile, max_job_duration, job_ram, job_batches, queue, MAX_RETRY=3)