def step4(model_list):
    """Pick the penalty with the best cross-validation likelihood.

    For each candidate penalty a job script is written to ``job_dir`` and
    submitted with ``qsub`` unless a non-empty ``.ridgeparams`` output for
    that penalty already exists. After all jobs with ``job_prefix`` have
    finished, the last whitespace-separated token on the last line of each
    ``.ridgeparams`` file is taken as that penalty's likelihood.

    Args:
        model_list: annotation names making up the model. ``distance_tss``
            is passed to the program through ``-dists``; every other
            annotation goes through ``-w``.

    Returns:
        A ``(penalty, likelihood)`` tuple for the highest likelihood; both
        elements are strings exactly as read/defined here.

    Raises:
        IOError/OSError if an expected ``.ridgeparams`` file is missing,
        IndexError if it is empty, ValueError if its last token is not a
        number.
    """
    print("Finding the penalty with the best cross-validation likelihood...")
    p_list = ["0.05","0.10","0.15","0.20","0.25","0.30","0.35","0.40","0.45","0.50",
              "0.55","0.60","0.65","0.70","0.75","0.80","0.85","0.90","0.95","1.0"]
    model_name = "-".join(model_list)
    model = "+".join(model_list)

    # Arguments shared by every penalty. "-w" is left out when no
    # non-distance annotation remains, so it never receives an empty value
    # (which would make it swallow the following "-p" flag).
    base_args = [fgwas, "-i", input_file, "-cc"]
    if "distance_tss" in model_list:
        base_args += ["-dists", "distance_tss"+":"+home_dir+"dist_model"]
    binary_annots = [x for x in model_list if x != "distance_tss"]
    if binary_annots:
        base_args += ["-w", "+".join(binary_annots)]

    for p in p_list:
        job_file = job_dir+job_prefix+model_name+"-"+p+".sh"
        job_tag = job_prefix+model_name+"-p"+p
        out_prefix = out_dir+model+"-p"+p
        command_list = base_args + ["-p", p, "-xv", "-print", "-onlyp",
                                    "-o", out_prefix]
        command = " ".join(command_list)
        script='''
#$ -N %s%s
#$ -pe shmem 1
#$ -P mccarthy.prjc
#$ -q long.qc
#$ -e %s%s.error
#$ -o %s%s.out
echo "start time" `date`
%s
echo "end time" `date`
        ''' % (job_prefix,model_name+"-p"+p, log_dir,job_tag,
        log_dir,job_tag, command)
        with open(job_file,'w') as fout:
            fout.write(script)
        out_path = out_prefix+".ridgeparams" # try onlyp for comparison
        # Submit only when there is no usable (non-empty) result yet.
        if not os.path.exists(out_path) or os.stat(out_path).st_size == 0:
            sp.check_call(["qsub", job_file])
    job_list = moniter_rescomp_jobs.get_job_ids(job_prefix)
    moniter_rescomp_jobs.wait_for_jobs(job_list)

    print("Finding best parameter value...")
    track_dic = {}
    for p in p_list:
        with open(out_dir+model+"-p"+p+".ridgeparams",'r') as fin:
            line_list = fin.readlines()
        llk = line_list[-1].strip().split()[-1]
        track_dic[p]=llk
        print (p + ": " + str(llk))
    # Compare numerically: the values are text, and ordering them as strings
    # ranks e.g. "-1234.5" above "-1230.1". Ties go to the earliest penalty
    # in p_list.
    best_p = max(p_list, key=lambda cand: float(track_dic[cand]))
    best = (best_p, track_dic[best_p])
    print("Optimal parameter value evaluated: %s"   % best[0])
    return best
def step1():
    '''
    Submit one single-annotation job per annotation and wait for them all.

    Annotation names are the whitespace-separated fields of the first line
    of the gzipped input_file, from index start_index onward (start_index is
    a module-level setting; the original note gives it as 10, i.e. column 11).
    For each annotation a job script is written to job_dir and handed to
    qsub unless a non-empty <out_dir><annot>.llk already exists. The
    "distance_tss" annotation is passed with -dists, all others with -w.
    Returns nothing.
    '''
    header = gzip.open(input_file, 'rb')
    annotations = header.readline().strip().split()[start_index:]
    header.close()

    for annot in annotations:
        job_name = "job_" + annot
        job_file = job_dir + "job_" + annot + ".sh"
        script_handle = open(job_file, 'w')

        # Common prefix, then the annotation-specific flag, then the output.
        command_list = [fgwas, "-i", input_file, "-cc"]
        if annot == "distance_tss":
            command_list.extend(["-dists", annot + ":" + home_dir + "dist_model"])
        else:
            command_list.extend(["-w", annot])
        command_list.extend(["-o", out_dir + annot])
        command = " ".join(command_list)

        # removed #$ -V from script
        script = '''
#$ -N job_%s
#$ -pe shmem 1
#$ -P mccarthy.prjc
#$ -q short.qc
#$ -e %s%s.error
#$ -o %s%s.out

echo "start time" `date`
%s
echo "end time" `date`
        ''' % (annot, log_dir, job_name, log_dir, job_name, command)
        script_handle.write(script)
        script_handle.close()

        submit = ["qsub", job_file]
        out_path = out_dir + annot + ".llk"
        # No result file yet: submit.
        if not os.path.exists(out_path):
            sp.check_call(submit)
        # Result file present but empty: submit again.
        if os.path.exists(out_path) and os.stat(out_path).st_size == 0:
            sp.check_call(submit)

    pending = moniter_rescomp_jobs.get_job_ids("job_")
    moniter_rescomp_jobs.wait_for_jobs(pending)
def step3(top_annot, top_val,sig_list):
    """Try extending the current model by one annotation and keep the best.

    Every annotation in ``sig_list`` that is not already part of
    ``top_annot`` is handed to ``run_models`` together with the current
    model; after the jobs with ``job_prefix`` finish, the first line of each
    ``<out_dir><top_annot>+<annot>.llk`` file is read for its ``ln(lk):``
    value.

    Args:
        top_annot: current model as a "+"-joined string of annotation names.
        top_val: likelihood of the current model (anything ``float`` accepts).
        sig_list: candidate annotation names.

    Returns:
        ``[(model_name, ln_lk), n_better]`` when at least one extended model
        has a likelihood above ``top_val`` (the tuple is the best of them),
        otherwise ``[top_annot, 0]``. Note the first element is a tuple in
        one case and a plain string in the other.
    """
    top_annot_list = top_annot.split("+")
    annot_list = [x for x in sig_list if x not in top_annot_list]
    out_list = [top_annot+"+"+ x for x in annot_list]
    run_models(top_annot_list,annot_list)
    job_list = moniter_rescomp_jobs.get_job_ids(job_prefix)
    moniter_rescomp_jobs.wait_for_jobs(job_list)

    track_dic = {}
    for name in out_list:
        f = out_dir+name+".llk"
        with open(f,'r') as fin:
            # ln(lk) is on the first line. (The author's disabled AIC variant
            # skipped two lines first and matched "AIC:" instead.)
            l = fin.readline().strip().split()
        try:
            if l[0]=="ln(lk):":
                track_dic[name] = float(l[1])
        except (IndexError, ValueError):
            # Empty or malformed result: report the file and skip the model,
            # the same way step2 treats unreadable .llk files.
            print(f)

    # Highest likelihood first (used when comparing ln(lk)).
    sorted_annot = sorted(track_dic.items(),key=operator.itemgetter(1),
                          reverse=True)
    threshold = float(top_val)
    # Keep only models that beat the current one. (For AIC the disabled
    # variant kept values *below* top_val instead.)
    sig_annot_list = [x for x in sorted_annot if x[1] > threshold]

    if len(sig_annot_list) > 0:
        return [sig_annot_list[0], len(sig_annot_list)]
    else:
        return [top_annot, len(sig_annot_list)]
def step2(sig_list):
    """Find the best single annotation and launch all two-annotation models.

    The first line of each ``<out_dir><annot>.llk`` is read for its
    ``ln(lk):`` value; files whose first line is empty or malformed are
    reported (their path is printed) and skipped. The annotation with the
    highest value becomes ``top_annot``. For every other annotation a job
    script for the model ``top_annot+annot`` is written to ``job_dir`` and
    submitted with ``qsub`` unless a non-empty ``.llk`` for that pair already
    exists. The function then waits for all jobs with ``job_prefix``.

    Args:
        sig_list: annotation names to rank.

    Returns:
        ``[top_annot, top_val]`` with ``top_val`` as a float.

    Raises:
        IndexError: if no annotation yielded a usable ``ln(lk):`` value.
    """
    print("Finding annotation with highest model likelihood..")
    track_dic = {}
    for annot in sig_list:
        f = out_dir+annot+".llk"
        with open(f,'r') as fin:
            # ln(lk) is on the first line. (The author's disabled AIC variant
            # read the third line and matched "AIC:" instead.)
            l = fin.readline().strip().split()
        try:
            if l[0]=="ln(lk):":
                track_dic[annot] = float(l[1])
        except (IndexError, ValueError):
            print(f)

    # Highest likelihood first (use for ln(lk)).
    sorted_annot = sorted(track_dic.items(),key=operator.itemgetter(1),
                          reverse=True)
    if not sorted_annot:
        raise IndexError("no usable ln(lk) value found in any .llk file")
    top_annot, top_val = sorted_annot[0]

    for annot, _llk in sorted_annot[1:]:
        pair_tag = top_annot+"-"+annot
        pair_out = out_dir+top_annot+"+"+annot
        job_file = job_dir+job_prefix+pair_tag+".sh"

        # distance_tss is always supplied through -dists and never through
        # -w (as in step1/step4/step5); the partner annotation goes to -w.
        # Previously the distance_tss branch passed "-w distance_tss" and
        # left top_annot out of the model altogether.
        command_list = [fgwas, "-i", input_file, "-cc"]
        if "distance_tss" in (top_annot, annot):
            command_list += ["-dists", "distance_tss"+":"+home_dir+"dist_model"]
        binary_annots = [x for x in (top_annot, annot) if x != "distance_tss"]
        if binary_annots:
            command_list += ["-w", "+".join(binary_annots)]
        command_list += ["-o", pair_out]
        command = " ".join(command_list)

        script='''
#$ -N %s%s
#$ -pe shmem 1
#$ -P mccarthy.prjc
#$ -q short.qc
#$ -e %s%s.error
#$ -o %s%s.out
echo "start time" `date`
%s
echo "end time" `date`
        ''' % (job_prefix,pair_tag, log_dir,job_prefix+pair_tag,
        log_dir,job_prefix+pair_tag, command)
        with open(job_file,'w') as fout:
            fout.write(script)

        out_path = pair_out+".llk"
        # Submit only when there is no usable (non-empty) result yet.
        if not os.path.exists(out_path) or os.stat(out_path).st_size == 0:
            sp.check_call(["qsub", job_file])

    job_list = moniter_rescomp_jobs.get_job_ids(job_prefix)
    moniter_rescomp_jobs.wait_for_jobs(job_list)
    top = [top_annot,top_val]
    return(top)
def step5(model_list,best_p,best_llk,best_dropped_mod="NA",previously_dropped=[]):
    """Test whether dropping one annotation improves the cross-validation likelihood.

    For each annotation in ``model_list`` a job is written and submitted
    (unless a non-empty ``drop-...ridgeparams`` result already exists) that
    runs the model without that annotation at penalty ``best_p``. After the
    jobs with ``job_prefix`` finish, the last whitespace-separated token on
    the last line of each result file is taken as that reduced model's
    likelihood.

    Args:
        model_list: annotation names in the current model.
        best_p: penalty to use, as a string (it is joined into the command).
        best_llk: likelihood to beat (anything ``float`` accepts).
        best_dropped_mod: value passed through unchanged as the first return
            element when no drop helps.
        previously_dropped: names dropped in earlier rounds; only read here
            (never mutated) to build the "drop-<a>+<b>+" file-name prefix.

    Returns:
        A 5-tuple ``(best_dropped_mod, best_dropped_llk, annotations,
        best_llk, status_complete)``. If some drop beats ``best_llk``:
        the dropped annotation, its likelihood (string as read), the model
        without it, that same likelihood, and ``False``. Otherwise: the
        incoming ``best_dropped_mod``, ``False``, ``model_list``, the
        incoming ``best_llk``, and ``True``.
    """
    print("Test dropping each annotation from the model, using cross-validation likelihood")
    print("Keep dropping annotations as long as the cross-validation likelihood keeps increasing")
    if len(previously_dropped) > 0:
        dropped = "+".join(previously_dropped) + "+"
    else:
        dropped = ""

    for mod in model_list:
        keep_list = list(model_list)
        keep_list.remove(mod)
        # Queue choice is based on the model size before distance_tss is
        # split off below.
        if len(keep_list) <= 15:
            qc = "short.qc"
        else:
            qc = "long.qc"
        drop_tag = "drop-"+dropped+mod
        job_file = job_dir+job_prefix+drop_tag+".sh"

        # distance_tss goes through -dists, everything else through -w.
        # "-w" is left out when nothing remains for it, so it never receives
        # an empty value (which would make it swallow the following "-p").
        command_list = [fgwas, "-i", input_file, "-cc"]
        if "distance_tss" in keep_list:
            command_list += ["-dists", "distance_tss"+":"+home_dir+"dist_model"]
        binary_annots = [x for x in keep_list if x != "distance_tss"]
        if binary_annots:
            command_list += ["-w", "+".join(binary_annots)]
        command_list += ["-p", best_p, "-xv", "-print",
                         "-o", out_dir+drop_tag]
        command = " ".join(command_list)

        script='''
#$ -N %sdrop-%s
#$ -pe shmem 1
#$ -P mccarthy.prjc
#$ -q %s
#$ -e %s%s.error
#$ -o %s%s.out
echo "start time" `date`
%s
echo "end time" `date`
        ''' % (job_prefix,dropped+mod, qc, log_dir,job_prefix+drop_tag,
        log_dir,job_prefix+drop_tag, command)
        with open(job_file,'w') as fout:
            fout.write(script)

        out_path = out_dir+drop_tag+".ridgeparams"
        # Submit only when there is no usable (non-empty) result yet.
        if not os.path.exists(out_path) or os.stat(out_path).st_size == 0:
            sp.check_call(["qsub", job_file])

    job_list = moniter_rescomp_jobs.get_job_ids(job_prefix)
    moniter_rescomp_jobs.wait_for_jobs(job_list)

    print("The best likelihood value to beat: %s" % str(best_llk))
    track_dic = {}
    for mod in model_list:
        with open(out_dir+"drop-"+dropped+mod+".ridgeparams",'r') as fin:
            line_list = fin.readlines()
        llk = line_list[-1].strip().split()[-1]
        track_dic[mod]=llk
        print ("dropped " + mod + ": " + str(llk))

    # Rank numerically, best first: the values are text, and ordering them as
    # strings ranks e.g. "-1234.5" above "-1230.1".
    sorted_mods = sorted(track_dic.items(),key=lambda kv: float(kv[1]),
                         reverse=True)
    threshold = float(best_llk)
    check_list = [x for x in sorted_mods if float(x[1]) > threshold]

    if not check_list:
        print ("Dropping models didn't improve cross-validated likelihood")
        print ("Keep the current model!")
        status_complete = True
        return best_dropped_mod,False,model_list, best_llk, status_complete

    best_dropped_mod, best_dropped_llk = check_list[0]
    report_list = list(model_list)
    report_list.remove(best_dropped_mod)
    print ("Best dropped model: %s" % best_dropped_mod)
    print ("Best dropped llk: %s" % best_dropped_llk)
    print ("Annotations to keep: %s" % ",".join(report_list))
    status_complete = False
    best_llk = best_dropped_llk
    return best_dropped_mod,best_dropped_llk, report_list, best_llk, status_complete