Пример #1
0
def compress_vcf(args, myvcf):
    """Bgzip-compress a vcf file and index the result with tabix.

    Four shell commands are dispatched, in order, through ``whichcmd``:
    remove a stale ``<myvcf>.bgz``, run ``bgzip`` on ``myvcf``, rename
    ``<myvcf>.gz`` to ``<myvcf>.bgz`` and run ``tabix -p vcf`` on the
    renamed file.

    Args:
        args: run options, handed to ``whichcmd`` unchanged.
        myvcf: path (string) of the uncompressed vcf file.
    """
    bgz_path = myvcf + ".bgz"

    shell_steps = (
        # get rid of a compressed copy left behind by an earlier run
        "rm -f " + bgz_path,
        # compress
        "bgzip " + myvcf,
        # switch the extension from .gz to .bgz
        "mv " + myvcf + ".gz " + myvcf + ".bgz ",
        # index the compressed file
        "tabix -p vcf " + bgz_path,
    )

    for step in shell_steps:
        whichcmd(step, args, 0)
Пример #2
0
def run_vlad_code_compare(args, savidir, inplist):
    """Run Vladimir's savi computations to compare samples.

    Work is done in three phases, all driven by shell commands that are
    dispatched through ``whichcmd``:

    1. For every ``A:B`` entry of the comma-separated ``args.sample`` string,
       pipe ``<savidir>/freqsavi.vcf.bgz`` through
       ``make_qvt | savi_poster -pd | savi_conf -fs | awk`` and write the
       result to ``<savidir>/pd_<A><B>.txt``.  Three matching ``##INFO``
       lines are appended to ``<savidir>/header_addition.txt``.
    2. If there was at least one pair and ``inplist`` holds more than two
       entries, run one extra "1 vs all" comparison into
       ``<savidir>/pd_00.txt`` (INFO IDs ``PD00_F/_L/_U``).
    3. If there was at least one pair, paste all ``pd_*.txt`` files together
       (``;`` delimiter), add the result to the vcf via ``add_to_info``,
       prepend the new header lines, write ``<savidir>/finalsavi.vcf`` and
       compress it with ``compress_vcf``.

    Intermediate files are removed unless ``args.debug`` is set; the
    ``freqsavi.vcf.bgz`` input is also removed unless ``args.keepfreqfile``
    is set.

    Args:
        args: run options.  Attributes read here: ``hybrid``, ``verbose``,
            ``sample``, ``bin``, ``saviconf``, ``saviprecision``, ``sge``,
            ``debug`` and ``keepfreqfile``.
        savidir: directory (string) holding ``freqsavi.vcf.bgz`` and
            receiving every file produced here.
        inplist: list of samples; only its length is used in this function.

    The function also reads the module-level ``prior_dict`` (indexed by
    sample name) and reads -- and, when ``args.hybrid`` is set, overwrites --
    the module-level ``qvtargs``.
    """

    # declare as global or else Python will treat as a local variable
    global qvtargs

    # variable to hold comma-delimited list of SGE job IDs
    sgejobids = ""

    # if hybrid, SAMP 1 tot depth = RD + AD, SAMP 2 tot depth = SDP
    # NOTE: when args.hybrid is false, qvtargs keeps whatever value the
    # module-level variable currently has
    if (args.hybrid): qvtargs = "--hybrid"

    # make sure the header files don't exist because we're going to append to them
    cmd = "rm -f " + savidir + "/{header_addition.txt,vcfheader.txt}"
    whichcmd(cmd, args, 0)

    if (args.verbose): print("\n# savi comparison")
    # keep only the "A:B" entries of args.sample, i.e. the requested pairwise comparisons
    pairwise_list = [s for s in args.sample.split(",") if ":" in s]

    for s in pairwise_list:
        # the two sample names of this pair
        str_a = s.split(":")[0]
        str_b = s.split(":")[1]

        # the priors, looked up by sample name
        prior_a = prior_dict[str_a]
        prior_b = prior_dict[str_b]

        # older, hand-run form of the comparison pipeline, kept for reference:
        # paste testout/sample_1_1.1p testout/sample_2_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_12.txt
        # paste testout/sample_1_1.1p testout/sample_3_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_13.txt
        # paste testout/sample_1_1.1p testout/sample_4_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_14.txt
        # paste testout/sample_2_1.1p testout/sample_3_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_23.txt
        # paste testout/sample_2_1.1p testout/sample_4_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_24.txt
        # paste testout/sample_3_1.1p testout/sample_4_1.1p | cut -f1-4,6-8 | ../savi/savi_poster -pd testout/prior testout/prior | ../savi/savi_conf -fc 1e-5  > testout/savi/pd_34.txt

        # the awk stage turns columns 1, 5 and NF-1 of each line into
        # "PD<a><b>_F=..;PD<a><b>_L=..;PD<a><b>_U=.."
        cmd = "zcat " + savidir + "/freqsavi.vcf.bgz | " + \
        args.bin + "/make_qvt -1 -2s " + str_a + "," + str_b + " " + qvtargs + " | " + \
        args.bin + "/savi_poster -pd " + prior_a + " " + prior_b + " | " + \
        args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision + " | awk -v samp1=" + str_a + " -v samp2=" + str_b + " '" + '{mystr="PD"samp1 samp2; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}' + "' > " + savidir + "/pd_" + str_a + str_b + ".txt"
        # previous variant of the last stage, using savi_conf -fc (kept for reference):
        # args.bin + "/savi_conf -fc " + args.saviconf + " | awk -v samp1=" + str_a + " -v samp2=" + str_b + " '" + '{mystr="PD"samp1 samp2; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}' + "' > " + savidir + "/pd_" + str_a + str_b + ".txt"
        cmd = escape_special_char(cmd)

        # signature reminder:
        # whichcmd(cmd, args, wantreturn, wantqsub=0, jobname="myjob", holdstr="0", wantsync=0):
        # run cmd and store SGE job id
        myjobid = whichcmd(cmd, args, 1, args.sge,
                           "j_s" + str_a + "_s" + str_b + "_cmp")

        if (args.sge):
            print("Your job " + myjobid + " has been submitted")
            # newest id is prepended, so the string always ends with a comma
            # NOTE(review): confirm whichcmd tolerates the trailing comma in holdstr
            sgejobids = myjobid + "," + sgejobids

        # fix vcf header: declare the three INFO keys produced by the awk stage above
        with open(savidir + "/header_addition.txt", 'a') as f:
            f.write(
                "##INFO=<ID=PD" + str_a + str_b +
                "_F,Number=1,Type=Integer,Description=\"Savi freq delta for sample "
                + str_a + " vs " + str_b + "\">\n")
            f.write(
                "##INFO=<ID=PD" + str_a + str_b +
                "_L,Number=1,Type=Integer,Description=\"Savi freq delta lower bound for sample "
                + str_a + " vs " + str_b + "\">\n")
            f.write(
                "##INFO=<ID=PD" + str_a + str_b +
                "_U,Number=1,Type=Integer,Description=\"Savi freq delta upper bound for sample "
                + str_a + " vs " + str_b + "\">\n")

    # if there was at least one savi comparison and there are more than 2 samples,
    # run a 1 vs ALL comparison, as requested by JiGuang
    if (pairwise_list and len(inplist) > 2):
        # arbitrarily use the first prior
        # NOTE(review): both names come from index [0] of the first pair, so
        # prior_a and prior_b are the same prior -- confirm this is intended
        str_a = pairwise_list[0].split(":")[0]
        str_b = pairwise_list[0].split(":")[0]

        prior_a = prior_dict[str_a]
        prior_b = prior_dict[str_b]

        # same pipeline as above, but make_qvt runs in -1vsall mode and the
        # awk variables are fixed to 0/0, giving the PD00_* keys
        cmd = "zcat " + savidir + "/freqsavi.vcf.bgz | " + \
        args.bin + "/make_qvt -1 -1vsall | " + \
        args.bin + "/savi_poster -pd " + prior_a + " " + prior_b + " | " + \
        args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision + " | awk -v samp1=0 -v samp2=0" + " '" + '{mystr="PD"samp1 samp2; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}' + "' > " + savidir + "/pd_00.txt"
        cmd = escape_special_char(cmd)

        # signature reminder:
        # whichcmd(cmd, args, wantreturn, wantqsub=0, jobname="myjob", holdstr="0", wantsync=0):
        # run cmd and store SGE job id
        myjobid = whichcmd(cmd, args, 1, args.sge,
                           "j_s" + str_a + "_s" + str_b + "_cmp")

        if (args.sge):
            print("Your job " + myjobid + " has been submitted")
            sgejobids = myjobid + "," + sgejobids

        # fix vcf header: declare the PD00_* INFO keys
        with open(savidir + "/header_addition.txt", 'a') as f:
            f.write(
                "##INFO=<ID=PD00_F,Number=1,Type=Integer,Description=\"Savi freq delta for samples 1 vs all the others\">\n"
            )
            f.write(
                "##INFO=<ID=PD00_L,Number=1,Type=Integer,Description=\"Savi freq delta lower bound for samples 1 vs all the others\">\n"
            )
            f.write(
                "##INFO=<ID=PD00_U,Number=1,Type=Integer,Description=\"Savi freq delta upper bound for samples 1 vs all the others\">\n"
            )

    # if there was at least one savi comparison
    if (pairwise_list):

        if (args.verbose): print("\n# paste savi numbers into INFO field")
        # join every pd_*.txt column-wise with ";" into a single pd.txt
        cmd = 'paste -d";" ' + savidir + "/pd_*.txt > " + savidir + "/pd.txt"
        cmd = escape_special_char(cmd)
        # signature reminder:
        # whichcmd(cmd, args, wantreturn, wantqsub=0, jobname="myjob", holdstr="0", wantsync=0):
        # the job ids collected above are passed as holdstr, with wantsync=1
        # (presumably so this step waits for the comparison jobs -- verify in whichcmd)
        mysyncjob = whichcmd(cmd, args, 1, args.sge, "mysync", sgejobids, 1)

        if (args.sge):
            print("Your job " + mysyncjob + " has been submitted")

        if (not args.debug):
            if (args.verbose): print("\n# clean up")
            # the per-comparison files are no longer needed once pd.txt exists
            cmd = "rm -f " + savidir + "/pd_*.txt"
            whichcmd(cmd, args, 0)

        # add the pasted values to the vcf; add_to_info is also given
        # vcfheader.txt via --header, and its stdout goes to tmp0.txt
        cmd = "zcat " + savidir + "/freqsavi.vcf.bgz | " + \
        args.bin + "/add_to_info -f " + savidir + "/pd.txt --header " + savidir + "/vcfheader.txt > " + savidir + "/tmp0.txt"
        whichcmd(cmd, args, 0)

        # assemble the final vcf: new INFO header lines, then vcfheader.txt, then tmp0.txt
        cmd = "cat " + savidir + "/header_addition.txt " + savidir + "/vcfheader.txt " + savidir + "/tmp0.txt > " + savidir + "/finalsavi.vcf"
        whichcmd(cmd, args, 0)

        if (args.verbose): print("\n# compress finalsavi.vcf")
        compress_vcf(args, savidir + "/finalsavi.vcf")

        if (not args.debug):
            if (not args.keepfreqfile):
                if (args.verbose): print("\n# clean up")
                # drop the frequency vcf and its index now that finalsavi.vcf exists
                cmd = "rm -f " + savidir + "/{freqsavi.vcf.bgz,freqsavi.vcf.bgz.tbi}"
                whichcmd(cmd, args, 0)

    # final clean up; runs whether or not any comparison was made
    if (not args.debug):
        if (args.verbose): print("\n# clean up")
        cmd = "rm -f " + savidir + "/{header_addition.txt,vcfheader.txt,tmp0.txt,pd.txt,addsavi.vcf.bgz,addsavi.vcf.bgz.tbi,filtersavi.vcf.bgz,filtersavi.vcf.bgz.tbi}"
        whichcmd(cmd, args, 0)

    if (args.verbose): print("[END]")
Пример #3
0
def run_vlad_code_freq(args, savidir, inplist):
    """Compute the savi frequency of every sample and annotate the vcf with it.

    For each sample in ``inplist`` the input vcf
    (``<savidir>/addsavi.vcf.bgz`` when ``args.nofilter`` is set, otherwise
    ``<savidir>/filtersavi.vcf.bgz``) is piped through
    ``make_qvt | savi_poster -p | savi_conf -fs | awk`` and the result is
    written to ``<savidir>/freq_<sample>.txt``; three ``##INFO`` lines per
    sample are appended to ``<savidir>/header_addition.txt``.  The per-sample
    files are then pasted together (``;`` delimiter), fed to ``add_to_info``,
    prefixed with the header lines, saved as ``<savidir>/freqsavi.vcf`` and
    passed to ``compress_vcf``.  Intermediate files are removed unless
    ``args.debug`` is set.

    Args:
        args: run options.  Attributes read here: ``nofilter``, ``hybrid``,
            ``verbose``, ``bin``, ``saviconf``, ``saviprecision``, ``sge``
            and ``debug``.
        savidir: directory (string) holding the input vcf and receiving all
            files produced here.
        inplist: iterable of sample names (strings), each used as a key of
            the module-level ``prior_dict``.

    The module-level ``qvtargs`` is read for every sample and, when
    ``args.hybrid`` is set, overwritten along the way.
    """

    # qvtargs is module-level state; without this the assignments below
    # would bind a local name instead
    global qvtargs

    # comma-delimited SGE job ids, most recent first
    queued_ids = ""

    # the header files are appended to, so start without them
    whichcmd("rm -f " + savidir + "/{header_addition.txt,vcfheader.txt}", args, 0)

    # every pipeline below starts by streaming the input vcf
    if args.nofilter:
        firstcmd = "zcat " + savidir + "/addsavi.vcf.bgz | "
    else:
        firstcmd = "zcat " + savidir + "/filtersavi.vcf.bgz | "

    header_path = savidir + "/header_addition.txt"

    # awk program that labels columns 1, 5 and NF-1 as P<samp>_F / _L / _U
    awk_prog = '{mystr="P"samp; print mystr"_F="$1";"mystr"_L="$5";"mystr"_U="$(NF-1)}'

    # tails of the three INFO header lines written for each sample
    info_tails = (
        "_F,Number=1,Type=Integer,Description=\"Savi freq for sample ",
        "_L,Number=1,Type=Integer,Description=\"Savi freq lower bound for sample ",
        "_U,Number=1,Type=Integer,Description=\"Savi freq upper bound for sample ",
    )

    # make freq files
    for position, samp in enumerate(inplist):
        # hybrid mode: tot depth = RD + AD for the first sample only,
        # switched off again for every later sample
        if args.hybrid:
            qvtargs = "--rdplusad" if position == 0 else ""

        if args.verbose:
            print("\n# compute freq for sample " + samp)

        pipeline = (
            firstcmd + args.bin + "/make_qvt -1 -s " + samp + " " + qvtargs + " | "
            + args.bin + "/savi_poster -p " + prior_dict[samp] + " | "
            + args.bin + "/savi_conf -fs " + args.saviconf + " " + args.saviprecision
            + " | awk -v samp=" + samp + " '" + awk_prog + "' > "
            + savidir + "/freq_" + samp + ".txt"
        )

        # whichcmd(cmd, args, wantreturn, wantqsub=0, jobname="myjob", holdstr="0", wantsync=0)
        # run the pipeline and remember its SGE job id
        job_id = whichcmd(escape_special_char(pipeline), args, 1, args.sge,
                          "j_s" + samp + "_freq")

        if args.sge:
            print("Your job " + job_id + " has been submitted")
            queued_ids = job_id + "," + queued_ids

        # declare this sample's INFO keys in the vcf header additions
        with open(header_path, 'a') as header_file:
            for tail in info_tails:
                header_file.write("##INFO=<ID=P" + samp + tail + samp + "\">\n")

    if args.verbose:
        print("\n# paste savi freq numbers into INFO field")

    # join the per-sample files column-wise; the collected job ids are
    # passed as holdstr with wantsync=1
    paste_cmd = escape_special_char(
        'paste -d";" ' + savidir + "/freq_*.txt > " + savidir + "/freq.txt")
    sync_id = whichcmd(paste_cmd, args, 1, args.sge, "mysync", queued_ids, 1)

    if args.sge:
        print("Your job " + sync_id + " has been submitted")

    if not args.debug:
        if args.verbose:
            print("\n# clean up")
        whichcmd("rm -f " + savidir + "/freq_*.txt", args, 0)

    # add into INFO field
    whichcmd(
        firstcmd + args.bin + "/add_to_info -f " + savidir + "/freq.txt --header "
        + savidir + "/vcfheader.txt > " + savidir + "/tmp0.txt",
        args, 0)

    # add new header lines to vcf
    whichcmd(
        "cat " + savidir + "/header_addition.txt " + savidir + "/vcfheader.txt "
        + savidir + "/tmp0.txt > " + savidir + "/freqsavi.vcf",
        args, 0)

    if not args.debug:
        if args.verbose:
            print("\n# clean up")
        whichcmd(
            "rm -f " + savidir + "/{header_addition.txt,vcfheader.txt,tmp0.txt,freq.txt}",
            args, 0)

    # compress
    if args.verbose:
        print("\n# compress freqsavi.vcf")
    compress_vcf(args, savidir + "/freqsavi.vcf")