示例#1
0
def getLOH(path, program, gene):
    """Look up the copy number reported by a tool for a gene.

    Parameters
    ----------
        path : str
            Output file produced by the tool. It is only read when it is an
            existing regular file
        program : str
            Name of the tool that produced ``path``; forwarded unchanged to
            ``lc.convert2region``
        gene : list
            Sequence whose first element is passed to ``lg.getCopyNumber`` as
            its second argument and whose elements 1 and 2 are passed as its
            first argument (presumably chromosome, start and end - confirm
            against the caller)

    Returns
    -------
        "Not found" when ``path`` is not an existing file; otherwise whatever
        ``lg.getCopyNumber`` returns for the converted regions.
    """
    # Guard clause: nothing to convert when the tool output is missing
    if not os.path.isfile(path):
        return "Not found"

    regions = lc.convert2region(path, program, "error")
    positions = gene[1:3]
    chromosome = gene[0]
    return lg.getCopyNumber(positions, chromosome, regions)
示例#2
0
def getLOH(path, program, gene):
    """Return the copy number (or LOH) of a gene and the sample purity reported by a tool.

    Parameters
    ----------
        path : str
            Output file from the program. The LOH (or CNV) is searched here
        program : str
            Program that generated ``path``. The original documentation lists
            facets, ascatngs, sequenza, purple, and ascatarray as valid values
        gene : list
            List with the chromosome, start and end position of the gene of
            interest, in that order

    Returns
    -------
        tuple
            ``(sol, pur)``. Both elements are the string "Not found" when
            ``path`` is not an existing file. Otherwise ``sol`` is the value
            returned by ``lg.getCopyNumber`` (A, D, L, or N according to the
            original documentation) and ``pur`` is the value returned by
            ``lg.getPurity`` for the converted regions.
    """
    missing = "Not found"
    # Guard clause: without the output file there is nothing to convert
    if not os.path.isfile(path):
        return (missing, missing)

    regions = lc.convert2region(path, program, "error")
    purity = lg.getPurity(regions)
    copyNumber = lg.getCopyNumber(gene[1:3], gene[0], regions)
    return (copyNumber, purity)
示例#3
0
 diff = []
 bed1 = []
 bed2 = []
 tf = "{wd}/{sub}/{tm}_VS_{cn}".format(wd=wd,
                                       sub=c[0],
                                       tm=tm[0].split("-")[0],
                                       cn=cn[0].split("-")[0])
 filename = tf.split("/")[-1]
 output = "{}.txt".format(filename)
 output1 = "{}.regsCoin.bed".format(filename)
 output2 = "{}.regsDiff.bed".format(filename)
 facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf)
 ascat = mm.findAscatName("{}_ASCAT/".format(tf))
 sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0])
 if os.path.isfile(facets):
     outf = lc.convert2region(facets, "facets", "quiet")
 if os.path.isfile(ascat):
     outa = lc.convert2region(ascat, "ascatngs", "quiet")
 if os.path.isfile(sequenza):
     outs = lc.convert2region(sequenza, "sequenza", "quiet")
 # Compare FACETS vs ascatNGS
 # if os.path.isfile(facets) and os.path.isfile(ascat) :
 #     regs = lc.getFragments(outf, outa)
 #     compareRegions(regs, outf, outa, same, diff, bed1, bed2)
 #
 #     with open(output, "w") as fi :
 #         fi.write(",".join(same))
 #         fi.write("\n")
 #         fi.write(",".join(diff))
 #         fi.write("\n")
 #     with open(output1, "w") as fi :
示例#4
0
     q = cur.execute("SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Normal%'".format(c[0]))
     controls = q.fetchall()
 for tm in tumors :
     for cn in controls :
         # Get the analysis absolute path
         analysis = "{tm}_VS_{cn}".format(tm = tm[0].split("-")[0], cn = cn[0].split("-")[0])
         linea = "{}\t".format(analysis)
         # Get the variant annotation file name
         tf = "{wd}/{sub}/{tumor}".format(wd = wd, sub = c[0], tumor = tm[0])
         cf = "{wd}/{sub}/{control}".format(wd = wd, sub = c[0], control = cn[0])
         platypust = "{}/platypusGerm/platypus.hg38_multianno.txt".format(tf)
         platypusc = "{}/platypusGerm/platypus.hg38_multianno.txt".format(cf)
         # Get the FACETS, ascatNGS and sequenza output in REGION format
         ficFa = "{wd}/{sub}/{folder}_FACETS/facets_comp_cncf.tsv".format(wd = wd, sub = c[0], folder = analysis)
         if os.path.isfile(ficFa) :
             regFa = lc.convert2region(ficFa, "facets")
         else :
             regFa = "X"
         ficAs = lib.findAscatName("{wd}/{case}/{folder}_ASCAT/".format(wd = wd, case = c[0], folder = analysis))
         if os.path.isfile(ficAs) :
             regAs = lc.convert2region(ficAs, "ascatngs")
         else :
             regAs = "X"
         ficSe = "{wd}/{case}/{folder}_Sequenza/{case}_segments.txt".format(folder = analysis, case = c[0], wd = wd)
         if os.path.isfile(ficSe) :
             regSe = lc.convert2region(ficSe, "sequenza")
         else :
             regSe = "X"
         # Get the information regarding the worst variant in the gene selected found in platypus variant calling
         variant = lib.getWorst(platypusc, "BRCA1")
         linea += "{}\t".format(variant)
示例#5
0
def _toolRegionAndStats(path, program, available, na="NA"):
    """Convert one tool output to REGION format and summarise it.

    Parameters
    ----------
        path : str
            File produced by the tool. Only read when ``available`` is true
        program : str
            Tool identifier forwarded to ``lc.convert2region``
        available : bool
            Whether the tool output exists for this analysis
        na : str
            Placeholder stored in every field when the output is unavailable

    Returns
    -------
        tuple
            ``(region, stats)``. When unavailable, ``region`` is a dict with
            the keys "purity" and "ploidy" and ``stats`` a dict with the keys
            "meanCN", "perA", "perL", "perD" and "perN", all set to ``na``.
            Otherwise the values returned by ``lc.convert2region`` and
            ``ls.meanCoverage``.
    """
    if not available:
        region = {"purity": na, "ploidy": na}
        stats = {"meanCN": na, "perA": na, "perL": na, "perD": na, "perN": na}
        return region, stats
    region = lc.convert2region(path, program, "error")
    return region, ls.meanCoverage(region)


def main(cancer="OV"):
    """Write a table with mean copy number, purity and ploidy per tool.

    For every submitter of the given cancer, each tumor/control sample pair
    is looked up on disk and the outputs of ASCAT2, FACETS, ascatNGS,
    Sequenza and PURPLE are summarised (when present) in one row of
    "meanCN.tsv", written in the current working directory. Missing tool
    outputs are reported as "NA".

    Parameters
    ----------
        cancer : str
            Cancer code used both as the folder name under the TCGA root
            directory and as the value matched against ``patient.cancer``

    Notes
    -----
        Relies on the module-level names ``dbcon``, ``lc``, ``ls``, ``lib``
        and ``convertToCSV``.
        NOTE(review): the queries use qmark ("?") placeholders, which assumes
        ``dbcon`` is a sqlite3 connection - confirm.
    """
    wd = "/g/strcombio/fsupek_cancer2/TCGA_bam/{}".format(cancer)
    txt = "submitter\tcase\tfac_meanCN\tfac_purity\tfac_ploidy\tfac_aberration\tasc_meanCN\tasc_aberration\tseq_meanCN\tseq_purity\tseq_ploidy\tseq_aberration\tpur_meanCN\tpur_purity\t"
    txt += "pur_ploidy\tpur_aberration\tngs_meanCN\tngs_purity\tngs_ploidy\tngs_aberration\n"
    na = "NA"
    outputFile = "meanCN.tsv"
    rowFormat = "{sub}\t{an}\t{fmcn}\t{fpu}\t{fpl}\t{fab}\t{acn}\t{aab}\t{scn}\t{spu}\t{spl}\t{sab}\t{pcn}\t{ppu}\t{ppl}\t{pab}\t{ncn}\t{npu}\t{npl}\t{nab}\n"
    # Rows are collected in a list and joined once, instead of growing one
    # string with += on every analysis
    rows = [txt]

    # Get submitters list. Values are bound as parameters rather than
    # formatted into the SQL text
    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer=?",
                        (cancer, ))
        cases = q.fetchall()

    print("INFO: Analysis done in {} cases".format(len(cases)))
    for count, c in enumerate(cases, start=1):
        if count % 100 == 0:
            print("INFO: {} cases done".format(count))

        with dbcon:
            cur = dbcon.cursor()
            q = cur.execute(
                "SELECT uuid, bamName FROM sample WHERE submitter=? AND tumor LIKE '%Tumor%'",
                (c[0], ))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid, bamName FROM sample WHERE submitter=? AND tumor LIKE '%Normal%'",
                (c[0], ))
            controls = q.fetchall()

        workindir = "{wd}/{sub}".format(wd=wd, sub=c[0])
        for tm in tumors:
            for cn in controls:
                # The folder format for FACETS, ascatNGS, Sequenza and PURPLE
                # is "[tumorUUID]_VS_[controlUUID]", where each UUID is cut at
                # its first "-"
                analysisdir = "{}_VS_{}".format(tm[0].split("-")[0],
                                                cn[0].split("-")[0])

                # From ASCAT2. Only the first file listed in the folder is used
                # TODO: Check all ASCAT files
                folder = "{}/ASCAT2".format(workindir)
                ascatFiles = os.listdir(folder) if os.path.isdir(folder) else []
                ascat = ""
                if ascatFiles:
                    ascat = "{wd}/{fi}".format(wd=folder, fi=ascatFiles[0])
                # The ASCAT2 region is not reported (no purity/ploidy columns)
                _, sAscat = _toolRegionAndStats(ascat, "ascatarray",
                                                bool(ascatFiles), na)

                # From FACETS
                facets = "{wd}/{folder}_FACETS/facets_comp_cncf.tsv".format(
                    wd=workindir, folder=analysisdir)
                rFacets, sFacets = _toolRegionAndStats(
                    facets, "facets", os.path.isfile(facets), na)

                # From ascatNGS. findAscatName signals a missing file with
                # the string "Not found"
                ascatngs = lib.findAscatName("{wd}/{folder}_ASCAT/".format(
                    wd=workindir, folder=analysisdir))
                rNgs, sNgs = _toolRegionAndStats(ascatngs, "ascatngs",
                                                 ascatngs != "Not found", na)

                # From Sequenza
                sequenza = "{wd}/{folder}_Sequenza/{case}_segments.txt".format(
                    folder=analysisdir, case=c[0], wd=workindir)
                rSequenza, sSequenza = _toolRegionAndStats(
                    sequenza, "sequenza", os.path.isfile(sequenza), na)

                # From PURPLE
                purple = "{wd}/{folder}_PURPLE/TUMOR.purple.cnv.somatic.tsv".format(
                    wd=workindir, folder=analysisdir)
                rPurple, sPurple = _toolRegionAndStats(
                    purple, "purple", os.path.isfile(purple), na)

                # Keep the row in RAM until every case has been processed
                rows.append(
                    rowFormat.format(sub=c[0],
                                     an=analysisdir,
                                     fmcn=sFacets["meanCN"],
                                     fpu=rFacets["purity"],
                                     fpl=rFacets["ploidy"],
                                     fab=convertToCSV(sFacets),
                                     acn=sAscat["meanCN"],
                                     aab=convertToCSV(sAscat),
                                     scn=sSequenza["meanCN"],
                                     spu=rSequenza["purity"],
                                     spl=rSequenza["ploidy"],
                                     sab=convertToCSV(sSequenza),
                                     pcn=sPurple["meanCN"],
                                     ppu=rPurple["purity"],
                                     ppl=rPurple["ploidy"],
                                     pab=convertToCSV(sPurple),
                                     ncn=sNgs["meanCN"],
                                     npu=rNgs["purity"],
                                     npl=rNgs["ploidy"],
                                     nab=convertToCSV(sNgs)))

    with open(outputFile, "w") as fi:
        fi.write("".join(rows))
    print("INFO: Data stored in {} file".format(outputFile))
示例#6
0
def main() :
    # Constants
    dbcon = sqlite3.connect("/g/strcombio/fsupek_cancer2/TCGA_bam/info/info.db")
    cancer = "OV"
    cancerpath = "/g/strcombio/fsupek_cancer2/TCGA_bam/"
    if os.path.isdir("main6") :
        print("ERROR: Folder for output already exists. Remove it before to continue")
        sys.exit(1)

    # Get the OV submitters from the database
    with dbcon :
        query = "SELECT submitter FROM patient WHERE cancer='{}'".format(cancer)
        c = dbcon.cursor()
        x = c.execute(query)
        submitters = x.fetchall()

    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} ASCAT".format(s))
        ascatFolder = "{}/ASCAT2/".format(workindir)
        if os.path.isdir (ascatFolder) :
            # Open ASCAT2 folder and get the files available
            ascatFiles = os.listdir(ascatFolder)
            # Compare ASCAT2 with itself
            for a in ascatFiles :
                ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                if not os.path.isfile("ascat2VSascat2.tsv") :
                    createFile("ascat2VSascat2.tsv")
                with open("ascat2VSascat2.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(ascat, ascat)))
            # Open SNP-array folder and get the files that are in
            arrayFolder = "{}/Array/".format(workindir)
            # Compare SNP-Arrays CNV outputs with ASCAT2
            if os.path.isdir(arrayFolder) :
                arrayFiles = os.listdir(arrayFolder)
                # print("INFO: Comparing ASCAT2 and Array outputs in {}".format(s))
                for a in ascatFiles :
                    ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                    for b in arrayFiles :
                        arr = lc.convert2region("{}/{}".format(arrayFolder, b), "array")
                        if not os.path.isfile("ascat2VSarray.tsv") :
                            createFile("ascat2VSarray.tsv")
                        with open("ascat2VSarray.tsv", "a") as fi :
                            fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ascat, arr)))


            # Compare FACETS LOH/CNV outputs with ASCAT2
            facetsFiles = getFACETS(workindir)
            for a in ascatFiles :
                ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                for b in facetsFiles :
                    f = lc.convert2region(b, "facets", "error")
                    if not os.path.isfile("ascat2VSfacets.tsv") :
                        createFile("ascat2VSfacets.tsv")
                    with open("ascat2VSfacets.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ascat, f)))

            # Compare ascatNGS LOH/CNV outputs with ASCAT2
            # print("INFO: Comparing ASCAT2 and ascatNGS outputs in {}".format(s))
            ascatngsFiles = getAscatNGS(workindir)
            for a in ascatFiles :
                ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                for b in ascatngsFiles :
                    ngs = lc.convert2region(b, "ascatngs", "error")
                    if not os.path.isfile("ascat2VSascatNGS.tsv") :
                        createFile("ascat2VSascatNGS.tsv")
                    with open("ascat2VSascatNGS.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ascat, ngs)))

            # Compare Sequenza LOH/CNV outputs with ASCAT2
            # print("INFO: Comparing ASCAT2 and Sequenza outputs in {}".format(s))
            sequenzaFiles = getSequenza(workindir)
            for a in ascatFiles :
                ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                for b in sequenzaFiles :
                    seq = lc.convert2region(b, "sequenza", "error")
                    if not os.path.isfile("ascat2VSsequenza.tsv") :
                        createFile("ascat2VSsequenza.tsv")
                    with open("ascat2VSsequenza.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ascat, seq)))

            purpleFiles = getPurple(workindir)
            for a in ascatFiles :
                ascat = lc.convert2region("{}/{}".format(ascatFolder, a), "ascatarray")
                for b in purpleFiles :
                    purp = lc.convert2region(b, "purple", "error")
                    if not os.path.isfile("ascat2VSpurple.tsv") :
                        createFile("ascat2VSpurple.tsv")
                    with open("ascat2VSpurple.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ascat, purp)))

    # Move the output data to a new folder
    os.mkdir("main6")
    os.rename("ascat2VSascat2.tsv", "main6/ascat2VSascat2.tsv")
    os.rename("ascat2VSarray.tsv", "main6/ascat2VSarray.tsv")
    os.rename("ascat2VSfacets.tsv", "main6/ascat2VSfacets.tsv")
    os.rename("ascat2VSascatNGS.tsv", "main6/ascat2VSascatNGS.tsv")
    os.rename("ascat2VSsequenza.tsv", "main6/ascat2VSsequenza.tsv")
    os.rename("ascat2VSpurple.tsv", "main6/ascat2VSpurple.tsv")

    # Repeat the analysis, but using Arrays as Truth set
    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} arrays".format(s))
        # Open SNP-Array folder and get the files available
        arrayFolder = "{}/Array/".format(workindir)
        if os.path.isdir (arrayFolder) :
            arrayFiles = os.listdir(arrayFolder)
            # Compare arrays with itself
            for a in arrayFiles :
                arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                if not os.path.isfile("arrayVSarray.tsv") :
                    createFile("arrayVSarray.tsv")
                with open("arrayVSarray.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(arr, arr)))
            # Open ASCAT2 folder to get the files that are in
            ascatFolder = "{}/ASCAT2/".format(workindir)
            # Compare ASCAT2 outputs with Arrays
            if os.path.isdir(ascatFolder) :
                # print("INFO: Comparing ASCAT2 and Array outputs in {}".format(s))
                ascatFiles = os.listdir(ascatFolder)
                for a in arrayFiles :
                    arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                    for b in ascatFiles :
                        ascat = lc.convert2region("{}/{}".format(ascatFolder, b), "ascatarray")
                        if not os.path.isfile("arrayVSascat2.tsv") :
                            createFile("arrayVSascat2.tsv")
                        with open("arrayVSascat2.tsv", "a") as fi :
                            fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(arr, ascat)))


            # Compare FACETS LOH/CNV outputs with SNP-Array
            # print("INFO: Comparing ASCAT2 and FACETS outputs in {}".format(s))
            facetsFiles = getFACETS(workindir)
            for a in arrayFiles :
                arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                for b in facetsFiles :
                    f = lc.convert2region(b, "facets", "error")
                    if not os.path.isfile("arrayVSfacets.tsv") :
                        createFile("arrayVSfacets.tsv")
                    with open("arrayVSfacets.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(arr, f)))

            # Compare ascatNGS LOH/CNV outputs with SNP-Array
            # print("INFO: Comparing ASCAT2 and ascatNGS outputs in {}".format(s))
            ascatngsFiles = getAscatNGS(workindir)
            for a in arrayFiles :
                arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                for b in ascatngsFiles :
                    ngs = lc.convert2region(b, "ascatngs", "error")
                    if not os.path.isfile("arrayVSascatNGS.tsv") :
                        createFile("arrayVSascatNGS.tsv")
                    with open("arrayVSascatNGS.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(arr, ngs)))

            # Compare Sequenza LOH/CNV outputs with SNP-Array
            # print("INFO: Comparing ASCAT2 and Sequenza outputs in {}".format(s))
            sequenzaFiles = getSequenza(workindir)
            for a in arrayFiles :
                arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                for b in sequenzaFiles :
                    seq = lc.convert2region(b, "sequenza", "error")
                    if not os.path.isfile("arrayVSsequenza.tsv") :
                        createFile("arrayVSsequenza.tsv")
                    with open("arrayVSsequenza.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(arr, seq)))

            # Compare PURPLE LOH/CNV outputs with DNAcopy
            purpleFiles = getPurple(workindir)
            for a in arrayFiles :
                arr = lc.convert2region("{}/{}".format(arrayFolder, a), "array")
                for b in purpleFiles :
                    purp = lc.convert2region(b, "purple", "error")
                    if not os.path.isfile("arrayVSpurple.tsv") :
                        createFile("arrayVSpurple.tsv")
                    with open("arrayVSpurple.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(arr, purp)))

    os.rename("arrayVSarray.tsv", "main6/arrayVSarray.tsv")
    os.rename("arrayVSascat2.tsv", "main6/arrayVSascat2.tsv")
    os.rename("arrayVSfacets.tsv", "main6/arrayVSfacets.tsv")
    os.rename("arrayVSascatNGS.tsv", "main6/arrayVSascatNGS.tsv")
    os.rename("arrayVSsequenza.tsv", "main6/arrayVSsequenza.tsv")
    os.rename("arrayVSpurple.tsv", "main6/arrayVSpurple.tsv")

    # Repeat the analysis but comparing FACETS vs all the other tools
    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} FACETS".format(s))
        # Get all the FACETS done in the submitter
        facetsFiles = getFACETS(workindir)
        for a in facetsFiles :
            # Compare FACETS with itself
            f = lc.convert2region(a, "facets", "error")
            if not os.path.isfile("facetsVSfacets.tsv") :
                createFile("facetsVSfacets.tsv")
            with open("facetsVSfacets.tsv", "a") as fi :
                fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(f, f)))

            # Compare with ASCAT2
            ascatFolder = "{}/ASCAT2".format(workindir)
            if os.path.isdir (ascatFolder) :
                # Open ASCAT2 folder and get the files available
                ascatFiles = os.listdir(ascatFolder)
                # Compare ASCAT2 with itself
                for b in ascatFiles :
                    ascat = lc.convert2region("{}/{}".format(ascatFolder, b), "ascatarray", "error")
                    if not os.path.isfile("facetsVSascat2.tsv") :
                        createFile("facetsVSascat2.tsv")
                    with open("facetsVSascat2.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(f, ascat)))

            # Compare with SNP-Arrays
            arrayFolder = "{}/Array".format(workindir)
            if os.path.isdir(arrayFolder) :
                arrayFiles = os.listdir(arrayFolder)
                for b in arrayFiles :
                    arr = lc.convert2region("{}/{}".format(arrayFolder, b), "array", "error")
                    if not os.path.isfile("facetsVSarrays.tsv") :
                        createFile("facetsVSarrays.tsv")
                    with open("facetsVSarrays.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(f, arr)))

            # Compare with ascatNGS
            ascatngsFiles = getAscatNGS(workindir)
            for b in ascatngsFiles :
                ngs = lc.convert2region(b, "ascatngs", "error")
                if not os.path.isfile("facetsVSascatNGS.tsv") :
                    createFile("facetsVSascatNGS.tsv")
                with open("facetsVSascatNGS.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(f, ngs)))

            # Compare with Sequenza
            sequenzaFiles = getSequenza(workindir)
            for b in sequenzaFiles :
                seq = lc.convert2region(b, "sequenza", "error")
                if not os.path.isfile("facetsVSsequenza.tsv") :
                    createFile("facetsVSsequenza.tsv")
                with open("facetsVSsequenza.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(f, seq)))

            # Compare with PURPLE
            purpleFiles = getPurple(workindir)
            for b in purpleFiles :
                purp = lc.convert2region(b, "purple", "error")
                if not os.path.isfile("facetsVSpurple.tsv") :
                    createFile("facetsVSpurple.tsv")
                with open("facetsVSpurple.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(f, purp)))

    os.rename("facetsVSfacets.tsv", "main6/facetsVSfacets.tsv")
    os.rename("facetsVSascat2.tsv", "main6/facetsVSascat2.tsv")
    os.rename("facetsVSarrays.tsv", "main6/facetsVSarrays.tsv")
    os.rename("facetsVSascatNGS.tsv", "main6/facetsVSascatNGS.tsv")
    os.rename("facetsVSsequenza.tsv", "main6/facetsVSsequenza.tsv")
    os.rename("facetsVSpurple.tsv", "main6/facetsVSpurple.tsv")

    # Repeat the analysis, but comparing ascatNGS vs all the other tools
    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} ascatNGS".format(s))
        # Get all the ascatNGS done in the submitter
        ascatngsFiles = getAscatNGS(workindir)
        for a in ascatngsFiles :
            # Compare ascatNGS vs itself
            ngs = lc.convert2region(a, "ascatngs", "error")
            if not os.path.isfile("ascatNGSVSascatNGS.tsv") :
                createFile("ascatNGSVSascatNGS.tsv")
            with open("ascatNGSVSascatNGS.tsv", "a") as fi :
                fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(ngs, ngs)))

            # Compare with ASCAT2
            ascatFolder = "{}/ASCAT2/".format(workindir)
            if os.path.isdir(ascatFolder) :
                ascatFiles = os.listdir(ascatFolder)
                for b in ascatFiles :
                    ascat = lc.convert2region("{}{}".format(ascatFolder, b), "ascatarray", "error")
                    if not os.path.isfile("ascatNGSVSascat2.tsv") :
                        createFile("ascatNGSVSascat2.tsv")
                    with open("ascatNGSVSascat2.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ngs, ascat)))

            # Compare with SNP-Arrays
            arrayFolder = "{}/Array/".format(workindir)
            if os.path.isdir(arrayFolder) :
                arrayFiles = os.listdir(arrayFolder)
                for b in arrayFiles :
                    arr = lc.convert2region("{}{}".format(arrayFolder, b), "array", "error")
                    if not os.path.isfile("ascatNGSVSarrays.tsv") :
                        createFile("ascatNGSVSarrays.tsv")
                    with open("ascatNGSVSarrays.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ngs, arr)))

            # Compare with FACETS
            facetsFiles = getFACETS(workindir)
            for b in facetsFiles :
                f = lc.convert2region(b, "facets", "error")
                if not os.path.isfile("ascatNGSVSfacets.tsv") :
                    createFile("ascatNGSVSfacets.tsv")
                with open("ascatNGSVSfacets.tsv", "a") as fi :

                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ngs, f)))

            # Compare with Sequenza
            sequenzaFiles = getSequenza(workindir)
            for b in sequenzaFiles :
                seq = lc.convert2region(b, "sequenza", "error")
                if not os.path.isfile("ascatNGSVSsequenza.tsv") :
                    createFile("ascatNGSVSsequenza.tsv")
                with open("ascatNGSVSsequenza.tsv", "a") as fi:
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ngs, seq)))

            # Compare with PURPLE
            purpleFiles = getPurple(workindir)
            for b in purpleFiles :
                purp = lc.convert2region(b, "purple", "error")
                if not os.path.isfile("ascatNGSVSpurple.tsv") :
                    createFile("ascatNGSVSpurple.tsv")
                with open("ascatNGSVSpurple.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(ngs, purp)))

    os.rename("ascatNGSVSascat2.tsv", "main6/ascatNGSVSascat2.tsv")
    os.rename("ascatNGSVSarrays.tsv", "main6/ascatNGSVSarrays.tsv")
    os.rename("ascatNGSVSfacets.tsv", "main6/ascatNGSVSfacets.tsv")
    os.rename("ascatNGSVSascatNGS.tsv", "main6/ascatNGSVSascatNGS.tsv")
    os.rename("ascatNGSVSsequenza.tsv", "main6/ascatNGSVSsequenza.tsv")
    os.rename("ascatNGSVSpurple.tsv", "main6/ascatNGSVSpurple.tsv")

    # Repeat the analysis, but comparing Sequenza vs all the other approximations
    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} Sequenza".format(s))
        # Get all the Sequenza done in the submitter
        sequenzaFiles = getSequenza(workindir)
        for a in sequenzaFiles :
            # Compare sequenza vs itself
            seq = lc.convert2region(a, "sequenza", "error")
            if not os.path.isfile("sequenzaVSsequenza.tsv") :
                createFile("sequenzaVSsequenza.tsv")
            with open("sequenzaVSsequenza.tsv", "a") as fi :
                fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(seq, seq)))

            # Compare with ASCAT2
            ascatFolder = "{}/ASCAT2".format(workindir)
            if os.path.isdir(ascatFolder) :
                ascatFiles = os.listdir(ascatFolder)
                for b in ascatFiles :
                    ascat = lc.convert2region("{}/{}".format(ascatFolder, b), "ascatarray", "error")
                    if not os.path.isfile("sequenzaVSascat2.tsv") :
                        createFile("sequenzaVSascat2.tsv")
                    with open("sequenzaVSascat2.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(seq, ascat)))

            # Compare with SNP-Arrays
            arrayFolder = "{}/Array".format(workindir)
            if os.path.isdir(arrayFolder) :
                arrayFiles = os.listdir(arrayFolder)
                for b in arrayFiles :
                    arr = lc.convert2region("{}/{}".format(arrayFolder, b), "array", "error")
                    if not os.path.isfile("sequenzaVSarrays.tsv") :
                        createFile("sequenzaVSarrays.tsv")
                    with open("sequenzaVSarrays.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(seq, arr)))

            # Compare with FACETS
            facetsFiles = getFACETS(workindir)
            for b in facetsFiles :
                f = lc.convert2region(b, "facets", "error")
                if not os.path.isfile("sequenzaVSfacets.tsv") :
                    createFile("sequenzaVSfacets.tsv")
                with open("sequenzaVSfacets.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(seq, f)))

            # Compare with ascatNGS
            ascatngsFiles = getAscatNGS(workindir)
            for b in ascatngsFiles :
                ngs = lc.convert2region(b, "ascatngs", "error")
                if not os.path.isfile("sequenzaVSascatNGS.tsv") :
                    createFile("sequenzaVSascatNGS.tsv")
                with open("sequenzaVSascatNGS.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(seq, ngs)))

            # Compare with PURPLE
            purpleFiles = getPurple(workindir)
            for b in purpleFiles :
                purp = lc.convert2region(b, "purple", "error")
                if not os.path.isfile("sequenzaVSpurple.tsv") :
                    createFile("sequenzaVSpurple.tsv")
                with open("sequenzaVSpurple.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(seq, purp)))

    os.rename("sequenzaVSascat2.tsv", "main6/sequenzaVSascat2.tsv")
    os.rename("sequenzaVSarrays.tsv", "main6/sequenzaVSarrays.tsv")
    os.rename("sequenzaVSfacets.tsv", "main6/sequenzaVSfacets.tsv")
    os.rename("sequenzaVSascatNGS.tsv", "main6/sequenzaVSascatNGS.tsv")
    os.rename("sequenzaVSsequenza.tsv", "main6/sequenzaVSsequenza.tsv")
    os.rename("sequenzaVSpurple.tsv", "main6/sequenzaVSpurple.tsv")

    # Repeat the analysis but comparing PURPLE vs all the other tools
    for sub in submitters :
        s = sub[0]
        workindir = "{}/{}/{}".format(cancerpath, cancer, s)
        print("INFO: Checking {} PURPLE".format(s))
        # Get all the Sequenza done in the submitter
        purpleFiles = getPurple(workindir)
        for a in purpleFiles :
            # Compare PURPLE vs itself
            purp = lc.convert2region(a, "purple", "error")
            if not os.path.isfile("purpleVSpurple.tsv") :
                createFile("purpleVSpurple.tsv")
            with open("purpleVSpurple.tsv", "a") as fi :
                fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = a, cmp = compareTools(purp, purp)))

            # Compare with ASCAT2
            ascatFolder = "{}/ASCAT2".format(workindir)
            if os.path.isdir(ascatFolder) :
                ascatFiles = os.listdir(ascatFolder)
                for b in ascatFiles :
                    ascat = lc.convert2region("{}/{}".format(ascatFolder, b), "ascatarray", "error")
                    if not os.path.isfile("purpleVSascat2.tsv") :
                        createFile("purpleVSascat2.tsv")
                    with open("purpleVSascat2.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(purp, ascat)))

            # Compare with SNP-Arrays
            arrayFolder = "{}/Array".format(workindir)
            if os.path.isdir(arrayFolder) :
                arrayFiles = os.listdir(arrayFolder)
                for b in arrayFiles :
                    arr = lc.convert2region("{}/{}".format(arrayFolder, b), "array", "error")
                    if not os.path.isfile("purpleVSarrays.tsv") :
                        createFile("purpleVSarrays.tsv")
                    with open("purpleVSarrays.tsv", "a") as fi :
                        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(purp, arr)))

            # Compare with FACETS
            facetsFiles = getFACETS(workindir)
            for b in facetsFiles :
                f = lc.convert2region(b, "facets", "error")
                if not os.path.isfile("purpleVSfacets.tsv") :
                    createFile("purpleVSfacets.tsv")
                with open("purpleVSfacets.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(purp, f)))

            # Compare with ascatNGS
            ascatngsFiles = getAscatNGS(workindir)
            for b in ascatngsFiles :
                ngs = lc.convert2region(b, "ascatngs", "error")
                if not os.path.isfile("purpleVSascatNGS.tsv") :
                    createFile("purpleVSascatNGS.tsv")
                with open("purpleVSascatNGS.tsv", "a") as fi :
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(purp, ngs)))

            # Compare with Sequenza
            sequenzaFiles = getSequenza(workindir)
            for b in sequenzaFiles :
                seq = lc.convert2region(b, "sequenza", "error")
                if not os.path.isfile("purpleVSsequenza.tsv") :
                    createFile("purpleVSsequenza.tsv")
                with open("purpleVSsequenza.tsv", "a") as fi:
                    fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(sub = sub[0], id1 = a, id2 = b, cmp = compareTools(purp, seq)))

    os.rename("purpleVSascat2.tsv", "main6/purpleVSascat2.tsv")
    os.rename("purpleVSarrays.tsv", "main6/purpleVSarrays.tsv")
    os.rename("purpleVSfacets.tsv", "main6/purpleVSfacets.tsv")
    os.rename("purpleVSascatNGS.tsv", "main6/purpleVSascatNGS.tsv")
    os.rename("purpleVSsequenza.tsv", "main6/purpleVSsequenza.tsv")
    os.rename("purpleVSpurple.tsv", "main6/purpleVSpurple.tsv")
示例#7
0
def _writeToolSummary(prefix, caseId, region, jaccard, contingency):
    """Append the summary values of one case to the per-metric files of a tool.

    Every file receives one "<caseId>\\t<value>" line. When the comparison
    against the array could not be done (jaccard/contingency are None), "NA"
    is written for the comparison metrics.

    Parameters
    ----------
        prefix : str
            Tool name used as file name prefix ("ascat" or "facets")
        caseId : str
            Identifier written in the first column
        region : dict
            Tool output in REGION format. Its "purity" and "ploidy" keys are read
        jaccard : object|None
            Value returned by st.jaccardIndex, or None if not computed
        contingency : dict|None
            Value returned by st.doContingency, or None if not computed
    """
    def append(fileName, value):
        with open(fileName, "a") as fi:
            fi.write("{}\t{}\n".format(caseId, value))

    append("{}Purities.txt".format(prefix), region["purity"])
    append("{}Ploidies.txt".format(prefix), region["ploidy"])
    append("{}Jaccard.txt".format(prefix), "NA" if jaccard is None else jaccard)
    # (metric used in the file name, key read from the contingency dict)
    # NOTE(review): the FDR files have always received the "ACC" value. This
    # looks like a copy-paste slip; switch the key to "FDR" once confirmed that
    # st.doContingency reports it.
    metrics = (("ACC", "ACC"), ("TPR", "TPR"), ("TNR", "TNR"), ("PPV", "PPV"),
               ("FDR", "ACC"))
    for aberration, label in (("A", "amplification"), ("D", "deletion")):
        for fileMetric, key in metrics:
            if contingency is None:
                value = "NA"
            else:
                value = contingency[aberration][key]
            append("{}{}{}.txt".format(prefix, fileMetric, label), value)


def launchAnalysis(folder, array, ascat, facets):
    """Compare the ascatNGS and FACETS outputs of a case against its array.

    The function moves to the case folder, converts the available outputs to
    REGION format, compares each tool against the array (Jaccard index and
    contingency statistics for amplifications and deletions) and finally
    appends the results to the summary files located next to this script.

    Parameters
    ----------
        folder : str
            Working directory of the case. Its last path component is used as
            case identifier in the summary files
        array : str
            Path to the array file, relative to the parent directory of folder
        ascat : str
            Path to the ascatNGS output. Empty string if there is no output
        facets : str
            Path to the FACETS output. Empty string if there is no output
    """
    pythonpath = os.path.dirname(os.path.realpath(__file__))
    # Go to the working directory to run the comparison analysis
    print("INFO: Analysing {}".format(folder))
    os.chdir(folder)
    ascatReg = None
    facetsReg = None
    arrayReg = None
    # Comparison results stay as None when the comparison cannot be done
    jc1 = cm1 = None
    jc2 = cm2 = None
    caseId = folder.split("/")[-1]
    try:
        if ascat != "":
            ascatReg = comp.convert2region(ascat, "ascat")
        if facets != "":
            facetsReg = comp.convert2region(facets, "facets")

        arrayReg = comp.convert2region("../{}".format(array), "array")

        if facetsReg is not None and arrayReg is not None:
            regAr_F = comp.getFragments(arrayReg, facetsReg)
            mt1 = comp.doComparison2(regAr_F, facetsReg, arrayReg)
            jc1 = st.jaccardIndex(mt1, ["A", "D"])
            mt1 = comp.doComparison(regAr_F, facetsReg, arrayReg)
            cm1 = st.doContingency(mt1, ["A", "D"])

        if ascatReg is not None and arrayReg is not None:
            # Use the array/ascatNGS fragments here. The array/FACETS ones
            # (regAr_F) may not even exist when there is no FACETS output
            regAr_A = comp.getFragments(arrayReg, ascatReg)
            mt2 = comp.doComparison2(regAr_A, ascatReg, arrayReg)
            jc2 = st.jaccardIndex(mt2, ["A", "D"])
            mt2 = comp.doComparison(regAr_A, ascatReg, arrayReg)
            cm2 = st.doContingency(mt2, ["A", "D"])
    finally:
        # Return to the script folder even if the analysis fails, so the next
        # call does not start from an unexpected directory
        os.chdir(pythonpath)

    # Store the summary data in the corresponding files
    if ascatReg is not None:
        _writeToolSummary("ascat", caseId, ascatReg, jc2, cm2)
    if facetsReg is not None:
        _writeToolSummary("facets", caseId, facetsReg, jc1, cm1)
示例#8
0
def _comparisonColumns(reg1, reg2):
    """Compare two REGION outputs and return the columns for the output row.

    Parameters
    ----------
        reg1 : dict
            First tool output in REGION format
        reg2 : dict
            Second tool output in REGION format

    Returns
    -------
        list
            Region similarity, base similarity, one MCC per aberration in
            cte.aberrations, Jaccard index, and purity and ploidy of both
            outputs (reg1 purity, reg2 purity, reg1 ploidy, reg2 ploidy)
    """
    regs = lc.getFragments(reg1, reg2)
    # NOTE(review): the result of doComparison was never used. The call is
    # kept in case it has side effects; remove it if it has none
    lc.doComparison(regs, reg1, reg2)
    c2 = lc.doComparison2(regs, reg1, reg2)
    sts = ls.doContingency(c2)  # Get the MCC for all the aberrations
    jcc = ls.jaccardIndex(c2)  # Get the Jaccard index for all the aberrations
    cols = [ls.regSimilarity(regs, reg1, reg2),
            ls.baseSimilarity(regs, reg1, reg2)]
    cols.extend(sts[ab]["MCC"] for ab in cte.aberrations)
    cols.append(jcc)
    cols.extend(
        [reg1["purity"], reg2["purity"], reg1["ploidy"], reg2["ploidy"]])
    return cols


def _naColumns():
    """Return the "NA" placeholders for a comparison that cannot be done.

    The list has as many elements as the one returned by _comparisonColumns.
    """
    # 2 similarities + one MCC per aberration + Jaccard + 2 purities + 2 ploidies
    return ["NA"] * (2 + len(cte.aberrations) + 1 + 4)


def main():
    """Compare FACETS, ascatNGS and Sequenza pairwise in all the OV cases.

    For each tumor/control pair of each OV submitter found in the database,
    the outputs of the three tools are converted to REGION format and compared
    in pairs. One row per pair is appended to facetsVSascatngs.tsv,
    facetsVSsequenza.tsv and ascatVSsequenza.tsv. If one of the two outputs
    needed for a comparison does not exist, the row is filled with "NA".
    """
    fvaFi = "facetsVSascatngs.tsv"
    fvsFi = "facetsVSsequenza.tsv"
    avsFi = "ascatVSsequenza.tsv"
    # Write the output files' header
    with open(fvaFi, "w") as fi:
        fi.write(
            "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tFpurity\tApurity\tFploidy\tAploidy\n"
        )
    with open(fvsFi, "w") as fi:
        fi.write(
            "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tFpurity\tSpurity\tFploidy\tSploidy\n"
        )
    with open(avsFi, "w") as fi:
        fi.write(
            "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tApurity\tSpurity\tAploidy\tSploidy\n"
        )

    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer='OV'")
        cases = q.fetchall()

    for c in cases:
        # Collect the tumor and control samples registered for the case.
        # The submitter interpolated in the queries comes from the query above
        with dbcon:
            cur = dbcon.cursor()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Tumor%'"
                .format(c[0]))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Normal%'"
                .format(c[0]))
            controls = q.fetchall()
        for tm in tumors:
            for cn in controls:
                # Get the absolute path and the prefix for the tool output
                tf = "{wd}/{sub}/{tm}_VS_{cn}".format(wd=wd,
                                                      sub=c[0],
                                                      tm=tm[0].split("-")[0],
                                                      cn=cn[0].split("-")[0])
                caseName = tf.split("/")[-1]
                facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf)
                ascat = mm.findAscatName("{}_ASCAT/".format(tf))
                sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0])
                # Reset the regions in every pair so data from a previous
                # pair cannot leak into this one
                outf = outa = outs = None
                hasFacets = os.path.isfile(facets)
                hasAscat = os.path.isfile(ascat)
                hasSequenza = os.path.isfile(sequenza)
                if hasFacets:
                    outf = lc.convert2region(facets, "facets")
                if hasAscat:
                    outa = lc.convert2region(ascat, "ascatngs")
                if hasSequenza:
                    outs = lc.convert2region(sequenza, "sequenza")

                # (output file, both outputs available?, first tool, second tool)
                # The purity and ploidy columns follow this same tool order,
                # matching the header of each file
                comparisons = (
                    (fvaFi, hasFacets and hasAscat, outf, outa),
                    (fvsFi, hasFacets and hasSequenza, outf, outs),
                    (avsFi, hasAscat and hasSequenza, outa, outs),
                )
                # Do all the comparisons before writing, so a failure does
                # not leave the pair in only some of the files
                rows = []
                for outFile, available, first, second in comparisons:
                    if available:
                        cols = _comparisonColumns(first, second)
                    else:
                        cols = _naColumns()
                    rows.append((outFile, [caseName] + cols))

                # Write the output in the corresponding files for each comparison
                for outFile, row in rows:
                    with open(outFile, "a") as fi:
                        fi.write(
                            "\t".join("{}".format(v) for v in row) + "\n")
示例#9
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
MAIN: Testing the function that counts the number of bases that have each aberration
"""

import libcomparison as lc
import libstatistics as ls
import libconstants as ct
import libgetters as lg

# Load the example output of each tool with lc.convert2region. The second
# argument names the program that produced the file.
print("INFO: Loading example from FACETS")
f = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_FACETS/facets_comp_cncf.tsv",
    "facets")
print("INFO: Loading example from ascatNGS")
# NOTE(review): the file name mentions TCGA-04-1331 but it is stored in the
# TCGA-13-0757 folder - confirm that this is the intended sample
a = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_ASCAT/TCGA-04-1331-01A-01W.copynumber.caveman.csv",
    "ascatngs")
print("INFO: Loading example from Sequenza")
# NOTE(review): same mismatch between file name and folder as in ascatNGS
s = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_Sequenza/TCGA-04-1331_segments.txt",
    "sequenza")
print("INFO: Loading example from PURPLE")
p = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_PURPLE/TUMOR.purple.cnv.somatic.tsv",
    "purple")
# Initial values for the rest of the script (not visible in this fragment)
allbases = []
it = 0
current = 0
"""
示例#10
0
"""
MAIN: Example of introduction of sequenza
"""
"""
The test includes:
    1. Open the sample outputs from sequenza, facets, ascatNGS, and array respectively
    2. Compare all tools against arrays in the whole genome
    3. Get the copy number reported by the 4 tools in specific regions: BRCA1, BRCA2, PALB2, and ATM
"""

import libcomparison as lc
import libgetters as lg
import libstatistics as ls

# Open the output for the sample TCGA-04-1332 output from all the tools.
# Paths are relative, so the script depends on the folder it is run from.
sequenza = lc.convert2region(
    "../90cf56c6_VS_f4b549d0_Sequenza/TCGA-04-1332_segments.txt", "sequenza")
# NOTE(review): this folder starts with "90cf56c6c" while the Sequenza and
# ASCAT folders start with "90cf56c6" - confirm the path is not a typo
facets = lc.convert2region(
    "../90cf56c6c_VS_f4b549d0_FACETS/facets_comp_cncf.tsv", "facets")
ascat = lc.convert2region(
    "../90cf56c6_VS_f4b549d0_ASCAT/H_GP-04-1332-01A-01W-0488-09-1.copynumber.caveman.csv",
    "ascatngs")
array = lc.convert2region(
    "../73a3a9bb-7dfc-4fc5-9f31-b2630c82010b_Array/QUANT_p_TCGA_Batch12_AFFX_GenomeWideSNP_6_F05_437768.grch38.seg.v2.txt",
    "array")
# Message (Catalan): "Files opened successfully"
print("INFO: Arxius oberts satisfactoriament")

# Print the counts in each file
# Message (Catalan): "Summary of the data obtained in each tool"
print("\nINFO: Resum de les dades obteses en cada eina")
# One ls.countsXtool call per tool output (array, Sequenza and FACETS)
car = ls.countsXtool(array)
cs = ls.countsXtool(sequenza)
cf = ls.countsXtool(facets)
示例#11
0
    4) Extract confusion matrix for (A)mplification, (D)eletion, and (N)ormal copy number
    5) Calculate the Jaccard index for the same aberrations
"""

import libextractfile as exfi
import libcomparison as compi
import libgetters as ge
import libstatistics as sts
import libconstants as cts

# Message (Spanish): "Unit test to compare the output of a FACETS example
# with the array data downloaded from TCGA"
print(
    "INFO: Test unitario para comparar el output de un ejemplo de FACETS con los datos del array descargado desde TCGA"
)
# TEST 1 (Spanish: "Extract data"): convert both files to REGION format
print("TEST 1) Extraer datos")
# NOTE(review): the messages and the table label below say "Array", but the
# file loaded in ar is a PURPLE output - confirm which one is intended
ar = compi.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_PURPLE/TUMOR.purple.cnv.somatic.tsv",
    "PURPLE")
fa = compi.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_FACETS/facets_comp_cncf.tsv",
    "FACETS")
# TEST 2 (Spanish: "Searching the regions in common to study")
print("TEST 2) Buscando las regiones en comun para estudio")
regs = compi.getFragments(ar, fa)
# TEST 3 (Spanish: "Create the 4x4 comparison table")
print("TEST 3) Crear la tabla comparativa 4x4")
dc = compi.doComparison(regs, ar, fa)
print(sts.printTable(dc, "Array", "FACETS", False))
# TEST 4 (Spanish: "Confusion matrix results for each aberration")
print("TEST 4) Resultados de la matriz de confusion para cada aberracion")
# c1 and c2 are not used in the lines visible in this fragment
c1, c2 = sts.calculateCounts(dc)
# Contingency data for (A)mplification, (D)eletion and (N)ormal copy number
dicContingency = sts.doContingency(dc, ["A", "D", "N"])
print("\tAmplificacion\n\t{}\n\n".format(dicContingency["A"]))
print("\tDelecion\n\t{}\n\n".format(dicContingency["D"]))
print("\tNormal\n\t{}\n".format(dicContingency["N"]))
示例#12
0
    1. Open example outputs from sequenza, facets, ascatNGS, array, and ASCAT2
    2. Count the aberrations reported by each tool
    3. Get the copy number reported by the all outputs in BRCA1 and BRCA2 genes
"""

import libcomparison as lc
import libgetters as lg
import libstatistics as ls

# BRCA1/2 gene coordinates as reported by bioGPS
# Each list contains: chromosome, start position, end position
brca1 = ["17", 43044295, 43170245]
brca2 = ["13", 32315086, 32400266]

# Convert the files to REGION format
# The three NGS tools are converted passing "error" as third argument, while
# the two array-based files below are converted without it
ascatngs = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_ASCAT/H_GP-04-1332-01A-01W-0488-09-1.copynumber.caveman.csv",
    "ascatngs", "error")
sequenza = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_Sequenza/TCGA-04-1332_segments.txt",
    "sequenza", "error")
facets = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_FACETS/facets_comp_cncf.tsv",
    "facets", "error")
# SNP-Array segments
array = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/Array/QUANT_p_TCGA_Batch12_AFFX_GenomeWideSNP_6_E11_437726.grch38.seg.v2.txt",
    "array")
# ASCAT2 allele-specific segments
ascat = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/ASCAT2/TCGA-OV.79e63073-7d6d-456b-92c7-a3a7f0216ee7.ascat2.allelic_specific.seg.txt",
    "ascatarray")

print("INFO: Files opened successfully")
示例#13
0
    6) ggplot of minor copy number (lcn)
"""

import libextractfile as exfi
import libcomparison as compi
import libgetters as ge
import libstatistics as sts
import libconstants as cts
import os

# Message (Spanish): "Unit test to check plots comparing an ascatNGS example
# and a FACETS one"
print(
    "INFO: Test unitario para comprobar graficas comparando un ejemplo de ascatNGS y uno de FACETS"
)
# TEST 1 (Spanish: "Extract the data"): convert both outputs to REGION format
print("TEST 1) Extraer los datos")
ascat = compi.convert2region(
    "/home/labs/solelab/ffuster2/Desktop/doctorat/cas_estudi/input_examples/TCGA-09-0369/TCGA-09-0369_40e311a4_VS_f4441d6e/H_GP-09-0369-01A-01W-0372-09-1.copynumber.caveman.csv",
    "ascat")
facets = compi.convert2region(
    "/home/labs/solelab/ffuster2/Desktop/doctorat/cas_estudi/input_examples/TCGA-09-0369/TCGA-09-0369_40e311a4_VS_f4441d6e/facets_comp_cncf.tsv",
    "FACETS")
# TEST 2 (Spanish: "Split the regions to get regions in common")
print("TEST 2) Dividir las regiones para obtener regiones en comun")
regs = compi.getFragments(facets, ascat)
# TEST 3 (Spanish: "Draw the concordance between the logR")
print("TEST 3) Dibujar la concordancia entre los logR")
# A ValueError from logRcomp is reported and the test continues
try:
    sts.logRcomp(regs, facets, ascat, "FACETS", "ASCAT")
except ValueError:
    print("ERROR: Cannot create the logR plot")
# TEST 4 (Spanish: "Draw the copy number counts using the ggplot library")
print("TEST 4) Dibujar los copy number counts usando la libreria ggplot")
sts.doGGplotFiles(facets, ascat, "FACETS", "ASCAT")
print(
    "TEST 5) Crear un bed con las regiones reportadas por cada archivo y las regiones en comun"
示例#14
0
    percent = 100 * float(coin) / float(all)
    return percent


if __name__ == "__main__":
    """
        UNIT TEST
    """
    pr1 = "FACETS"
    pr2 = "ascatngs"
    print(
        "\n\n\t\tWELCOME TO libstatistics.py UNIT TEST\n\t\t-------------------------------------\n"
    )
    print("Reading FACETS example")
    fa = comp.convert2region(
        "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0887/4d1eb382_VS_5820c55c_FACETS/facets_comp_cncf.tsv",
        pr1)
    print("Reading AscatNGS example")
    s = comp.convert2region(
        "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0887/4d1eb382_VS_5820c55c_ASCAT/TCGA-13-0887-01A-01W.copynumber.caveman.csv",
        pr2)
    print("Read complete. Getting the fragments")
    regs = comp.getFragments(fa, s)
    print("Got fragments. Checking the copy number")
    dc = comp.doComparison(regs, fa, s)
    print("Copy number done. Preparing some statistics")
    print("1) Counts")
    c1, c2 = calculateCounts(dc)
    print("2) Counts per tool")
    counts1, count2 = countsXtool(fa, s)
    print("3) Bases reported in each aberration")