# Y matrices
    #
    # Transcriptomics matrix, loaded with the default filter settings.
    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter()
    LOG.info(f"Gexp: {gexp.shape}")

    # Proteomics matrix; keep only rows with more than 300 non-missing values
    # (count(1) counts non-NA cells along each row).
    prot_obj = Proteomics()
    prot = prot_obj.filter()
    prot = prot[prot.count(1) > 300]
    LOG.info(f"Prot: {prot.shape}")

    # X matrices
    #
    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(dtype="merged")
    LOG.info(f"CRISPR: {crispr.shape}")

    # Drug response matrix; same >300 non-missing values per row requirement.
    drespo_obj = DrugResponse()
    drespo = drespo_obj.filter()
    drespo = drespo[drespo.count(1) > 300]

    # Drop rows whose label contains a "+" in any of its parts.
    # The previous test (`"+" not in label`) was an element-equality check when
    # the label is a tuple, so it only matched a part that was exactly "+" and
    # otherwise filtered nothing. Checking every part as a string gives the
    # same answer for plain-string labels and a real substring test for tuple
    # labels.
    # NOTE(review): presumably "+" marks drug combinations - confirm that
    # excluding them is the intended behaviour.
    keep_rows = [
        not any("+" in str(part) for part in label) for label in drespo.index
    ]
    # .loc keeps the boolean-mask meaning even when the mask is empty.
    drespo = drespo.loc[keep_rows]

    # Flatten each row label into a single ";"-separated string.
    drespo.index = [";".join(map(str, label)) for label in drespo.index]

    # First putative gene target recorded for each (drug_id, drug_name,
    # dataset) group, re-keyed with the same ";"-joined labels as `drespo`.
    dtargets = drespo_obj.drugresponse.groupby(
        ["drug_id", "drug_name", "dataset"])["putative_gene_target"].first()
    dtargets.index = [";".join(map(str, label)) for label in dtargets.index]
    LOG.info(f"Drug: {drespo.shape}")

    # Covariates
    #
# Example #2
# 0
    # Handle on the drug-response data-set used further down.
    drug_obj = DrugResponse()

    # Samples
    #
    # Only the proteomics data-set contributes, so the sample set is simply
    # the set of labels yielded by iterating the proteomics data.
    samples = set(prot_obj.get_data())
    LOG.info(f"Samples: {len(samples)}")

    # Filter data-sets
    #
    # Every data-set is restricted to the same `samples` subset.
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")

    gexp = gexp_obj.filter(subset=samples)
    LOG.info(f"Transcriptomics: {gexp.shape}")

    crispr = crispr_obj.filter(subset=samples, dtype="merged")
    LOG.info(f"CRISPR: {crispr.shape}")

    # Drug response: collapse each row label into one ";"-separated string.
    drespo = drug_obj.filter(subset=samples)
    drespo_labels = pd.Series(
        [";".join(str(part) for part in label) for label in drespo.index]
    )
    drespo = drespo.set_index(drespo_labels)

    # Maximum concentrations, re-keyed the same way and aligned to `drespo`.
    drespo_maxc = drug_obj.maxconcentration.copy()
    maxc_labels = []
    for label in drug_obj.maxconcentration.index:
        maxc_labels.append(";".join(str(part) for part in label))
    drespo_maxc.index = maxc_labels
    drespo_maxc = drespo_maxc.reindex(drespo.index)
    LOG.info(f"Drug response: {drespo.shape}")

    # Copy number: limited to samples present in the proteomics samplesheet,
    # divided by the samplesheet "ploidy" values and transformed as log2(x + 1).
    cn_samples = samples.intersection(prot_obj.ss.index)
    cn = cn_obj.filter(subset=cn_samples)
    ploidy = prot_obj.ss.loc[cn.columns, "ploidy"]
    cn = np.log2(cn.divide(ploidy) + 1)
# Example #3
# 0
    elif gene in ESS_GENES:
        return "essential"

    elif gene in NESS_GENES:
        return "non-essential"

    else:
        return "unclassified"


if __name__ == "__main__":
    # Project score
    #
    # Merged CRISPR scores, plus the single column for model SIDM00136
    # (presumably the HT-29 cell line, going by the variable name).
    cscore_obj = CRISPR()
    cscore = cscore_obj.filter(dtype="merged")
    cscore_ht29 = cscore["SIDM00136"]

    # Samplesheet
    #
    lib_name = "2gCRISPR_Pilot_library_v2.0.0.xlsx"

    # Keep only the samplesheet rows that belong to this library.
    lib_ss = pd.read_excel(f"{DPATH}/gi_samplesheet.xlsx")
    lib_ss = lib_ss.query(f"library == '{lib_name}'")

    # Library annotation: classify both guides of every vector from their
    # (approved symbol, chromosome) pair.
    lib = read_gi_library(lib_name)
    class_columns = (
        ("sgRNA1_class", "sgRNA1_Approved_Symbol", "sgRNA1_Chr"),
        ("sgRNA2_class", "sgRNA2_Approved_Symbol", "sgRNA2_Chr"),
    )
    for class_col, symbol_col, chrom_col in class_columns:
        guide_pairs = lib[[symbol_col, chrom_col]].values
        lib[class_col] = [
            classify_gene(symbol, chrom) for symbol, chrom in guide_pairs
        ]

    # Vector-level class is the two guide classes joined by " + ".
    lib["vector_class"] = lib["sgRNA1_class"] + " + " + lib["sgRNA2_class"]

    # Unique sample names from the filtered samplesheet (order not guaranteed).
    unique_names = set(lib_ss["name"])
    samples = list(unique_names)
# Filesystem path of the "/" resource inside the ``tables`` package, as
# resolved by pkg_resources.
TPATH = pkg_resources.resource_filename("tables", "/")


if __name__ == "__main__":
    # Data-sets
    #
    # Proteomics is loaded first; its column labels define the subset passed
    # to the transcriptomics and CRISPR filters below.
    prot_obj = Proteomics()
    prot = prot_obj.filter()
    LOG.info(f"Proteomics: {prot.shape}")

    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter(subset=list(prot.columns))
    LOG.info(f"Transcriptomics: {gexp.shape}")

    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(subset=list(prot.columns))
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()

    # Drug response with each row label collapsed into one ";"-joined string.
    drespo = drespo_obj.filter()
    drespo.index = [
        ";".join(str(part) for part in label) for label in drespo.index
    ]

    # Natural log of half the first recorded maximum screening concentration
    # per (drug_id, drug_name, dataset) group, keyed like `drespo`.
    drug_keys = ["drug_id", "drug_name", "dataset"]
    dmax = drespo_obj.drugresponse.groupby(drug_keys)["max_screening_conc"].first()
    dmax = np.log(dmax * 0.5)
    dmax.index = [";".join(str(part) for part in label) for label in dmax.index]

    dtargets = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[
        "putative_gene_target"