axis=1)

    # Y matrices
    #
    # Gene expression is taken as returned by the importer's filter().
    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter()
    LOG.info(f"Gexp: {gexp.shape}")

    prot_obj = Proteomics()
    prot = prot_obj.filter()
    # Keep only rows with more than 300 non-missing values (count along axis 1);
    # assumes filter() returns a pandas DataFrame.
    prot = prot[prot.count(1) > 300]
    LOG.info(f"Prot: {prot.shape}")

    # X matrices
    #
    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(dtype="merged")
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()
    drespo = drespo_obj.filter()
    # Same coverage threshold as for proteomics: more than 300 non-missing
    # values per row.
    drespo = drespo[drespo.count(1) > 300]
    # Drop rows whose index entry contains "+".
    # NOTE(review): the next statement joins each index entry with map(str, i),
    # which suggests the entries are tuples; for a tuple, `"+" not in i` tests
    # element equality, not substring membership - confirm this filter removes
    # what it is meant to remove.
    drespo = drespo[["+" not in i for i in drespo.index]]
    # Flatten each index entry into a single ";"-separated string.
    drespo.index = [";".join(map(str, i)) for i in drespo.index]

    # First "putative_gene_target" value per (drug_id, drug_name, dataset)
    # group, re-indexed with the same ";"-joined key format used for drespo.
    dtargets = drespo_obj.drugresponse.groupby(
        ["drug_id", "drug_name", "dataset"])["putative_gene_target"].first()
    dtargets.index = [";".join(map(str, i)) for i in dtargets.index]
    LOG.info(f"Drug: {drespo.shape}")

    # Covariates
Пример #2
0
        return "intergenic"

    elif gene in ESS_GENES:
        return "essential"

    elif gene in NESS_GENES:
        return "non-essential"

    else:
        return "unclassified"


if __name__ == "__main__":
    # Project Score CRISPR data ("merged" matrix); the SIDM00136 column is
    # additionally kept on its own.
    cscore_obj = CRISPR()
    cscore = cscore_obj.filter(dtype="merged")
    cscore_ht29 = cscore["SIDM00136"]

    # Sample sheet, restricted to the rows that belong to the library below.
    lib_name = "2gCRISPR_Pilot_library_v2.0.0.xlsx"
    lib_ss = pd.read_excel(f"{DPATH}/gi_samplesheet.xlsx").query(
        f"library == '{lib_name}'"
    )

    # Library annotation: classify each guide of every library row from its
    # (approved symbol, chromosome) pair, then label the row with both
    # classes joined by " + ".
    lib = read_gi_library(lib_name)
    lib["sgRNA1_class"] = [
        classify_gene(*pair)
        for pair in lib[["sgRNA1_Approved_Symbol", "sgRNA1_Chr"]].values
    ]
    lib["sgRNA2_class"] = [
        classify_gene(*pair)
        for pair in lib[["sgRNA2_Approved_Symbol", "sgRNA2_Chr"]].values
    ]
    lib["vector_class"] = lib["sgRNA1_class"] + " + " + lib["sgRNA2_class"]
Пример #3
0
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests
from crispy.DataImporter import Proteomics, CRISPR, GeneExpression, DrugResponse, Sample


# Logger shared by the script, registered under the name "Crispy".
LOG = logging.getLogger("Crispy")
# Filesystem path of the "eg/" resource inside the "reports" package.
RPATH = pkg_resources.resource_filename("reports", "eg/")


if __name__ == "__main__":
    # Data-set importers
    gexp_obj = GeneExpression()
    prot_obj = Proteomics()
    crispr_obj = CRISPR()
    drespo_obj = DrugResponse()

    # Entries present in both the proteomics and the merged CRISPR data
    # (whatever iterating each get_data() result yields).
    samples_crispr = set(prot_obj.get_data()) & set(
        crispr_obj.get_data(dtype="merged")
    )
    LOG.info(f"CRISPR samples: {len(samples_crispr)}")

    # Entries present in both the proteomics and the drug-response data.
    samples_drug = set(prot_obj.get_data()) & set(drespo_obj.get_data())
    LOG.info(f"Drug samples: {len(samples_drug)}")

    # Filter data-sets
Пример #4
0
# Filesystem path of the "data/" resource inside the "crispy" package.
DPATH = pkg_resources.resource_filename("crispy", "data/")
# Filesystem path of the "eg/" resource inside the "reports" package.
RPATH = pkg_resources.resource_filename("reports", "eg/")

if __name__ == "__main__":
    # Data-set importers (instantiated in the original order)
    wes_obj = WES()
    mobem_obj = Mobem()
    cn_obj = CopyNumber()
    prot_obj = Proteomics()
    gexp_obj = GeneExpression()
    crispr_obj = CRISPR()
    drug_obj = DrugResponse()

    # Sample universe: the distinct entries yielded by iterating the
    # proteomics data.
    samples = set(prot_obj.get_data())
    LOG.info(f"Samples: {len(samples)}")

    # Restrict proteomics and transcriptomics to that sample set.
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")

    gexp = gexp_obj.filter(subset=samples)
    LOG.info(f"Transcriptomics: {gexp.shape}")
# Filesystem path of the "eg/" resource inside the "reports" package.
RPATH = pkg_resources.resource_filename("reports", "eg/")
# Path resolved for the "tables" package.
# NOTE(review): the resource name is a bare "/" - confirm it resolves to the
# package root as intended on the pkg_resources version in use.
TPATH = pkg_resources.resource_filename("tables", "/")


if __name__ == "__main__":
    # Data-sets
    #
    # Proteomics is loaded first; list(prot) is then passed as `subset` to the
    # transcriptomics and CRISPR filters (presumably the proteomics column
    # labels, if filter() returns a DataFrame - TODO confirm).
    prot_obj = Proteomics()
    prot = prot_obj.filter()
    LOG.info(f"Proteomics: {prot.shape}")

    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter(subset=list(prot))
    LOG.info(f"Transcriptomics: {gexp.shape}")

    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(subset=list(prot))
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()

    # Drug response is not subset here; each index entry is flattened into a
    # single ";"-separated string.
    drespo = drespo_obj.filter()
    drespo.index = [";".join(map(str, i)) for i in drespo.index]

    # First "max_screening_conc" value per (drug_id, drug_name, dataset) group.
    dmax = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[
        "max_screening_conc"
    ].first()
    # Natural log of half the maximum screening concentration.
    dmax = (dmax * 0.5).pipe(np.log)
    # Same ";"-joined key format as drespo.index.
    dmax.index = [";".join(map(str, i)) for i in dmax.index]

    dtargets = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[