# Module-level logger and package resource directories.
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("reports", "eg/")


if __name__ == "__main__":
    # Instantiate one wrapper object per data-set.
    wes_obj = WES()
    mobem_obj = Mobem()
    cn_obj = CopyNumber()
    prot_obj = Proteomics()
    gexp_obj = GeneExpression()
    crispr_obj = CRISPR()
    drug_obj = DrugResponse()

    # Sample selection. Only the proteomics data-set is passed to the
    # intersection here, so the result is a set built from its data alone.
    samples = set.intersection(
        set(prot_obj.get_data()),
    )
    LOG.info(f"Samples: {len(samples)}")

    # Restrict the data-sets to the selected samples.
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")

    gexp = gexp_obj.filter(subset=samples)
# NOTE(review): the enclosing scope of this fragment is not visible in this
# chunk, and TPATH is expected to be defined elsewhere in the module - confirm.

# Gene information: read the annotation export and derive one row per gene
# name holding its chromosome and mean genomic position.
ginfo = pd.read_csv(f"{TPATH}/mart_export.txt", sep="\t")
ginfo["mean_pos"] = ginfo[["Gene end (bp)", "Gene start (bp)"]].mean(axis=1)

# Keep only genes on the chromosomes listed in Utils.CHR_ORDER.
ginfo = ginfo[ginfo["Chromosome/scaffold name"].isin(Utils.CHR_ORDER)]

by_gene = ginfo.groupby("Gene name")
ginfo_pos = pd.concat(
    [
        # First chromosome label seen for each gene name.
        by_gene["Chromosome/scaffold name"].first().rename("chr"),
        # Average of the per-record mean positions for each gene name.
        by_gene["mean_pos"].mean().rename("chr_pos"),
    ],
    axis=1,
)

# Y matrices
gexp_obj = GeneExpression()
gexp = gexp_obj.filter()
LOG.info(f"Gexp: {gexp.shape}")

prot_obj = Proteomics()
prot = prot_obj.filter()
# Keep rows with more than 300 counted values along axis 1
# (presumably non-missing measurements of a pandas DataFrame - confirm).
prot = prot[prot.count(axis=1) > 300]
LOG.info(f"Prot: {prot.shape}")

# X matrices
crispr_obj = CRISPR()
crispr = crispr_obj.filter(dtype="merged")
LOG.info(f"CRISPR: {crispr.shape}")

drespo_obj = DrugResponse()
# Module-level logger and package resource directories.
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("reports", "eg/")
TPATH = pkg_resources.resource_filename("tables", "/")


if __name__ == "__main__":
    # Proteomics is filtered first; iterating it provides the subset that is
    # passed to the other data-sets below.
    prot_obj = Proteomics()
    prot = prot_obj.filter()
    LOG.info(f"Proteomics: {prot.shape}")

    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter(subset=list(prot))
    LOG.info(f"Transcriptomics: {gexp.shape}")

    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(subset=list(prot))
    LOG.info(f"CRISPR: {crispr.shape}")

    # Drug response: flatten each index entry into a single ";"-joined string.
    drespo_obj = DrugResponse()
    drespo = drespo_obj.filter()
    drespo.index = [
        ";".join(str(part) for part in entry) for entry in drespo.index
    ]

    # First "max_screening_conc" value per (drug_id, drug_name, dataset) group.
    by_drug = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])
    dmax = by_drug["max_screening_conc"].first()