# Restrict the copy-number matrix using the labels obtained by iterating
# `prot` (for a DataFrame these are its column labels).
cnv = cnv_obj.filter(subset=list(prot))

# Divide by the "ploidy" annotation looked up for each `cnv` column, then
# apply log2(x + 1).
cnv_norm = (
    cnv.divide(prot_obj.ss.loc[cnv.columns, "ploidy"])
    .add(1)
    .pipe(np.log2)
)
LOG.info(f"Copy number: {cnv.shape}")

# Overlaps
#
# Labels shared by every data-set: column labels give `samples`, index labels
# give `genes` (the latter additionally intersected with `prot_broad`).
# NOTE: both lists come from sets, so their order is arbitrary.
samples = list(set(prot) & set(gexp) & set(cnv))
genes = list(
    set(prot.index) & set(gexp.index) & set(cnv.index) & set(prot_broad.index)
)
LOG.info(f"Genes: {len(genes)}; Samples: {len(samples)}")

# Data transformations
#
# Each shared gene's expression row has its missing values dropped and is then
# passed through Utils.gkn (project helper; presumably a per-gene
# normalisation - confirm). The per-gene dicts are assembled column-wise and
# transposed so that genes end up on the rows.
gexp_t = pd.DataFrame(
    {
        gene: Utils.gkn(gexp.loc[gene].dropna()).to_dict()
        for gene in genes
    }
).transpose()

# Sample-by-sample correlation matrix
#
# The "corr" entry of two_vars_correlation is collected for every pair of
# shared samples. Outer keys (proteomics sample) become the columns and inner
# keys (transcriptomics sample) become the rows of the resulting frame.
# NOTE(review): this uses the untransformed `gexp`, not `gexp_t` - confirm
# that is intended.
s_corr = pd.DataFrame(
    {
        prot_sample: {
            gexp_sample: two_vars_correlation(
                prot[prot_sample], gexp[gexp_sample]
            )["corr"]
            for gexp_sample in samples
        }
        for prot_sample in samples
    }
)

# NOTE(review): hard-coded absolute output path.
s_corr.to_csv("/Users/Downloads/Proteomics_Transcriptomics_Corr_Matrix.csv")

# Sample-wise Protein/Gene correlation with CopyNumber - Attenuation
stromal_count != 1].index)] # Import proteomics data-sets # dmatrix, ms_type, ctypes = [], [], [] for ctype, dfile in CPTAC_DATASETS: df = pd.read_csv(f"{CPTAC_DPATH}/linkedomics/{dfile}", sep="\t", index_col=0) if "COADREAD" in dfile: df = df.replace(0, np.nan) df = df.pipe(np.log2) df = pd.DataFrame( {i: Utils.gkn(df.loc[i].dropna()).to_dict() for i in df.index}).T # Simplify barcode df.columns = [i[:12].replace(".", "-") for i in df] # Cancer type ctypes.append(pd.Series(ctype, index=df.columns)) # MS type ms_type.append( pd.Series("LF" if "COADREAD" in dfile else "TMT", index=df.columns)) dmatrix.append(df)