'''Load in run and CN data'''
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only point report_path at run directories you trust.
# The context manager guarantees the file handle is closed after loading
# (the bare open() call previously leaked it).
with open(report_path + '/RunObject.p', 'rb') as run_file:
    run = pickle.load(run_file)
cancer = run.load_cancer(cancer_type)

# 'broad' data needs no hit filtering: the raw frame is stored as the
# feature set, saved, and the script exits successfully.
if data_type == 'broad':
    data = Dataset(cancer, run, 'CN_broad')
    data.features = data.df
    data.save()
    sys.exit(0)

data = Dataset(cancer, run, 'CN')
# Suffix the dataset path with the data type being processed.
data.path = '_'.join([data.path, data_type])

# Choose which value in data.df counts as a "hit".
# NOTE(review): any other data_type leaves hit_val untouched here; whether
# Dataset provides a default is not visible in this chunk -- confirm.
if data_type == 'deletion':
    data.hit_val = -2
elif data_type == 'amplification':
    data.hit_val = 2
elif data_type == 'amplification_low':
    # Recode 1 as 2 so that both values are matched by hit_val == 2.
    data.df = data.df.replace(1, 2)
    data.hit_val = 2

# 1.0 where the entry equals hit_val, 0.0 elsewhere.
hit_matrix = (data.df == data.hit_val).astype(float)

# Number of rows in each group of the first index level.
# (Presumably genes per cytoband, judging by the name -- confirm.)
genes_in_bands = hit_matrix.groupby(level=0).size()

# Per group and per column: hit count divided by (group size + 1).
# NOTE(review): the reason for the +1 in the denominator is not stated in
# the visible code -- confirm it is intentional.
frac = (hit_matrix.groupby(level=0).sum().T / (genes_in_bands + 1)).T

# Zero out hits in every (group, column) whose hit fraction exceeds 0.25;
# the mask is broadcast to the rows through index level 0.
hit_matrix = hit_matrix.mul((frac <= .25).astype(float), level=0,
                            fill_value=0)

# Drop rows that no longer contain any hit.
hit_matrix = hit_matrix[hit_matrix.sum(1) > 0]

# Reverse the order of the three index levels.
hit_matrix.index = hit_matrix.index.reorder_levels([2, 1, 0])