示例#1
0
'''Load in run and CN data'''
# Unpickle the saved run object stored as RunObject.p under report_path.
# The file is opened in a `with` block so the handle is closed as soon as
# loading finishes instead of being left for the garbage collector.
# NOTE(review): pickle executes arbitrary code while loading; only point
# report_path at directories whose contents are trusted.
with open(report_path + '/RunObject.p', 'rb') as run_file:
    run = pickle.load(run_file)

# Ask the run object for the cancer object matching cancer_type.
cancer = run.load_cancer(cancer_type)

# 'broad' data needs no hit thresholding: expose the raw frame as the
# feature matrix, save the dataset and stop the script here.
if data_type == 'broad':
    data = Dataset(cancer, run, 'CN_broad')
    data.features = data.df
    data.save()
    sys.exit(0)

# Every other data_type works from the 'CN' dataset; the type is appended
# to the dataset path so each variant gets its own location.
data = Dataset(cancer, run, 'CN')
data.path = '_'.join((data.path, data_type))

# For 'amplification_low', values of 1 are recoded to 2 first, so they are
# counted as hits by the same hit value used for 'amplification'.
if data_type == 'amplification_low':
    data.df = data.df.replace(1, 2)

# Value in data.df that counts as a hit for this data_type.
# Any other data_type leaves data.hit_val untouched.
if data_type == 'deletion':
    data.hit_val = -2
elif data_type in ('amplification', 'amplification_low'):
    data.hit_val = 2

# Build a 0/1 (float) matrix marking every cell of data.df that equals the
# configured hit value.
hit_matrix = (data.df==data.hit_val).astype(float)

# Number of rows in each group of the first (level 0) index level.
# Presumably level 0 is the chromosomal band and each row is a gene, as the
# variable name suggests -- TODO confirm against how data.df is indexed.
genes_in_bands = hit_matrix.groupby(level=0).size()
# For each level-0 group and each column: number of hits divided by
# (group size + 1). The transposes align the per-group divisor with the
# group labels.
# NOTE(review): the +1 in the denominator makes this slightly smaller than
# the plain fraction of rows hit; confirm that this is intentional.
frac = (hit_matrix.groupby(level=0).sum().T / (genes_in_bands + 1)).T
# Keep a hit only where its level-0 group has frac <= 0.25 for that column;
# hits in groups above the threshold are multiplied by 0. The mask is
# broadcast across the remaining index levels by matching on level 0, and
# fill_value=0 is used for entries missing during alignment.
hit_matrix = hit_matrix.mul((frac <= .25).astype(float), level=0, 
                            fill_value=0)
# Drop rows that have no remaining hit in any column.
hit_matrix = hit_matrix[hit_matrix.sum(1) > 0]
# Reverse the order of the index levels; this requires the row index to
# have exactly three levels.
hit_matrix.index = hit_matrix.index.reorder_levels([2,1,0])