-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
38 lines (30 loc) · 3.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# ----------------------------------------------------------------------Imports------------------------------------------------------------------------------
import pandas as pd
from helpers import (clean_data, get_pearson, get_max, get_min, scatter_plot,
p_values_paired, p_values_ind, compare)
# -----------------------------------------------------------------------------------------------------------------------------------------------------------
# ------------------------------ Configuration ------------------------------
# Tab-separated input files, resolved relative to the working directory.
HEALTHY_PATH = 'lusc-rsem-fpkm-tcga_paired.txt'
CANCEROUS_PATH = 'lusc-rsem-fpkm-tcga-t_paired.txt'
# Cutoff handed to both p-value helpers below; presumably the significance
# level -- TODO confirm against helpers.py.
P_VALUE_CUTOFF = 0.05

# ------------------------------ Load the data ------------------------------
# The healthy file is read first, then the cancerous one.
healthy, cancerous = (
    pd.read_csv(path, sep='\t') for path in (HEALTHY_PATH, CANCEROUS_PATH)
)

# ------------------------------ Clean the data -----------------------------
# The return value is not captured, so clean_data presumably modifies both
# frames in place; the meaning of the third argument (25) is defined in
# helpers.py -- TODO confirm both.
clean_data(healthy, cancerous, 25)

# ------------------------------ Pearson correlation ------------------------
correlation, indexes = get_pearson(healthy, cancerous)

# ------------------------------ Maximum correlation ------------------------
# Only the maximum is looked up in this part of the script; get_min is
# imported above but not called here.
max_key, max_value, max_index = get_max(correlation, indexes)

# ------------------------------ Plot the maximum ---------------------------
# The first entry of max_index selects what gets plotted.
first_max_index = max_index[0]
scatter_plot(
    healthy,
    cancerous,
    first_max_index,
    xlabel='H_Expression_Level',
    ylabel='C_Expression_Level',
    title='Maximum Correlation Gene',
)

# ------------------------------ p-values (paired) --------------------------
# NOTE: the "diffrentially" spelling is kept as-is so that any code relying on
# these module-level names keeps working.
diffrentially_genes_paired = p_values_paired(healthy, cancerous, P_VALUE_CUTOFF)

# ------------------------------ p-values (independent) ---------------------
diffrentially_genes_ind = p_values_ind(healthy, cancerous, P_VALUE_CUTOFF)

# ------------------------------ Compare the two ----------------------------
# compare yields three results, unpacked as: shared by both, paired-only and
# independent-only (naming taken from the variables; confirm in helpers.py).
common, paired_only, ind_only = compare(
    diffrentially_genes_paired, diffrentially_genes_ind
)
# ---------------------------------------------------------------------------