def main():
    """Write the high-correlation gene pair network for one dataset.

    The dataset is selected by ``sys.argv[1]``: either the literal string
    ``"mouse"`` or a decimal index into the list returned by
    ``file_operations.get_tcga_disease_list()``.

    The correlation matrix returned by ``corrcoef`` over the high-std genes
    is scanned above its diagonal, and every pair whose coefficient reaches
    the dataset's threshold (and is not exactly 1) is written as
    ``gene_a<TAB>gene_b<TAB>coefficient`` to
    ``./data/<data_type>_data/high_std_network.txt``.

    Exits with status 1 after printing the usage string when the command
    line is malformed.
    """
    usage = "Usage:python %s mouse/tcga_cancer_index" % sys.argv[0]
    if len(sys.argv) != 2:
        print(usage)
        sys.exit(1)

    data_type = sys.argv[1]
    # Explicit validation instead of `assert`, which disappears under -O.
    if data_type != "mouse" and not data_type.isdigit():
        print(usage)
        sys.exit(1)

    if data_type.isdigit():
        data_type = file_operations.get_tcga_disease_list()[int(data_type)]

    if data_type == "mouse":
        pcc_threshold = 0.9
    else:
        # TCGA coefficients are worse.
        pcc_threshold = 0.5

    # Read in the tsv file.
    gene_exp_dct = file_operations.get_gene_expression_dct(data_type)
    high_std_genes = file_operations.get_high_std_genes(data_type)
    gene_exp_matrix = create_gene_exp_matrix(gene_exp_dct, high_std_genes)

    # NOTE: the second return value (presumably p-values -- confirm against
    # corrcoef) is not used; p-value filtering is currently disabled.
    pcc_matrix, _p_values = corrcoef(gene_exp_matrix)

    out_path = "./data/%s_data/high_std_network.txt" % data_type
    # `with` guarantees the file is closed even if writing fails midway.
    with open(out_path, "w") as out:
        for row_idx, row in enumerate(pcc_matrix):
            for col_idx, pcc in enumerate(row):
                # Only visit entries strictly above the diagonal so each
                # unordered pair is emitted at most once.
                if col_idx <= row_idx:
                    continue
                # `not (pcc >= threshold)` also rejects NaN coefficients,
                # which would slip through a plain `pcc < threshold` test.
                if not (pcc >= pcc_threshold) or pcc == 1:
                    continue
                # Write out gene information.
                gene_a = high_std_genes[row_idx]
                gene_b = high_std_genes[col_idx]
                out.write("%s\t%s\t%f\n" % (gene_a, gene_b, abs(pcc)))