parser.add_option(
    "-o", "--foutput",
    dest="output", help="output file", metavar="FILE",
)
parser.add_option(
    "-m", "--model",
    dest="model", help="network_type", metavar="str",
)
options, args = parser.parse_args()

###############################################################################
# MAIN
###############################################################################
import numpy as np

# Output handle plus the lookup returned by fn.build_dictionary for the
# dictionary file (key/value columns come from the command line).
file_output = open(options.output, 'w')
dictionary = fn.build_dictionary(
    options.dictionary, options.key_col, options.value_col
)

# Each network row is split on tabs into a pair of terms.
network = open(options.network)
for line in network:
    line = line.rstrip("\n")
    hp1, hp2 = line.split("\t")  # unpacking requires exactly two fields
    # A term that is absent from the lookup gets an empty relation list.
    rels_1 = dictionary[hp1] if hp1 in dictionary else []
    rels_2 = dictionary[hp2] if hp2 in dictionary else []
dest="value_id", help="column which have values identificators", type='int') parser.add_option("-o", "--output_file", dest="output_file", help="output", metavar="FILE") (options, arg) = parser.parse_args() ####################################################################################################################################### # MAIN ####################################################################################################################################### gene_dict = fn.build_dictionary(options.dictionary_file, options.key_id, options.value_id) file = open(options.patient_file) output_file = open(options.output_file, 'w') pat2gene_dict = dict() for line in file: line = line.rstrip("\n") patient, gene = line.split("\t") if patient not in pat2gene_dict: def_gene = "".join(gene_dict[gene]) pat2gene_dict[patient] = [def_gene] else:
dest="loci_id", help="loci id", type='int') parser.add_option("-b", "--gene value", dest="gene_value", help="column of genes", type='int') (options, arg) = parser.parse_args() ############################################################################################################################################## # MAIN ############################################################################################################################################## pat2loci_dictionary = fn.build_dictionary(options.pat2loci_file, options.pat_id, options.pat_loci_value) loci2gene_dictionary = fn.build_dictionary(options.loci2phen_file, options.loci_id, options.gene_value) #for each patient associated to a number of locis for patient, locis in pat2loci_dictionary.items(): for loci in locis: #for each loci associated to a patient if loci in loci2gene_dictionary: #get the genes for each loci genes = loci2gene_dictionary[loci] for gene in genes: print(patient, gene,
# Two views of each cluster file: cluster -> value column and cluster -> gene column.
kegg_hpos = load_dictionary(
    options.kegg_cluster_file, options.key_cluster, options.value_cluster
)
kegg_gene = load_dictionary(
    options.kegg_cluster_file, options.key_cluster, options.gene_col
)
reactome_hpos = load_dictionary(
    options.reactome_cluster_file, options.key_cluster, options.value_cluster
)
reactome_gene = load_dictionary(
    options.reactome_cluster_file, options.key_cluster, options.gene_col
)
pat2gene_dict = load_dictionary(
    options.gene_file, options.key_gene, options.value_gene
)
#print(patient_hpos_overlap_in_cluster)

patient_profile_dict = fn.build_dictionary(
    options.patient_file, options.key_patient, options.value_patient
)

output_file = open(options.output_file, 'w')
patient_hpo_file = open(options.patient_file)

# Header row of the tab-separated report.
output_file.write("\t".join(("Patient", "Metric", "Number")) + "\n")

patient_l = list()
for line in patient_hpo_file:
    line = line.rstrip("\n")
    fields = line.split("\t")
    # The patient identifier sits in the column selected by --key_patient's dest.
    patient = fields[options.key_patient]
dest="freq_col", help="column which have non neuromuscular frequency", type='int') parser.add_option("-p", "--percentage", dest="percentage", help="percentage", type='int') (options, arg) = parser.parse_args() ####################################################################################################################################### # MAIN ####################################################################################################################################### neuromuscular_freq_dictionary = fn.build_dictionary( options.neuromuscular_frequency_table, options.hpo_id, options.freq_col) #print(neuromuscular_freq_dictionary) hpo_neuromuscular_l = fn.load_list_from_a_file(options.hpo_list) #print(len(hpo_neuromuscular_l)) freq_non_neuromuscular_dictionary = build_dictionary( options.non_neuromuscular_frequency_table, options.freq_col, options.hpo_id, hpo_neuromuscular_l) #print(freq_non_neuromuscular_dictionary) non_neuromuscular_hpo_l = [] for hpo in hpo_neuromuscular_l: if hpo in neuromuscular_freq_dictionary: freq = int("".join(neuromuscular_freq_dictionary[hpo]))
help="value col for main dictionary", type='int') parser.add_option("-l", "--file to analyse", dest="file_to_analyse", help="dictionary to analyse", metavar="FILE") parser.add_option("-B", "--key_id", dest="key_col_analyse", help="key id col for the dictionary to analyse", type='int') (options, args) = parser.parse_args() ############################################################################################################################################### # MAIN ############################################################################################################################################### hpo_dictionary = fn.build_dictionary(options.main_dictionary, options.key_col_dictionary, options.value_col_dictionary) cluster_file = open(options.file_to_analyse) print("hpo" + "\t" + "cluster" + "\t" + "name") for line in cluster_file: line = line.rstrip("\n") HPO, cluster = line.split("\t") if HPO in hpo_dictionary: print(line + "\t" + "".join(hpo_dictionary[HPO]))
parser.add_option(
    "-n", "--neuromuscular_file",
    dest="neuromuscular_file",
    help="neuromuscular diseases file",
    metavar="FILE",
)
parser.add_option(
    "-C", "--neuromuscular_key_col",
    dest="neuromuscular_key_col",
    help="neuromuscular diseases key column",
    type='int',
)
parser.add_option(
    "-c", "--neuromuscular_value_col",
    dest="neuromuscular_value_col",
    help="neuromuscular diseases value column",
    type='int',
)
options, arg = parser.parse_args()

###############################################################################
# MAIN
###############################################################################
# Load the lookups.
# NOTE(review): the non-neuromuscular file is read with the *neuromuscular*
# key/value column options -- confirm both files share the same layout.
all_diseases_dictionary = fn.build_dictionary(
    options.non_neuromuscular_file,
    options.neuromuscular_key_col,
    options.neuromuscular_value_col,
)
neuromuscular_diseases_dictionary = fn.build_dictionary(
    options.neuromuscular_file,
    options.neuromuscular_key_col,
    options.neuromuscular_value_col,
)
# Two lookups over the same file and key column, differing in the value column.
omim_dictionary = fn.build_dictionary(
    options.all_file, options.all_key_col, options.all_value_col
)
hpo_dictionary = fn.build_dictionary(
    options.all_file, options.all_key_col, options.hpo_name_col
)

# Keys of omim_dictionary (a keys view, not a copied list).
unique_hpo_l = omim_dictionary.keys()

#test_dictionary
#print(all_diseases_dictionary)
#print(len(all_diseases_dictionary.keys()))
#print(neuromuscular_diseases_dictionary)
#print(len(neuromuscular_diseases_dictionary.keys()))
"--hpo", dest="hpo", help="column with HPO terms", type='int') parser.add_option("-y", "--system", dest="system", help="column with system terms", type='int') (options, arg) = parser.parse_args() ####################################################################################################################################### # MAIN ####################################################################################################################################### hpo_dictionary = fn.build_dictionary(options.cluster_file, options.cluster_hpo_id, options.hpo) systems_dictionary = fn.build_dictionary(options.system_file, options.cluster_system_id, options.system) for cluster_id, systems in systems_dictionary.items(): if cluster_id in hpo_dictionary: all_systems = systems[:] first_system = all_systems.pop(-1) print(first_system, ','.join(hpo_dictionary[cluster_id]), sep="\t") while len(all_systems) > 0: second_system = all_systems.pop(-1)
parser.add_option(
    "-x", "--cluster_value",
    dest="cluster_value", help="column with HPO terms", type='int',
)
parser.add_option(
    "-y", "--patient_value",
    dest="patient_value", help="column with hpo terms", type='int',
)
parser.add_option(
    "-l", "--length_hpo_profile",
    dest="length_hpo_profile",
    help="minimun length of the hpo profile",
    type='int',
)
parser.add_option(
    "-s", "--length_shared_hpo",
    dest="number_of_shared_hpo",
    help="minimun number of shared hpo with the cluster",
    type='int',
)
(options, arg) = parser.parse_args()

###############################################################################
# MAIN
###############################################################################
# Per the original author: patients as keys, HPO terms as values.
patient_dictionary = fn.build_dictionary(
    options.patients_file, options.patient_id, options.patient_value
)
# Per the original author: cluster ids as keys, HPO terms as values.
cluster_dictionary = fn.build_dictionary(
    options.clusters_file, options.cluster_id, options.cluster_value
)

# Header of the tab-separated output.
print("Cluster" + "\t" + "Patient" + "\t" + "Shared_hpo" + "\t" + "Patient_profile")

for patient, HPO_patient in patient_dictionary.items():
    # Only profiles strictly longer than the threshold are compared.
    if len(HPO_patient) > options.length_hpo_profile:
        # The patient's set does not change across clusters, so build it once
        # instead of once per cluster.
        patient_hpo_set = set(HPO_patient)
        for cluster, HPO_cluster in cluster_dictionary.items():
            # Terms present in both the cluster and the patient profile.
            shared_hpo_set = set(HPO_cluster) & patient_hpo_set
            shared_hpo = list(shared_hpo_set)
            # Patient terms absent from the cluster, in profile order; the
            # membership test uses the set to avoid a list scan per term.
            not_in_cluster = [x for x in HPO_patient if x not in shared_hpo_set]
"--hpo_disease", dest="hpo_disease", help="column with HPO terms", type='int') parser.add_option("-t", "--thresehold", dest="thresehold", help="permited errors", type='float') (options, arg) = parser.parse_args() ####################################################################################################################################### # MAIN ####################################################################################################################################### a_pairs = fn.build_dictionary(options.cluster_file, options.cluster_id, options.hpo_cluster) #return a dictionary b_pairs = fn.build_dictionary(options.diseases_dictionary, options.disease_id, options.hpo_disease) #return a dictionary # For each entry (cluster) in the dictionary calculate the number of values (HPO) for cluster_a in a_pairs: a_length = len(a_pairs[cluster_a]) #For each entry (disease) in the dictionary: for disease in b_pairs: intersection = list( set(a_pairs[cluster_a]) & set(b_pairs[disease]) ) #calculate the intersection of values of both dictionaries (commom elements). intersection_length = len( intersection) #Calculate the length of the intersection.
help="threshold number of HPO per cluster", type='float') parser.add_option("-c", "--number_cluster", dest="number_cluster", help="threshold number cluster", type='float') (options, arg) = parser.parse_args() ####################################################################################################################################### # MAIN ####################################################################################################################################### import numpy as np patient_dictionary = fn.build_dictionary( options.patient_file, options.patient_id, options.hpo_patients) #dictionary with patients as keys and hpos as values systems_dictionary = fn.build_dictionary( options.system_file, options.system_id, options.hpo_system ) #dictionary with hpos of a cluster as keys and systems as values print("Pat_id", "Prof_length", "Profile", "Number_of_HPO_explained", "Percetage_of_HPO_explained", "Nº_coincedence_clusters", "Cluster", "Avarage_HPO_per_cluster", "Higher_cluster_size", "Affected_HPO",
help="File with genes their symbols", metavar="FILE") parser.add_option("-I", "--key_gene_symbol_id", dest="gene_symbol_id", help="column which have genes id", type='int') parser.add_option("-i", "--gene_symbol_value", dest="gene_symbol_value", help="column with symbols", type='int') parser.add_option("-t", "--shared_hpo_threshold", dest="shared_hpo_threshold", help="shared_hpo_threshold", type='int') parser.add_option("-n", "--gene_number_threshold", dest="gene_number_threshold", help="gene_number_threshold", type='int') (options, arg) = parser.parse_args() ############################################################################################################################################################################# # MAIN # ############################################################################################################################################################################# clusters_hpo_dictionary = fn.build_dictionary(options.clusters_hpo_file, options.cluster_hpo_id, options.cluster_hpo_value) #return dictionary patients_hpo_dictionary = fn.build_dictionary(options.patients_hpo_file, options.patients_hpo_id, options.patients_hpo_value) #return dictionary clusters_systems_dictionary = fn.build_dictionary(options.cluster_system_file, options.cluster_system_id, options.cluster_system_value) #return dictionary systems_genes_dictionary = fn.build_dictionary(options.system_gene_file, options.system_gene_id, options.system_gene_value) #return dictionary patients_genes_dictionary = fn.build_dictionary(options.patient_gene_file, options.patient_gene_id, options.patient_gene_value) #return dictionary genes_symbol_dictionary = fn.build_dictionary(options.gene_symbol_file, options.gene_symbol_id, options.gene_symbol_value) #return dictionary print("Patient" + "\t" + "Cluster" + "\t" + "Shared_hpos" + "\t" + "Patient_profile" + "\t" + "Systems" + "\t" + "Genes") #For each patient and hpos in patient_hpo dictionary for patient, hpos in 
patients_hpo_dictionary.items(): patient_profile = set(hpos) #get a set of the patient profile if patient in patients_genes_dictionary: #look for the genes associated to the patient patient_genes = patients_genes_dictionary[patient] patient_genes_symbol = []
help="pvalue", metavar="float") (options, args) = parser.parse_args() ############################################################################################################################################### # MAIN ############################################################################################################################################### import numpy as np import os.path as path #If the principal file exits it makes a dictionary cluster HPO if path.exists( options.dictionary ): #if the dictionary has a length different to 0 append the length of every cluster in the empty list, esle append 0. dictionary = fn.build_dictionary(options.dictionary, options.cluster_id, options.item_id) size = [] #empty list if int(len(dictionary)) != 0: for cluster_id in dictionary: size.append(len(dictionary[cluster_id])) else: size.append(0) mean = np.mean(size) #Calculate the mean of the clusters length else: #If the dictionary has length 0 the mean of clusters size is 0 mean = 0 print(options.model_name + "\t" + options.model_type + "\t" + "Average_Cluster_size_" + options.enrichment + "_" + options.pvalue +