Пример #1
0
# Output file path ("-o") and network type ("-m") options.
parser.add_option(
    "-o",
    "--foutput",
    dest="output",
    help="output file",
    metavar="FILE",
)
parser.add_option(
    "-m",
    "--model",
    dest="model",
    help="network_type",
    metavar="str",
)

# Split the command line into parsed options and leftover positional arguments.
options, args = parser.parse_args()

###############################################################################################################################################
# 															MAIN
###############################################################################################################################################
import numpy as np

# Open the output for writing, load the lookup table and open the network file.
# NOTE(review): fn.build_dictionary presumably maps each key to a list of
# values -- confirm against its definition.
# NOTE(review): neither file handle is closed in the visible code; confirm they
# are closed further down or move to `with` blocks.
file_output = open(options.output, 'w')
dictionary = fn.build_dictionary(options.dictionary, options.key_col, options.value_col)
network = open(options.network)

for line in network:
	line = line.rstrip("\n")
	# A row may carry two or three tab-separated fields. Only the first two are
	# unpacked, so slice first: a plain two-name unpack raises ValueError as
	# soon as a third column is present.
	hp1, hp2 = line.split("\t")[:2]

	# Identifiers missing from the dictionary get an empty list.
	rels_1 = dictionary.get(hp1, [])
	rels_2 = dictionary.get(hp2, [])
                  dest="value_id",
                  help="column which have values identificators",
                  type='int')

# "-o/--output_file": path of the file the results are written to.
parser.add_option("-o", "--output_file", dest="output_file",
                  help="output", metavar="FILE")

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################
# Lookup table built from the dictionary file, using the key and value column
# indices given on the command line.
gene_dict = fn.build_dictionary(options.dictionary_file, options.key_id,
                                options.value_id)

# Patient input, read line by line below as tab-separated patient/gene pairs.
# NOTE(review): neither handle is closed in the visible code -- confirm they
# are closed later or switch to `with` blocks.
file = open(options.patient_file)

# Results file, opened for writing (any existing content is truncated).
output_file = open(options.output_file, 'w')

pat2gene_dict = dict()
for line in file:
    line = line.rstrip("\n")

    patient, gene = line.split("\t")

    if patient not in pat2gene_dict:
        def_gene = "".join(gene_dict[gene])
        pat2gene_dict[patient] = [def_gene]
    else:
Пример #3
0
                  dest="loci_id",
                  help="loci id",
                  type='int')
# Column index of the genes, used below when building the loci-to-gene lookup.
# The long flag was previously spelled "--gene value"; an option string that
# contains a space cannot be typed as an ordinary unquoted flag, so it now
# follows the underscore convention of the other options. "-b" is unchanged.
parser.add_option("-b",
                  "--gene_value",
                  dest="gene_value",
                  help="column of genes",
                  type='int')

# Split the command line into parsed options and leftover positional arguments.
(options, arg) = parser.parse_args()

##############################################################################################################################################
#															MAIN
##############################################################################################################################################
# Two lookup tables, each built from its own file with the column indices
# supplied on the command line: patient -> loci and loci -> genes.
pat2loci_dictionary = fn.build_dictionary(
    options.pat2loci_file, options.pat_id, options.pat_loci_value)
loci2gene_dictionary = fn.build_dictionary(
    options.loci2phen_file, options.loci_id, options.gene_value)

#for each patient associated to a number of locis
for patient, locis in pat2loci_dictionary.items():

    for loci in locis:  #for each loci associated to a patient

        if loci in loci2gene_dictionary:  #get the genes for each loci
            genes = loci2gene_dictionary[loci]

            for gene in genes:

                print(patient, gene,
# Each cluster file (KEGG, Reactome) is read twice with the same key column:
# once for the value column and once for the gene column.
kegg_hpos = load_dictionary(
    options.kegg_cluster_file, options.key_cluster, options.value_cluster)
kegg_gene = load_dictionary(
    options.kegg_cluster_file, options.key_cluster, options.gene_col)

reactome_hpos = load_dictionary(
    options.reactome_cluster_file, options.key_cluster, options.value_cluster)
reactome_gene = load_dictionary(
    options.reactome_cluster_file, options.key_cluster, options.gene_col)

# Lookup built from the gene file.
pat2gene_dict = load_dictionary(
    options.gene_file, options.key_gene, options.value_gene)

# Lookup built from the patient file; unlike the tables above it goes through
# fn.build_dictionary rather than load_dictionary.
patient_profile_dict = fn.build_dictionary(
    options.patient_file, options.key_patient, options.value_patient)

# Results file (opened for writing) and the patient input file.
# NOTE(review): no close() is visible for either handle -- confirm downstream.
output_file = open(options.output_file, 'w')
patient_hpo_file = open(options.patient_file)

# Header row of the tab-separated output table.
output_file.write("\t".join(("Patient", "Metric", "Number")) + "\n")

patient_l = []

for line in patient_hpo_file:
    # Strip the trailing newline, then split the row into tab-separated columns.
    line = line.rstrip("\n")
    fields = line.split("\t")

    # The patient identifier sits in the column selected on the command line.
    patient = fields[options.key_patient]
Пример #5
0
                  dest="freq_col",
                  help="column which have non neuromuscular frequency",
                  type='int')

# "-p/--percentage": integer percentage value.
parser.add_option("-p", "--percentage", dest="percentage",
                  help="percentage", type='int')

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################
# Lookup built from the neuromuscular frequency table, passing the HPO id
# column first and the frequency column second.
neuromuscular_freq_dictionary = fn.build_dictionary(
    options.neuromuscular_frequency_table,
    options.hpo_id,
    options.freq_col,
)

# List loaded from the HPO list file.
hpo_neuromuscular_l = fn.load_list_from_a_file(options.hpo_list)

# This call uses the unqualified build_dictionary (defined or imported outside
# this excerpt), not fn.build_dictionary. The column order is reversed compared
# with the call above (frequency column first, HPO id column second), and the
# HPO list is passed as a fourth argument.
freq_non_neuromuscular_dictionary = build_dictionary(
    options.non_neuromuscular_frequency_table,
    options.freq_col,
    options.hpo_id,
    hpo_neuromuscular_l,
)

non_neuromuscular_hpo_l = []

for hpo in hpo_neuromuscular_l:
    if hpo in neuromuscular_freq_dictionary:
        freq = int("".join(neuromuscular_freq_dictionary[hpo]))
Пример #6
0
                  help="value col for main dictionary",
                  type='int')
# File to analyse. The long flag was previously spelled "--file to analyse";
# an option string that contains spaces cannot be typed as an ordinary
# unquoted flag, so it now follows the underscore convention of the other
# options. "-l" is unchanged.
parser.add_option("-l",
                  "--file_to_analyse",
                  dest="file_to_analyse",
                  help="dictionary to analyse",
                  metavar="FILE")
# Key column index for the file to analyse.
parser.add_option("-B",
                  "--key_id",
                  dest="key_col_analyse",
                  help="key id col for the dictionary to analyse",
                  type='int')

# Split the command line into parsed options and leftover positional arguments.
(options, args) = parser.parse_args()

###############################################################################################################################################
# 															MAIN
###############################################################################################################################################
# Lookup table built from the main dictionary file; its values are joined into
# a single string when printed below.
hpo_dictionary = fn.build_dictionary(options.main_dictionary,
                                     options.key_col_dictionary,
                                     options.value_col_dictionary)

# The context manager guarantees the input file is closed even if a malformed
# row makes the loop raise; previously the handle was never closed.
with open(options.file_to_analyse) as cluster_file:
    print("hpo" + "\t" + "cluster" + "\t" + "name")

    for line in cluster_file:
        line = line.rstrip("\n")
        # Each row must have exactly two tab-separated columns; the second one
        # is only re-emitted as part of `line`.
        HPO, cluster = line.split("\t")
        # Rows whose first column is not in the dictionary are dropped.
        if HPO in hpo_dictionary:
            print(line + "\t" + "".join(hpo_dictionary[HPO]))
Пример #7
0

# Neuromuscular diseases file and the key/value columns to read from it.
parser.add_option(
    "-n",
    "--neuromuscular_file",
    dest="neuromuscular_file",
    help="neuromuscular diseases file",
    metavar="FILE",
)
parser.add_option(
    "-C",
    "--neuromuscular_key_col",
    dest="neuromuscular_key_col",
    help="neuromuscular diseases key column",
    type='int',
)
parser.add_option(
    "-c",
    "--neuromuscular_value_col",
    dest="neuromuscular_value_col",
    help="neuromuscular diseases value column",
    type='int',
)

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################

#load_dictionary
# Disease tables.
# NOTE(review): the non-neuromuscular file is read with the *neuromuscular*
# key/value columns -- confirm both files share the same layout.
all_diseases_dictionary = fn.build_dictionary(
    options.non_neuromuscular_file,
    options.neuromuscular_key_col,
    options.neuromuscular_value_col,
)
neuromuscular_diseases_dictionary = fn.build_dictionary(
    options.neuromuscular_file,
    options.neuromuscular_key_col,
    options.neuromuscular_value_col,
)

# Two lookups over the same "all" file that share the key column and differ
# only in the value column.
omim_dictionary = fn.build_dictionary(
    options.all_file,
    options.all_key_col,
    options.all_value_col,
)
hpo_dictionary = fn.build_dictionary(
    options.all_file,
    options.all_key_col,
    options.hpo_name_col,
)

# Keys of omim_dictionary.
# NOTE(review): if omim_dictionary is a plain dict, this is a keys view under
# Python 3, not a list -- confirm nothing downstream indexes it.
unique_hpo_l = omim_dictionary.keys()

#test_dictionary
#print(all_diseases_dictionary)
#print(len(all_diseases_dictionary.keys()))

#print(neuromuscular_diseases_dictionary)
#print(len(neuromuscular_diseases_dictionary.keys()))
Пример #8
0
                  "--hpo",
                  dest="hpo",
                  help="column with HPO terms",
                  type='int')
# "-y/--system": index of the column holding the system terms.
parser.add_option("-y", "--system", dest="system",
                  help="column with system terms", type='int')

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################
# Two lookups, one from the cluster file and one from the system file; the
# loop below matches them on their keys.
hpo_dictionary = fn.build_dictionary(
    options.cluster_file, options.cluster_hpo_id, options.hpo)
systems_dictionary = fn.build_dictionary(
    options.system_file, options.cluster_system_id, options.system)

for cluster_id, systems in systems_dictionary.items():

    # Only clusters present in both lookups are reported.
    if cluster_id in hpo_dictionary:
        # Work on a copy so pop() does not consume the dictionary's own entry.
        all_systems = systems[:]
        first_system = all_systems.pop()

        print(first_system, ','.join(hpo_dictionary[cluster_id]), sep="\t")

        # Drain the remaining systems, always taking the last element.
        while all_systems:
            second_system = all_systems.pop()
Пример #9
0
# Value columns of the cluster and patient files.
parser.add_option(
    "-x",
    "--cluster_value",
    dest="cluster_value",
    help="column with HPO terms",
    type='int',
)
parser.add_option(
    "-y",
    "--patient_value",
    dest="patient_value",
    help="column with hpo terms",
    type='int',
)

# Integer thresholds: profile length and number of shared terms.
parser.add_option(
    "-l",
    "--length_hpo_profile",
    dest="length_hpo_profile",
    help="minimun length of the hpo profile",
    type='int',
)
parser.add_option(
    "-s",
    "--length_shared_hpo",
    dest="number_of_shared_hpo",
    help="minimun number of shared hpo with the cluster",
    type='int',
)

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()
#############################################################################################################################################################################
#																		MAIN																								#
#############################################################################################################################################################################

# Build the two lookups that are compared below: one from the patients file and
# one from the clusters file, each with its own key and value column.
patient_dictionary = fn.build_dictionary(options.patients_file, options.patient_id, options.patient_value) # patients as keys, their HPO terms as values
cluster_dictionary = fn.build_dictionary(options.clusters_file, options.cluster_id, options.cluster_value) # cluster ids as keys, their HPO terms as values

print("Cluster" + "\t" + "Patient" + "\t" + "Shared_hpo" + "\t" + "Patient_profile")		# header row, written to standard output

# For each patient and the HPO terms of that patient
for patient, HPO_patient in patient_dictionary.items() :

	if len(HPO_patient) > options.length_hpo_profile:		# only profiles strictly longer than the threshold are compared

		# For each cluster and the HPO terms of that cluster
		for cluster, HPO_cluster in cluster_dictionary.items():

			shared_hpo = list(set(HPO_cluster) & set(HPO_patient))		# terms present in both the cluster and the patient profile

			not_in_cluster = [x for x in HPO_patient if x not in shared_hpo]	# patient terms that are not shared with the cluster (profile order kept)
Пример #10
0
                  "--hpo_disease",
                  dest="hpo_disease",
                  help="column with HPO terms",
                  type='int')
# "-t/--thresehold": float threshold (help text: permitted errors).
parser.add_option("-t", "--thresehold", dest="thresehold",
                  help="permited errors", type='float')

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################
# Lookups built from the cluster file (a_pairs) and the diseases file (b_pairs).
a_pairs = fn.build_dictionary(
    options.cluster_file, options.cluster_id, options.hpo_cluster)
b_pairs = fn.build_dictionary(
    options.diseases_dictionary, options.disease_id, options.hpo_disease)

# Compare every cluster against every disease.
for cluster_a, cluster_hpos in a_pairs.items():
    # Number of values stored for this cluster.
    a_length = len(cluster_hpos)

    for disease, disease_hpos in b_pairs.items():
        # Values common to the cluster and the disease, and how many there are.
        intersection = list(set(cluster_hpos) & set(disease_hpos))
        intersection_length = len(intersection)
Пример #11
0
                  help="threshold number of HPO per cluster",
                  type='float')
# "-c/--number_cluster": float threshold on the number of clusters.
parser.add_option("-c", "--number_cluster", dest="number_cluster",
                  help="threshold number cluster", type='float')

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()

#######################################################################################################################################
#														MAIN
#######################################################################################################################################
import numpy as np

# Lookup from the patient file: patients as keys, their HPO terms as values.
patient_dictionary = fn.build_dictionary(options.patient_file,
                                         options.patient_id,
                                         options.hpo_patients)

# Lookup from the system file, keyed by the system id column.
# NOTE(review): the original comment described the keys as "hpos of a cluster"
# and the values as systems -- confirm against the file layout.
systems_dictionary = fn.build_dictionary(options.system_file,
                                         options.system_id,
                                         options.hpo_system)

print("Pat_id",
      "Prof_length",
      "Profile",
      "Number_of_HPO_explained",
      "Percetage_of_HPO_explained",
      "Nº_coincedence_clusters",
      "Cluster",
      "Avarage_HPO_per_cluster",
      "Higher_cluster_size",
      "Affected_HPO",
					help="File with genes their symbols", metavar="FILE")
# Key and value columns of the gene-symbol file.
parser.add_option(
    "-I",
    "--key_gene_symbol_id",
    dest="gene_symbol_id",
    help="column which have genes id",
    type='int',
)
parser.add_option(
    "-i",
    "--gene_symbol_value",
    dest="gene_symbol_value",
    help="column with symbols",
    type='int',
)

# Integer thresholds on shared HPO terms and on the number of genes.
parser.add_option(
    "-t",
    "--shared_hpo_threshold",
    dest="shared_hpo_threshold",
    help="shared_hpo_threshold",
    type='int',
)
parser.add_option(
    "-n",
    "--gene_number_threshold",
    dest="gene_number_threshold",
    help="gene_number_threshold",
    type='int',
)

# Split the command line into parsed options and leftover positional arguments.
options, arg = parser.parse_args()
#############################################################################################################################################################################
#																		MAIN																								#
#############################################################################################################################################################################
# Build six lookups, one per input file, each with its own key/value column
# pair taken from the command line.
clusters_hpo_dictionary = fn.build_dictionary(options.clusters_hpo_file, options.cluster_hpo_id, options.cluster_hpo_value)				# from the clusters/HPO file
patients_hpo_dictionary = fn.build_dictionary(options.patients_hpo_file, options.patients_hpo_id, options.patients_hpo_value)				# from the patients/HPO file
clusters_systems_dictionary = fn.build_dictionary(options.cluster_system_file, options.cluster_system_id, options.cluster_system_value)	# from the cluster/system file
systems_genes_dictionary = fn.build_dictionary(options.system_gene_file, options.system_gene_id, options.system_gene_value)				# from the system/gene file
patients_genes_dictionary = fn.build_dictionary(options.patient_gene_file, options.patient_gene_id, options.patient_gene_value)			# from the patient/gene file
genes_symbol_dictionary = fn.build_dictionary(options.gene_symbol_file, options.gene_symbol_id, options.gene_symbol_value)					# from the gene/symbol file

# Header row of the tab-separated report, written to standard output.
print("Patient" + "\t" + "Cluster" + "\t" + "Shared_hpos" + "\t" + "Patient_profile" + "\t" + "Systems" + "\t" + "Genes")

# For each patient and the HPO terms stored for that patient
for patient, hpos in patients_hpo_dictionary.items():
	patient_profile = set(hpos)										# patient profile as a set (duplicates removed)
	if patient in patients_genes_dictionary:						# only patients that have an entry in the patient/gene lookup
		patient_genes = patients_genes_dictionary[patient]
		patient_genes_symbol = []
Пример #13
0
                  help="pvalue",
                  metavar="float")

(options, args) = parser.parse_args()

###############################################################################################################################################
# 															MAIN
###############################################################################################################################################
import numpy as np
import os.path as path

#If the principal file exits it makes a dictionary cluster HPO
# Mean cluster size. It is 0 when the dictionary file does not exist; otherwise
# it is the average number of items per cluster, with an empty dictionary
# treated as a single cluster of size 0.
if not path.exists(options.dictionary):
    mean = 0
else:
    dictionary = fn.build_dictionary(options.dictionary, options.cluster_id,
                                     options.item_id)

    # One length per cluster; fall back to [0] so the mean is defined.
    size = [len(items) for items in dictionary.values()] or [0]

    mean = np.mean(size)

print(options.model_name + "\t" + options.model_type + "\t" +
      "Average_Cluster_size_" + options.enrichment + "_" + options.pvalue +