def save_pkl(self, fn):
    """Bundle the three disease tables into one dict and save it to ``fn``.

    The dict is keyed ``'decipher'``, ``'orpha'`` and ``'omim'`` and holds
    the attributes of the same names; writing is delegated to
    ``save_object`` (presumably a pickle dump, going by the method name --
    confirm against its definition).
    """
    db = dict(decipher=self.decipher, orpha=self.orpha, omim=self.omim)
    save_object(db, fn)
def gen_patient_embeddings(source, EXP_ID, exp_id, enriched='no',
                           n_same_time=None, exp_variable=None):
    """Compute an averaged embedding for every patient row and save them.

    Input is ``./_data/patients/<source>_patients_phenotype[_<n>].csv`` and
    output is ``./_data/patients/<source>_patient_embeddings[_<n>].pkl``;
    the ``_<n>`` suffix is only present when ``n_same_time`` is truthy.

    Args:
        source: Name fragment used to build both file paths.
        EXP_ID: Forwarded unchanged to ``HpoVecs``.
        exp_id: Forwarded unchanged to ``HpoVecs``.
        enriched: Forwarded unchanged to ``HpoVecs`` (default ``'no'``).
        n_same_time: Optional file-name suffix; converted with ``str`` so
            both strings and integers are accepted.
        exp_variable: Forwarded to ``HpoVecs`` as a keyword argument.

    Side effects:
        Saves a ``{first_column: embedding}`` dict through ``save_object``
        and prints the elapsed wall-clock time in seconds.
    """
    start = time.time()

    suffix = ('_' + str(n_same_time)) if n_same_time else ''
    base = './_data/patients/' + source
    patients_phenotypes = base + '_patients_phenotype' + suffix + '.csv'
    patient_embeddings = base + '_patient_embeddings' + suffix + '.pkl'

    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields are interpreted by the reader and not by open().
    with open(patients_phenotypes, newline='') as csv_file:
        hpo = Hpo()
        hpo_vectors = HpoVecs(enriched, EXP_ID, exp_id,
                              exp_variable=exp_variable).vecs
        # First column is the key, the remaining columns are the terms to
        # average. Blank lines come back from csv.reader as [] and are
        # skipped rather than failing on row[0].
        patients = {
            row[0]: compute_embedding_average(row[1:], hpo, hpo_vectors)
            for row in csv.reader(csv_file)
            if row
        }

    save_object(patients, patient_embeddings)
    print(time.time() - start)
def save_pkl(self, filename):
    """Save the disease tables and their descriptions to ``filename``.

    Collects the ``decipher``, ``orpha`` and ``omim`` attributes together
    with their ``*_desc`` counterparts into a single dict keyed by attribute
    name, then hands that dict to ``save_object``.
    """
    # Tuple order fixes the key insertion order of the saved dict.
    attr_names = (
        'decipher', 'decipher_desc',
        'orpha', 'orpha_desc',
        'omim', 'omim_desc',
    )
    db = {attr: getattr(self, attr) for attr in attr_names}
    save_object(db, filename)
def learn_embeddings(self, walks):
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.

    Every node of every walk is converted to ``str``; the converted walks
    are saved to './_data/walks/walks_hpo_orpha.pkl' via ``save_object`` and
    then used to train a ``Word2Vec`` model whose vectors are written in
    word2vec format to ``self.output``. Returns ``None``.
    '''
    # Materialise real lists: under Python 3 a bare map() would be a
    # one-shot iterator.
    str_walks = [[str(node) for node in walk] for walk in walks]
    save_object(str_walks, './_data/walks/walks_hpo_orpha.pkl')

    # NOTE(review): `size` and `iter` are the gensim 3.x keyword names
    # (renamed `vector_size` / `epochs` in gensim 4) -- confirm which
    # version this project pins before upgrading.
    model = Word2Vec(
        str_walks,
        size=self.dimensions,
        window=self.window_size,
        min_count=0,
        sg=1,
        workers=self.workers,
        iter=self.iter,
    )

    # Saved through `model.wv`; the model-level save_word2vec_format is
    # deprecated.
    model.wv.save_word2vec_format(self.output)
def gen_mapping_objects(source, n_same_time=None):
    """Flatten simulated patients into a phenotype CSV and a disease map.

    Reads ``./_emu/emu-<source>[_<n>].json``, a mapping from disease to an
    object whose ``'sims'`` entry is a list of simulated patients, each an
    iterable of symptoms. Every simulated patient gets a sequential id
    ``P0``, ``P1``, ... and produces:

    * one row ``[id, symptom, ...]`` in
      ``_data/patients/<source>_patients_phenotype[_<n>].csv``;
    * one ``id -> disease`` entry in the dict saved through ``save_object``
      to ``_data/patients/<source>_patients_disease[_<n>].pkl``.

    Args:
        source: Name fragment used to build all three file paths.
        n_same_time: Optional file-name suffix (``_<n>``), used only when
            truthy; converted with ``str`` so integers are accepted too.

    Side effects:
        Writes the two files above and prints the elapsed wall-clock time
        in seconds.
    """
    start = time.time()

    suffix = ('_' + str(n_same_time)) if n_same_time else ''
    json_path = './_emu/emu-' + source + suffix + '.json'
    csv_path = ('_data/patients/' + source + '_patients_phenotype'
                + suffix + '.csv')
    fn = '_data/patients/' + source + '_patients_disease' + suffix + '.pkl'

    with open(json_path) as json_file:
        patient_sims = json.load(json_file)

    patients_conditions = {}
    patient_no = 0
    # Context manager guarantees the CSV handle is flushed and closed even
    # if a malformed record raises part-way through.
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for disease, record in patient_sims.items():
            for phenotype in record['sims']:
                identifier = 'P' + str(patient_no)
                writer.writerow([identifier] + list(phenotype))
                patients_conditions[identifier] = disease
                patient_no += 1

    save_object(patients_conditions, fn)
    print(time.time() - start)
# ---- Experiment parameters ----
walk_length = 50
iterations = 3
conds = 200
noise_ptgs = [0.15, 0.3, 0.45, 0.6]
patients_per_cond = 2
lamb = 4
enriched_embeddings = 'no'

# Embedding generation is currently disabled; the original invocation is
# kept below for reference.
# print('Generating embeddings...')
# start_time_symptom_embeddings = time.time()
# genEmbeddings(input='_data/graph/hp-obo.edgelist', output='_data/emb/hp-obo_'+EXP_ID+'_'+str(exp_int)+'.emb', walk_length=walk_length, iter=iterations)
# amount_time_symptom_embeddings = time.time()-start_time_symptom_embeddings

# ---- Result accumulators ----
rows = []
metadata_list = []
# The (still empty) metadata list is saved up front under this experiment's
# id.
save_object(metadata_list,
            '_data/results/experiment_number' + EXP_ID + '.pkl')

sim_names = {
    'cos_sim',
    'jaccard_best_avg',
    'resnik_best_avg',
    'lin_best_avg',
    'jc_best_avg'
}

fig, ax = plt.subplots(2, 2)

i = 0
for noise_ptg in noise_ptgs:
    # Fresh, empty AUC list per similarity measure for this noise level.
    aucs = {
        measure: []
        for measure in (
            'cos_sim',
            'jaccard_best_avg',
            'resnik_best_avg',
            'lin_best_avg',
            'jc_best_avg',
        )
    }
def save_pkl(self, fn):
    """Save ``self.hpos`` to ``fn`` by delegating to ``save_object``."""
    hpos = self.hpos
    save_object(hpos, fn)