# Concatenate the frames collected in `concat`, discarding their original
# indexes so the result is indexed 0..N-1.
df_encoded = pd.concat(concat, ignore_index=True)

# Number the patients from 0 to N-1, where N is the number of patients.
df_encoded['id_patient'] = df_encoded.index.tolist()

# Persist the encoded sequences and echo them for inspection.
df_encoded.to_csv('patient_temporal_sequences_experiment1.csv')
print(df_encoded)

################################################################################
## SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
################################################################################
concat_for_final_decision = []

for gap in gap_values:
    print('GAP PENALTY:', gap)

    # Pairwise sequence alignment results for the current gap penalty.
    results = main_algorithm(df_encoded, gap, T, s, 0)

    # Reset the index of the encoded frame.
    # NOTE(review): reset_index() without drop=True inserts the previous index
    # as a new column on every pass. Unless main_algorithm re-indexes
    # df_encoded itself, later passes would pile up 'index'/'level_0' columns
    # and may eventually raise -- confirm against main_algorithm.
    df_encoded = df_encoded.reset_index()

    # Convert the similarity scores into distances.
    results['score'] = convert_to_distance_matrix(results['score'])

    # Degenerate case: every converted score equals 0 (the situation the
    # original comment describes as "all the scores are the same"). Nothing
    # can be clustered, so move on to the next gap value.
    all_scores_zero = (results['score'] == 0).all()
    if all_scores_zero:
        print('entrei')
        continue
    else:
        # Hierarchical clustering on the distance scores.
        Z = hierarchical_clustering(results['score'], method, gap)
# NOTE(review): `i` is not assigned anywhere in this fragment; presumably it is
# the counter of an enclosing experiment loop that is out of view -- confirm.
print('number of experiment:', i)

# List that will hold the auxiliary dataframes to be concatenated.
concat = []

# Generate the sequences for this experiment.
df_encoded = generate_dataset(n_sequences, dataset)

###########################################################################
## SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
###########################################################################
concat_for_final_decision = []

for gap in gap_values:
    #print(gap)

    # Pairwise sequence alignment results for the current gap penalty.
    results = main_algorithm(df_encoded, gap, T, s, 0)

    # Reset the index of the encoded frame.
    # NOTE(review): reset_index() without drop=True inserts the previous index
    # as a new column on every pass. Unless main_algorithm re-indexes
    # df_encoded itself, later passes would pile up 'index'/'level_0' columns
    # and may eventually raise -- confirm against main_algorithm.
    df_encoded = df_encoded.reset_index()

    # Convert the similarity scores into distances.
    results['score'] = convert_to_distance_matrix(results['score'])

    # Degenerate case: every converted score equals 0 (the situation the
    # original comment describes as "all the scores are the same"). Nothing
    # can be clustered, so move on to the next gap value.
    all_scores_zero = (results['score'] == 0).all()
    if all_scores_zero:
        continue
    else:
        # Hierarchical clustering on the distance scores.
        Z = hierarchical_clustering(results['score'], method, gap)

        # validation