示例#1
0
# Stack the auxiliary dataframes collected in `concat` into one table;
# ignore_index=True drops their own row labels and renumbers the rows 0..N-1.
df_encoded = pd.concat(objs=concat, ignore_index=True)

# Reuse that fresh row numbering as the patient identifier
# (0 to N-1, where N is the number of patients).
df_encoded['id_patient'] = df_encoded.index.tolist()

# Save the encoded sequences of this experiment to disk, then echo them.
df_encoded.to_csv(path_or_buf='patient_temporal_sequences_experiment1.csv')
print(df_encoded)

################################################################################
##            SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
################################################################################
# Sweep over the candidate gap penalties in `gap_values`. For each value the
# loop runs the pairwise sequence alignment, converts the resulting scores
# into distances and, unless every distance is zero, runs the hierarchical
# clustering on them.
# NOTE(review): `concat_for_final_decision` is initialised here but nothing is
# appended to it in the visible lines; presumably it collects per-gap results
# further down -- confirm against the full file.
concat_for_final_decision = []
for gap in gap_values:

    print('GAP PENALTY:', gap)

    # Pairwise sequence alignment results for the current gap penalty.
    # NOTE(review): the meaning of T, s and the trailing 0 is defined by
    # main_algorithm, which is not visible in this excerpt.
    results = main_algorithm(df_encoded, gap, T, s, 0)

    # Reset the row index of the encoded dataframe.
    # NOTE(review): reset_index() without drop=True moves the current index
    # into a column, and this runs on every iteration. Unless main_algorithm
    # re-indexes df_encoded, repeated calls may pile up index columns or fail
    # on a column-name clash -- confirm.
    df_encoded = df_encoded.reset_index()

    # Convert the similarity matrix into a distance matrix (in place, in the
    # 'score' entry of `results`).
    results['score'] = convert_to_distance_matrix(results['score'])

    # Skip this gap value when every converted score equals 0. The original
    # note says this is the case where all the scores are the same.
    if ((results['score'] == 0).all()):
        # Debug trace; 'entrei' is Portuguese for "I entered".
        print('entrei')
        continue
    else:
        # Hierarchical clustering on the distance values.
        Z = hierarchical_clustering(results['score'], method, gap)
        # NOTE(review): from this print onward the lines look as if they were
        # spliced in from another part of the file (a per-experiment loop over
        # `i`, which is not defined in the visible lines) -- confirm the
        # intended nesting before relying on this excerpt.
        print('number of experiment:', i)
        # Initialize the list that will contain the auxiliary dataframes to be
        # concatenated.
        concat = []

        # Generate sequences. This replaces the df_encoded used by the outer
        # loop, so later outer iterations see the regenerated frame.
        df_encoded = generate_dataset(n_sequences, dataset)

        ###########################################################################
        ##            SEQUENCE ALIGNMENT, HIERARCHICAL CLUSTERING & VALIDATION
        ###########################################################################
        # Re-initialises the same accumulator that was created before the
        # outer loop.
        concat_for_final_decision = []
        # NOTE(review): this inner loop reuses the name `gap`, rebinding the
        # outer loop variable for the rest of the current outer iteration.
        for gap in gap_values:
            #print(gap)

            # Pairwise sequence alignment results for the current gap penalty.
            results = main_algorithm(df_encoded, gap, T, s, 0)

            # Reset the row index (same per-iteration reset_index() caveat as
            # in the outer loop -- confirm).
            df_encoded = df_encoded.reset_index()

            # Convert the similarity matrix into a distance matrix.
            results['score'] = convert_to_distance_matrix(results['score'])

            # Skip this gap value when every converted score equals 0 (the
            # original note: all the scores are the same).
            if ((results['score'] == 0).all()):
                continue
            else:
                # Hierarchical clustering on the distance values.
                Z = hierarchical_clustering(results['score'], method, gap)

                # Validation (the excerpt ends here; the rest is not visible).