# NOTE(review): this fragment starts in the middle of a larger body. The
# statements down to the readmitted / no_readmitted split presumably run once
# per input row inside a loop whose header is above this chunk, and the
# statements after it presumably run once after that loop. The original
# indentation was lost, so everything is laid out flat here -- re-indent
# against the enclosing loop/function when splicing this back in.

# Record an encounter count of 1 under the key row[1].
# Presumably row[1] is the patient identifier and this is the "first time
# seen" branch of a membership test above this chunk -- confirm.
patientEncounters[row[1]] = 1

# Original author's note: feature index 7 is discharge_disposition, and the
# listed codes mean the patient was sent to a hospice or died. Such rows are
# excluded by clearing addRow.
if(addRow and int(row[7]) in [13,14,19,20,21]):
    addRow = False

if(addRow):
    # Keep only the columns whose positions are listed in sub_set_indexes.
    row = [row[j] for j in sub_set_indexes]
    # The last kept column is the label: 'Yes' becomes 1 and the row goes to
    # readmitted; any other value becomes 0 and the row goes to no_readmitted.
    if(row[-1]=='Yes'):
        row[-1] = 1
        readmitted.append(row)
    else:
        row[-1] = 0
        no_readmitted.append(row)

print 'number of readmissions:', len(readmitted)

# Build a class-balanced subset: as many randomly chosen non-readmitted rows
# as there are readmitted rows, followed by all readmitted rows.
# NOTE(review): random.sample raises ValueError if readmitted is longer than
# no_readmitted -- confirm that cannot happen for the input data.
sub_set = random.sample(no_readmitted, len(readmitted)) + readmitted
# Shuffle in place so the two classes are interleaved before writing.
random.shuffle(sub_set)
data_writer.writerows(sub_set)

# Keys of patientEncounters whose recorded count is greater than 1
# ("repited" is the original spelling of "repeated"); only the number of such
# keys is printed.
repitedEncounters = {k:v for (k,v) in patientEncounters.items() if(v>1)}
print len(repitedEncounters)

# Print the index lists returned by the DatabaseManager getters.
# NOTE(review): these look like module-level debug output rather than part of
# the row-processing code above -- confirm their intended scope.
print DatabaseManager.get_sub_feature_indexes()
print DatabaseManager.get_left_out_feature_indexes()
print DatabaseManager.get_indexes_to_scale()
print DatabaseManager.get_indexes_to_encode()
print DatabaseManager.get_indexes_to_hot_encode()