def create_population(number_of_individual, data_file_name, intervals_min, intervals_max):
    """
    -> Build the starting population: a list of randomly
       initialized Individual objects
    -> number_of_individual is an int, the size of the population
    -> data_file_name is the file handed to
       dichotomization.extract_matrix_from
    -> intervals_min and intervals_max are forwarded unchanged to
       create_random_individual for every individual
    -> return the list of individuals, their _id running from 0
       to number_of_individual - 1
    """

    # Only the second element of the extracted pack is needed here
    # (the original code names it variable_to_position).
    variable_positions = dichotomization.extract_matrix_from(data_file_name)[1]

    def spawn(identifier):
        # One fresh individual, tagged with its rank in the population
        newcomer = Individual()
        newcomer._id = identifier
        newcomer._intervals_to_variables = create_random_individual(
            variable_positions, intervals_min, intervals_max)
        return newcomer

    return [spawn(identifier) for identifier in range(number_of_individual)]
def _exploration_output_paths(data_file, filter_name, score_method):
    """
    -> Build the names of the files written by run_ag_exploration
    -> data_file is the path of the input data file, only its base
       name (text before the first ".") is reused
    -> return a tuple (result, max_result, min_result, solution)
       of file names located in DATA/EXPLORATION
    """

    # Windows keeps its backslash layout; every other platform
    # (Linux, but also e.g. macOS) uses the forward-slash layout.
    separator = "\\" if platform.system() == "Windows" else "/"

    base_name = data_file.split(separator)[-1].split(".")[0]
    stem = "DATA" + separator + "EXPLORATION" + separator
    stem += base_name + "_" + str(filter_name) + "_" + str(score_method)

    return (stem + ".csv",
            stem + "_max.csv",
            stem + "_min.csv",
            stem + "_FixeStep.log")


def _truncate_file(file_name):
    """ -> Create file_name, or empty it when it already exists """
    with open(file_name, "w"):
        pass


def _append_line(file_name, line):
    """ -> Append line, followed by a newline, to file_name """
    with open(file_name, "a") as handle:
        handle.write(line + "\n")


def run_ag_exploration(data_file, number_of_individual_per_generation, max_iteration, score_method, filter_strat):
    """
    -> Run the genetic algorithm
    -> data_file used in the evaluation process
    -> number_of_individual_per_generation is an int
    -> max_iteration is an int
    -> score_method is a string, the method to use for scoring, could be:
        - nn (neural network)
        - svm (support vector machine)
        - tree (decision tree)
    -> filter_strat is a string, name of the filter applied
       on the cohort, could be:
        - random
        - any disease (SLE, SjS ...)
    -> return nothing but write a few results files in DATA/EXPLORATION
       (global score, best score and worst score per iteration, plus a
       snapshot of the population whenever one individual reaches the
       score threshold) and the duration of each zone in time.log
    """

    #--------------------#
    # General parameters #
    #--------------------#
    progress = 0
    mutation_rate = 5
    intervals_min = 2
    intervals_max = 80
    number_of_good_parents = 6
    number_of_bad_parents = 2
    score_threshold = 80
    filter_name = "control_vs_"+str(filter_strat)
    time_file_name = "time.log"

    # start from an empty timing log
    _truncate_file(time_file_name)

    # NOTE(review): time.clock() is kept because the surrounding code
    # targets Python 2; it no longer exists in Python >= 3.8.

    # ZONE 1 START
    start_zone_1 = time.clock()
    output_paths = _exploration_output_paths(data_file, filter_name, score_method)
    result_file_name = output_paths[0]
    max_result_file_name = output_paths[1]
    min_result_file_name = output_paths[2]
    solution_file_name = output_paths[3]
    # ZONE 1 END
    end_zone_1 = time.clock() - start_zone_1
    _append_line(time_file_name, "zone1,"+str(end_zone_1))

    #--------------------#
    # Prepare Population #
    #--------------------#

    # ZONE 2 START
    start_zone_2 = time.clock()

    # Generate matrix from data file
    # NOTE(review): the extracted matrix is not used in this function;
    # the call is kept so that its cost stays inside the zone 2 timing.
    dichotomization.extract_matrix_from(data_file)

    # init population
    pop = create_population(number_of_individual_per_generation, data_file, intervals_min, intervals_max)

    # ZONE 2 END
    end_zone_2 = time.clock() - start_zone_2
    # each timing record sits on its own line, zone 2 included
    _append_line(time_file_name, "zone2,"+str(end_zone_2))

    # init results files
    _truncate_file(result_file_name)
    _truncate_file(max_result_file_name)
    _truncate_file(min_result_file_name)

    for x in range(0, max_iteration):

        # ZONE 3 START
        start_zone_3 = time.clock()

        #-------------------------#
        # Evaluate the individual #
        #-------------------------#
        # evaluate population: g[0] is written as the score of the
        # iteration, g[1] is indexed by individual id
        g = grade_population(pop, data_file, score_method, filter_strat, True)

        # ZONE 3 END
        end_zone_3 = time.clock() - start_zone_3
        _append_line(time_file_name, "zone3,"+str(end_zone_3))

        # write result in file
        _append_line(result_file_name, str(x)+","+str(g[0]))

        # write solution in file if one of the individual in
        # population looks like a good solution (i.e score >= threshold)
        score_list = [g[1][individual._id] for individual in pop]
        save_pop = any(float(individual_score) >= float(score_threshold)
                       for individual_score in score_list)

        if(save_pop):
            # one snapshot file per iteration, tagged with the iteration number
            solution_file_name_processed = solution_file_name.replace("FixeStep", str(progress))
            with open(solution_file_name_processed, "w") as solution_file:
                for individual in pop:
                    solution_file.write(">"+str(individual._id)+","+str(g[1][individual._id])+"\n")
                    for key in individual._intervals_to_variables.keys():
                        solution_file.write(str(key)+","+str(individual._intervals_to_variables[key])+"\n")

        # Get the best score in population and write
        # the result in a file
        best_score = max(score_list)
        _append_line(max_result_file_name, str(progress)+","+str(best_score))

        # Get the worst score in population and write
        # the result in a file
        worst_score = min(score_list)
        _append_line(min_result_file_name, str(progress)+","+str(worst_score))

        #--------#
        # Evolve #
        #--------#

        # ZONE 4 START
        start_zone_4 = time.clock()

        # => Get the Bests in population
        bests = get_best_individual_in_population(number_of_good_parents, g, pop)

        # => Randomly select bad individuals
        bads = random_selection_of_bad_candidates(bests, pop, number_of_bad_parents)

        # => Mutate a small random portion of the population
        parents = bests + bads
        mutation(mutation_rate, intervals_min, intervals_max, parents)

        # => crossover parents to create children
        children = create_children(parents, pop)

        # => Merge parent and child to constitute the next population
        # NOTE(review): the merged list is never assigned back to pop, so
        # the next iteration grades the same list object unless the
        # helpers above modify pop in place -- TODO confirm this is intended.
        parents.extend(children)

        # progress bar (50 characters wide, one "#" per 2 percent)
        step = float((100/float(max_iteration)))
        progress += 1
        progress_perc = progress*step
        factor = math.ceil((progress_perc/2))
        progress_bar = "#" * int(factor)
        progress_bar += "-" * int(50 - factor)
        display_line = "["+str(score_method)+"]|"+progress_bar+"|"+str(progress)+"|"+str(g[0])
        sys.stdout.write("\r%d%%" % progress_perc)
        sys.stdout.write(display_line)
        sys.stdout.flush()

        # ZONE 4 END
        end_zone_4 = time.clock() - start_zone_4
        _append_line(time_file_name, "zone4,"+str(end_zone_4))
def evaluate_individual(individual, data_file_name, method, filter_strat):
    """
    -> Evaluate the individual using NN project
    -> individual is a Individual object
    -> data_file_name is the matrix file name
    -> method is a string, the method to use to compute the score, could be:
        - nn (for neural network)
        - svm (for support vector machine)
        - tree (decision tree)
    -> filter_strat is a string, name of the filter applied
       on the cohort, could be:
        - random
        - any disease (SLE, SjS ...)
    -> run the matching evaluation script in the NN folder
       (../../NN relative to the current working directory)
    -> return the last line of NN/evaluation_score.log as a string,
       or -1 if that file is empty
    NOTE(review): another evaluate_individual definition appears further
    down in this source; if both live in the same module the later one
    replaces this one -- TODO confirm which is meant to be used.
    """

    # script to launch in the NN folder for each scoring method
    evaluation_scripts = {
        "nn": "evaluation.py",
        "svm": "svm_evaluation.py",
        "tree": "tree_evaluation.py",
    }

    # Generate matrix from data file
    pack = dichotomization.extract_matrix_from(data_file_name)
    data = pack[0]
    variable_to_position = pack[1]

    # Create disjonct Table for Matrix
    disjonctif_tables = create_disjonctTable_for_matrix(
        data, variable_to_position, individual._intervals_to_variables)

    # use disjonct table for dichotomization
    #    - use matrix and table as input
    #    - return a new matrix
    data_dichotomized = dichotomize(data, disjonctif_tables)

    # os.path.join picks the separator of the running platform, so the
    # same code serves Windows, Linux and any other system
    save_file_name = os.path.join(
        "DATA", "MATRIX",
        "data_dichotomized_pattern_individual_to_evaluate.csv")
    save_dichotomized_matrix_in_file(pack[1], pack[2], data_dichotomized,
                                     individual._intervals_to_variables,
                                     save_file_name)

    # compute the score
    nn_folder = os.path.join("..", "..", "NN")
    script_name = evaluation_scripts.get(method)
    if script_name is None:
        print("[ERROR] method: " + str(method) + " is not recognized")
    else:
        # The script is run from inside the NN folder; come back to the
        # directory we started from, whatever the machine, even if the
        # call fails.
        starting_directory = os.getcwd()
        os.chdir(nn_folder)
        try:
            os.system("python " + script_name + " " + str(filter_strat))
        finally:
            os.chdir(starting_directory)

    #os.remove(save_file_name)

    # Get the score: the last line of the log wins
    # NOTE(review): when method is not recognized no script is run, so
    # whatever an earlier run left in the log is returned.
    score = -1
    with open(os.path.join(nn_folder, "evaluation_score.log"), "r") as score_file:
        for line in score_file:
            score = line.split("\n")[0]

    return score
def evaluate_individual(individual, data_file_name, method, filter_strat):
    """
    -> Evaluate the individual using NN project
    -> individual is a Individual object
    -> data_file_name is the matrix file name
    -> method is a string, the method to use to compute the score, could be:
        - nn (for neural network)
        - svm (for support vector machine)
        - tree (decision tree)
    -> filter_strat is a string, name of the filter applied
       on the cohort, could be:
        - random
        - any disease (SLE, SjS ...)
    -> return a dict {"score": float, "id": individual._id}, the score
       being the last line of evaluation_score.log (-1.0 if the file
       is empty)
    """

    # Generate matrix from data file
    pack = dichotomization.extract_matrix_from(data_file_name)
    data = pack[0]
    variable_to_position = pack[1]

    # Create disjonct Table for Matrix
    disjonctif_tables = create_disjonctTable_for_matrix(
        data, variable_to_position, individual._intervals_to_variables)

    # use disjonct table for dichotomization
    #    - use matrix and table as input
    #    - return a new matrix
    data_dichotomized = dichotomize(data, disjonctif_tables)

    if platform.system() == "Windows":
        save_file_name = "DATA\\MATRIX\\data_dichotomized_pattern_individual_to_evaluate_"+str(individual._id)+".csv"
    else:
        # Linux layout, also used as the fallback on any other platform.
        # NOTE(review): unlike the Windows name this one carries no
        # individual id although the id is passed to the scoring
        # functions below -- TODO confirm which name they expect.
        save_file_name = "DATA/MATRIX/data_dichotomized_pattern_individual_to_evaluate.csv"

    save_dichotomized_matrix_in_file(pack[1], pack[2], data_dichotomized,
                                     individual._intervals_to_variables,
                                     save_file_name)

    # compute the score
    if method == "nn":
        # Run the NN and clean the data
        nn_evaluation.run_nn_scoring(filter_strat, individual._id)
    elif method == "svm":
        # Run SVM evaluation
        svm_evaluation.run_svm_scoring(filter_strat, individual._id)
    elif method == "tree":
        # Run decision tree evaluation
        tree_evaluation.run_tree_scoring(filter_strat, individual._id)
    else:
        print("[ERROR] method: "+str(method)+" is not recognized")

    #os.remove(save_file_name)

    # Get the score: the last line of the log wins
    # NOTE(review): when method is not recognized nothing is scored, so
    # whatever an earlier run left in the log is returned.
    score = -1
    with open("evaluation_score.log", "r") as score_file:
        for line in score_file:
            score = line.split("\n")[0]

    return {"score": float(score), "id": individual._id}
amplitude = max_interval - 2 step = float((100 / amplitude)) # progress bar progress += 1 progress_perc = progress * step factor = math.ceil((progress_perc / 2)) progress_bar = "#" * int(factor) progress_bar += "-" * int(50 - factor) display_line = "[panel " + str(panel) + "]|" + progress_bar + "|" sys.stdout.write("\r%d%%" % progress_perc) sys.stdout.write(display_line) sys.stdout.flush() # Generate matrix from data file pack = dichotomization.extract_matrix_from( "DATA/MATRIX/panel_" + str(panel) + "_filtered_processed.txt") data = pack[0] # create disjonct table for all variable in a matrix # -> input : a matrix # -> output : dict of table {variableIndex : disjonctTable} tables_test = dichotomization.create_disjonctTable_for_matrix( data, number_of_interval) # use disjonct table for dichotomization # - use matrix and table as input # - return a new matrix truc = dichotomization.dichotomize(data, tables_test) dichotomization.save_dichotomized_matrix_in_file( pack[1], pack[2], truc, number_of_interval,