def save_excel(data):
    """Dump ``data`` to ``./recipe.json`` and convert that file to ``recipe.xlsx``.

    Args:
        data: JSON-serialisable object. It is read back with
            ``pandas.read_json``, so it must have a shape pandas can turn
            into a DataFrame (e.g. a list of records).

    Returns:
        None. The two files are written relative to the current working
        directory.
    """
    # Save as a JSON file. json.dumps() turns the object (e.g. a list) into
    # a string. The context manager closes the handle, which guarantees the
    # text is flushed to disk before pandas reads the file back below.
    with open("./recipe.json", "w") as json_file:
        json_file.write(json.dumps(data))

    # Save as Excel: reload the JSON file as a DataFrame and write one sheet.
    json_setting = pd.read_json('./recipe.json')
    # ExcelWriter.save() is gone in pandas 2.x; leaving the ``with`` block
    # writes the workbook and closes it.
    with pd.ExcelWriter('recipe.xlsx') as xlsx_setting:
        json_setting.to_excel(xlsx_setting, sheet_name='sheet1')
def __init__(self):
    """Initialise the base class and open an Excel writer on the processed workbook.

    The workbook path is the directory returned by
    ``self.dir_cls.processed_dir()`` joined with the filename returned by
    ``self.filename_cls.processed_filename()``.
    """
    BaseIpamProcessing.__init__(self)

    # Resolve the location of the workbook this instance will write to.
    target_dir = self.dir_cls.processed_dir()
    target_name = self.filename_cls.processed_filename()
    self.workbook_file = os.path.join(target_dir, target_name)

    # pandas writer bound to that workbook, using the xlsxwriter engine.
    self.writer = pd.ExcelWriter(self.workbook_file, engine='xlsxwriter')
def one_combination():
    """Run one parameter configuration 30 times and write its summary workbook.

    The function takes no arguments; it reads the module-level ``params``,
    ``pip_problem_instance`` and the ``valid_*`` label mappings.

    Steps:
        1. Build the configuration name from ``params``.
        2. Return early if ``./log_all/<name>.xlsx`` already exists.
        3. Run the genetic algorithm ``number_of_runs`` times, keeping the
           solution with the highest fitness.
        4. Read every ``run_*`` workbook found in ``./log/<name>`` and
           combine the per-generation fitness into mean, standard deviation
           and mean +/- 1.96 standard errors.
        5. Write the ``Fitness`` and ``Overall_Best_Solution`` sheets to
           ``./log_all/<name>.xlsx``.

    Returns:
        None.
    """
    log_base_dir = "./log/"  # Base dir for the logs of the individual runs
    if not path.exists(log_base_dir):
        mkdir(log_base_dir)

    all_dir = "./log_all/"  # Base dir for the per-configuration summary files
    if not path.exists(all_dir):
        mkdir(all_dir)

    # The configuration name is built once and reused everywhere below.
    name_parts = (
        ("I-", valid_Init.get(params.get("Initialization-Approach"))),
        # NOTE (from the original author): this lookup returns None because
        # of the .select method.
        ("S-", valid_Select.get(params.get("Selection-Approach"))),
        ("C-", valid_Xover.get(params.get("Crossover-Approach"))),
        ("M-", valid_Mutation.get(params.get("Mutation-Approach"))),
        ("R-", valid_Replacement.get(params.get("Replacement-Approach"))),
        ("CP-", params.get("Crossover-Probability")),
        ("MP-", params.get("Mutation-Probability")),
        ("PS-", params.get("Population-Size")),
        ("TS-", params.get("Tournament-Size")),
        ("G-", params.get("Number-of-Generations")),
    )
    log_name = "_".join(prefix + str(value) for prefix, value in name_parts)
    resume_name = f"{all_dir}{log_name}.xlsx"

    log_dir = str(log_base_dir) + str(log_name)
    if not path.exists(log_dir):
        mkdir(log_dir)

    # Checks if the same run has already been performed; if so, skips it.
    # -----------------
    if isfile(resume_name):
        print(f"Run {log_name}.xlsx already made, skipping")
        return

    # Run the same configuration many times (get distribution)
    # --------------------------------------------------------------------------
    overall_best_solution = None
    number_of_runs = 30
    for run in range(1, number_of_runs + 1):
        # Genetic Algorithm
        ga = GeneticAlgorithm(problem_instance=pip_problem_instance,
                              params=params,
                              run=run,
                              log_name=log_name,
                              log_dir=log_base_dir)
        ga_observer = LocalSearchObserver(ga)
        ga.register_observer(ga_observer)
        ga.search()
        ga.save_log()

        # Keep the best solution over the runs (higher fitness wins).
        if (overall_best_solution is None
                or ga.best_solution.fitness > overall_best_solution.fitness):
            overall_best_solution = deepcopy(ga.best_solution)

    print('overall_best_solution: ', overall_best_solution.representation)
    print('overall_best_solution fitness, sharpe ratio: ',
          overall_best_solution.fitness)
    print('overall_best_solution expected return: ',
          overall_best_solution.exp_return, ', ',
          overall_best_solution.exp_return - overall_best_solution.risk_free,
          'above risk free return')
    print('overall_best_solution risk: ', overall_best_solution.risk)

    # Consolidate the runs
    # --------------------------------------------------------------------------
    # The run_* workbooks are presumably the ones written by ga.save_log().
    log_files = [f for f in listdir(log_dir) if isfile(join(log_dir, f))]
    print(log_files)

    fitness_runs = []
    columns_name = []
    generations = []
    # A distinct loop variable keeps `log_name` intact for the export below.
    for file_name in log_files:
        if not file_name.startswith('run_'):
            continue
        run_df = pd.read_excel(join(log_dir, file_name))
        fitness_runs.append(list(run_df["Fitness"]))
        # splitext removes only the extension; str.strip(".xslx") would trim
        # any of the characters '.', 'x', 's', 'l' from both ends.
        columns_name.append(path.splitext(file_name)[0])
        if not generations:
            generations = list(run_df["Generation"])

    # One column per run, one row per generation.
    df = pd.DataFrame(list(zip(*fitness_runs)), columns=columns_name)

    # Statistics are computed before the extra columns are added, so they
    # only cover the per-run fitness columns.
    fitness_sd = list(df.std(axis=1))
    fitness_mean = list(df.mean(axis=1))

    df["Generation"] = generations
    df["Fitness_SD"] = fitness_sd
    df["Fitness_Mean"] = fitness_mean
    # Mean +/- 1.96 standard errors (normal-approximation interval).
    margin = 1.96 * df["Fitness_SD"] / (number_of_runs**0.5)
    df["Fitness_Lower"] = df["Fitness_Mean"] - margin
    df["Fitness_Upper"] = df["Fitness_Mean"] + margin

    # Exporting summary of configuration with best solution.
    # `encoding` is not passed: DataFrame.to_excel no longer accepts it.
    best_row = [[
        list(overall_best_solution.representation),
        overall_best_solution.fitness,
        overall_best_solution.exp_return,
        overall_best_solution.risk,
        (overall_best_solution.exp_return - overall_best_solution.risk_free),
    ]]
    best_columns = ["Representation", "Fitness, Sharpe Ratio",
                    "Expected Return", "Risk", "Above Risk Free"]
    with pd.ExcelWriter(resume_name) as writer:
        df.to_excel(writer, sheet_name='Fitness', index=False)
        pd.DataFrame(best_row, columns=best_columns).to_excel(
            writer, sheet_name='Overall_Best_Solution')
def one_combination(problem_instance,
                    params,
                    param_labels,
                    sample_size=30,
                    log_run_dir=join(".", "data", "log_run"),
                    log_all_dir=join(".", "data", "log_all")):
    """Run one parameter configuration ``sample_size`` times and summarise it.

    Names the summary file from the search parameters and creates it from
    all the runs made for that set of parameters.

    Args:
        problem_instance: Problem handed to ``GeneticAlgorithm``.
        params: Mapping of parameter name to value. The ten keys used in
            the file-name template below must be present.
        param_labels: Mapping from the value of each ``*-Approach``
            parameter to the label used in the file name.
        sample_size: Number of runs of the configuration.
        log_run_dir: Directory handed to ``GeneticAlgorithm`` as ``log_dir``;
            the per-run workbooks are read from ``<log_run_dir>/<name>``.
        log_all_dir: Directory that receives the summary workbook.

    Returns:
        None.

    Raises:
        ValueError: If the configuration still has to be run and
            ``sample_size`` is smaller than 1.
    """
    # Local import: only `mkdir` is known to be imported at file level, and
    # it cannot create the nested default paths when "./data" is missing.
    from os import makedirs

    makedirs(log_run_dir, exist_ok=True)
    makedirs(log_all_dir, exist_ok=True)

    # Approach parameters are replaced by their label; the rest by str().
    approach_keys = {
        "Initialization-Approach", "Selection-Approach", "Crossover-Approach",
        "Mutation-Approach", "Replacement-Approach"
    }
    log_labels = {
        key: param_labels[value] if key in approach_keys else str(value)
        for key, value in params.items()
    }

    log_name = (
        "I-{Initialization-Approach}_S-{Selection-Approach}"
        "_C-{Crossover-Approach}_M-{Mutation-Approach}"
        "_R-{Replacement-Approach}_CP-{Crossover-Probability}"
        "_MP-{Mutation-Probability}_TS-{Tournament-Size}"
        "_PS-{Population-Size}_NG-{Number-of-Generations}"
    ).format(**log_labels)
    resume_name = join(log_all_dir, f"{log_name}.xlsx")

    # Checks if the configuration has already been performed (all
    # sample_size runs); if so, exits.
    # --------------------------------------------------------------------------
    if isfile(resume_name):
        print(f"Configuration {log_name} already performed, skipping...")
        return None  # exit one_combination call

    # Without at least one run there is no best solution to report.
    if sample_size < 1:
        raise ValueError("sample_size must be at least 1")

    # Perform several runs of the same configuration (get sample distribution)
    # --------------------------------------------------------------------------
    overall_best_solution = None
    for run in range(1, sample_size + 1):
        # Genetic Algorithm
        ga = GeneticAlgorithm(problem_instance=problem_instance,
                              params=params,
                              run=run,
                              log_name=log_name,
                              log_dir=log_run_dir)
        ga_observer = LocalSearchObserver(ga)
        ga.register_observer(ga_observer)
        ga.search()
        ga.save_log()

        # Keep the best solution over the runs (lower fitness wins here).
        if (overall_best_solution is None
                or ga.best_solution.fitness < overall_best_solution.fitness):
            overall_best_solution = deepcopy(ga.best_solution)

    print('overall_best_solution: ', overall_best_solution.representation)
    print('overall_best_solution fitness: ', overall_best_solution.fitness)

    # Consolidate the runs
    # --------------------------------------------------------------------------
    # The run_* workbooks are presumably the ones written by ga.save_log().
    sub_log_dir = join(log_run_dir, log_name)
    log_files = [
        f for f in listdir(sub_log_dir) if isfile(join(sub_log_dir, f))
    ]

    fitness_runs = []
    columns_name = []
    generations = []

    # Go to each run of this configuration and extract the fitness for each
    # generation.
    for file_name in log_files:
        if not file_name.startswith('run_'):
            continue
        run_df = pd.read_excel(join(sub_log_dir, file_name))
        fitness_runs.append(list(run_df["Fitness"]))
        columns_name.append(splitext(file_name)[0])
        if not generations:
            generations = list(run_df["Generation"])

    # One column per run, one row per generation.
    df = pd.DataFrame(list(zip(*fitness_runs)), columns=columns_name)

    # Statistics are computed before the extra columns are added, so they
    # only cover the per-run fitness columns.
    fitness_std = list(df.std(axis=1))
    fitness_mean = list(df.mean(axis=1))

    df["Generation"] = generations
    df["Fitness_STD"] = fitness_std
    df["Fitness_Mean"] = fitness_mean
    # Mean +/- 1.96 standard errors (normal-approximation interval).
    margin = 1.96 * df["Fitness_STD"] / (sample_size**0.5)
    df["Fitness_Lower"] = df["Fitness_Mean"] - margin
    df["Fitness_Upper"] = df["Fitness_Mean"] + margin

    # Exporting summary of configuration with best solution.
    # `encoding` is not passed: DataFrame.to_excel no longer accepts it.
    best_frame = pd.DataFrame(
        [[overall_best_solution.representation,
          overall_best_solution.fitness]],
        columns=["Representation", "Fitness"])
    with pd.ExcelWriter(resume_name) as writer:
        df.to_excel(writer, sheet_name='Fitness', index=False)
        best_frame.to_excel(writer,
                            sheet_name='Overall_Best_Solution',
                            index=False)
) df_gains_result_row = pd.DataFrame([[ add, reduce, buy_and_hold_gain, buy_and_hold_rebalanced_gain, by_recommendations_gain, by_recommendations_rebalanced_gain ]], columns=df_gains_result.columns) df_gains_result = df_gains_result.append(df_gains_result_row) df_by_recommendations.loc[:, sum_col] = df_by_recommendations.sum( numeric_only=True, axis=1) df_by_recommendations_rebalanced.loc[:, sum_col] = df_by_recommendations_rebalanced.sum( numeric_only=True, axis=1) date_string = datetime.now().strftime("%Y-%m-%d %H.%M") with pd.ExcelWriter( f'results/results {params} {date_string}.xlsx') as writer: df_buy_and_hold.to_excel(writer, sheet_name='buy-and-hold'[:30]) df_buy_and_hold_rebalanced.to_excel( writer, sheet_name='buy-and-hold-monthly-rebalanced'[:30]) df_by_recommendations.to_excel(writer, sheet_name='recommendations'[:30]) df_by_recommendations_rebalanced.to_excel( writer, sheet_name='recommendations-monthly-rebalanced'[:30]) df_positions.to_excel(writer, sheet_name='recommended-positions'[:30]) df_analyzed.to_excel(writer, sheet_name='analyzed'[:30]) df_gains_result.to_excel(f'results/results summary {date_string}.xlsx')