示例#1
0
def save_excel(data):
    """Write ``data`` to ``./recipe.json`` and convert it to ``recipe.xlsx``.

    Args:
        data: JSON-serializable object in a layout that ``pandas.read_json``
            can parse (the original comment describes it as a list).

    Returns:
        None. Two files are produced in the working directory:
        ``recipe.json`` and ``recipe.xlsx`` (single sheet named ``sheet1``).
    """
    # Save as a JSON file. The ``with`` block closes (and therefore flushes)
    # the file before pandas reads it back, so the reader never sees a
    # partially written document.
    with open("./recipe.json", "w") as file:
        json.dump(data, file)  # serializes the data to JSON text

    # Save as Excel: load the JSON back as a DataFrame and export it.
    json_setting = pd.read_json('./recipe.json')
    # The context manager saves and closes the workbook on exit;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter('recipe.xlsx') as xlsx_setting:
        json_setting.to_excel(xlsx_setting, sheet_name='sheet1')
示例#2
0
    def __init__(self):
        """Initialise the base class, then set up the workbook path and writer."""
        BaseIpamProcessing.__init__(self)

        # Path of the interim data file that gets pulled in for processing:
        # the processed directory joined with the processed file name.
        processed_dir = self.dir_cls.processed_dir()
        processed_name = self.filename_cls.processed_filename()
        self.workbook_file = os.path.join(processed_dir, processed_name)

        # pandas writer object for that workbook, using the xlsxwriter engine.
        self.writer = pd.ExcelWriter(
            self.workbook_file,
            engine='xlsxwriter',
        )
def one_combination():
    """
    Run the algorithm with one set of parameters and summarise the runs.

    Relies on module-level names: ``params``, the ``valid_*`` lookup tables
    and ``pip_problem_instance``.

    Steps:
      1. Build the log/summary name from the search parameters.
      2. Return early if ``./log_all/<name>.xlsx`` already exists.
      3. Run the genetic algorithm ``number_of_runs`` times, keeping the
         solution with the highest fitness seen so far.
      4. Read every ``run_*`` workbook found in ``./log/<name>`` and write a
         summary workbook with the per-generation fitness of each run, its
         mean, standard deviation and a mean +/- 1.96 standard-error band,
         plus a second sheet describing the best solution.

    Returns:
        None.
    """
    log_base_dir = "./log/"  # Base dir for the logs of the individual runs
    if not path.exists(log_base_dir):
        mkdir(log_base_dir)
    all_dir = "./log_all/"  # Base dir for the per-configuration summaries
    if not path.exists(all_dir):
        mkdir(all_dir)

    # (tag, value) pairs, in the order they appear in the file name.
    name_parts = [
        ("I", valid_Init.get(params.get("Initialization-Approach"))),
        ("S", valid_Select.get(params.get("Selection-Approach"))),
        ("C", valid_Xover.get(params.get("Crossover-Approach"))),
        ("M", valid_Mutation.get(params.get("Mutation-Approach"))),
        ("R", valid_Replacement.get(params.get("Replacement-Approach"))),
        ("CP", params.get("Crossover-Probability")),
        ("MP", params.get("Mutation-Probability")),
        ("PS", params.get("Population-Size")),
        ("TS", params.get("Tournament-Size")),
        ("G", params.get("Number-of-Generations")),
    ]
    log_name = "_".join(f"{tag}-{value!s}" for tag, value in name_parts)
    resume_name = f"{all_dir}{log_name}.xlsx"

    # Skip the configuration if its summary workbook has already been written.
    # -----------------
    if isfile(resume_name):
        print(f"Run {log_name}.xlsx already made, skipping")
        return

    log_dir = log_base_dir + log_name
    if not path.exists(log_dir):
        mkdir(log_dir)

    # Run the same configuration many times (get distribution)
    # --------------------------------------------------------------------------------------------------
    overall_best_solution = None
    number_of_runs = 30
    for run in range(1, number_of_runs + 1):
        # Genetic Algorithm
        ga = GeneticAlgorithm(problem_instance=pip_problem_instance,
                              params=params,
                              run=run,
                              log_name=log_name,
                              log_dir=log_base_dir)

        ga_observer = LocalSearchObserver(ga)
        ga.register_observer(ga_observer)
        ga.search()
        ga.save_log()

        # Keep the best solution over the runs (higher fitness wins here).
        if (overall_best_solution is None
                or ga.best_solution.fitness > overall_best_solution.fitness):
            overall_best_solution = deepcopy(ga.best_solution)

        print('overall_best_solution: ', overall_best_solution.representation)
        print('overall_best_solution fitness, sharpe ratio: ',
              overall_best_solution.fitness)
        print(
            'overall_best_solution expected return: ',
            overall_best_solution.exp_return, ', ',
            overall_best_solution.exp_return - overall_best_solution.risk_free,
            'above risk free return')
        print('overall_best_solution risk: ', overall_best_solution.risk)

    # Consolidate the runs
    # --------------------------------------------------------------------------------------------------
    log_files = [f for f in listdir(log_dir) if isfile(join(log_dir, f))]
    print(log_files)

    fitness_runs = []
    columns_name = []
    generations = []

    # The loop variable is deliberately NOT called log_name, so the
    # configuration name computed above stays intact.
    for file_name in log_files:
        if not file_name.startswith('run_'):
            continue
        run_df = pd.read_excel(join(log_dir, file_name))
        fitness_runs.append(list(run_df["Fitness"]))
        # splitext removes only the extension; str.strip(".xslx") would strip
        # any of those characters from both ends of the name.
        columns_name.append(path.splitext(file_name)[0])

        if not generations:
            generations = list(run_df["Generation"])

    # One column per run, one row per generation.
    df = pd.DataFrame(list(zip(*fitness_runs)), columns=columns_name)

    # Row-wise statistics are computed before the helper columns are added so
    # that they only cover the run columns.
    fitness_sd = list(df.std(axis=1))
    fitness_mean = list(df.mean(axis=1))

    df["Generation"] = generations
    df["Fitness_SD"] = fitness_sd
    df["Fitness_Mean"] = fitness_mean
    # Mean +/- 1.96 standard errors.
    half_width = 1.96 * df["Fitness_SD"] / (number_of_runs**0.5)
    df["Fitness_Lower"] = df["Fitness_Mean"] - half_width
    df["Fitness_Upper"] = df["Fitness_Mean"] + half_width

    # Exporting summary of configuration with best solution.
    # No ``encoding`` argument: to_excel no longer accepts it in pandas 2.x.
    with pd.ExcelWriter(resume_name) as writer:
        df.to_excel(writer, sheet_name='Fitness', index=False)
        best_row = [[
            list(overall_best_solution.representation),
            overall_best_solution.fitness,
            overall_best_solution.exp_return,
            overall_best_solution.risk,
            overall_best_solution.exp_return - overall_best_solution.risk_free,
        ]]
        pd.DataFrame(
            best_row,
            columns=["Representation", "Fitness, Sharpe Ratio",
                     "Expected Return", "Risk", "Above Risk Free"],
        ).to_excel(writer, sheet_name='Overall_Best_Solution')
def one_combination(problem_instance,
                    params,
                    param_labels,
                    sample_size=30,
                    log_run_dir=join(".", "data", "log_run"),
                    log_all_dir=join(".", "data", "log_all")):
    """
    Run the algorithm with one set of parameters and summarise the runs.

    Args:
        problem_instance: Problem handed unchanged to ``GeneticAlgorithm``.
        params: Mapping of parameter name to value. The five ``*-Approach``
            values are translated through ``param_labels``; every other value
            is converted with ``str`` when building the file name.
        param_labels: Mapping from an approach value to the short label used
            in the file name.
        sample_size: Number of runs performed for the configuration.
        log_run_dir: Directory receiving the per-run logs.
        log_all_dir: Directory receiving the per-configuration summary.

    Returns:
        None. The function returns early, doing no work, when the summary
        workbook for this configuration already exists in ``log_all_dir``.
    """
    # Function-scope import so the block stands on its own; makedirs also
    # creates missing parents (e.g. "./data" for the default arguments) and
    # does not fail when the directory is already there.
    from os import makedirs

    makedirs(log_run_dir, exist_ok=True)
    makedirs(log_all_dir, exist_ok=True)

    approach_keys = {
        "Initialization-Approach", "Selection-Approach",
        "Crossover-Approach", "Mutation-Approach", "Replacement-Approach"
    }
    log_labels = {
        key: param_labels[value] if key in approach_keys else str(value)
        for key, value in params.items()
    }
    log_name = (
        "I-{Initialization-Approach}_S-{Selection-Approach}"
        "_C-{Crossover-Approach}_M-{Mutation-Approach}"
        "_R-{Replacement-Approach}_CP-{Crossover-Probability}"
        "_MP-{Mutation-Probability}_TS-{Tournament-Size}"
        "_PS-{Population-Size}_NG-{Number-of-Generations}"
    ).format(**log_labels)
    resume_name = join(log_all_dir, f"{log_name}.xlsx")

    # Checks if the configuration has already been performed (all sample_size runs), if so, exits.
    # --------------------------------------------------------------------------------------------------
    if isfile(resume_name):
        print(f"Configuration {log_name} already performed, skipping...")
        return None  # exit one_combination call

    # Perform several runs of the same configuration (get sample distribution)
    # --------------------------------------------------------------------------------------------------
    overall_best_solution = None
    for run in range(1, sample_size + 1):
        # Genetic Algorithm
        ga = GeneticAlgorithm(problem_instance=problem_instance,
                              params=params,
                              run=run,
                              log_name=log_name,
                              log_dir=log_run_dir)

        ga_observer = LocalSearchObserver(ga)
        ga.register_observer(ga_observer)
        ga.search()
        ga.save_log()

        # Keep the best solution over the runs (lower fitness wins here).
        if (overall_best_solution is None
                or ga.best_solution.fitness < overall_best_solution.fitness):
            overall_best_solution = deepcopy(ga.best_solution)

        print('overall_best_solution: ', overall_best_solution.representation)
        print('overall_best_solution fitness: ', overall_best_solution.fitness)

    # Consolidate the runs
    # --------------------------------------------------------------------------------------------------
    sub_log_dir = join(log_run_dir, log_name)
    log_files = [
        f for f in listdir(sub_log_dir) if isfile(join(sub_log_dir, f))
    ]
    fitness_runs = []
    columns_name = []
    generations = []

    # Going to each run for a given parameter configuration and extracting fitness for each generation
    for file_name in log_files:
        if not file_name.startswith('run_'):
            continue
        run_df = pd.read_excel(join(sub_log_dir, file_name))
        fitness_runs.append(list(run_df["Fitness"]))
        columns_name.append(splitext(file_name)[0])
        if not generations:
            generations = list(run_df["Generation"])

    # One column per run, one row per generation.
    df = pd.DataFrame(list(zip(*fitness_runs)), columns=columns_name)

    # Row-wise statistics are computed before the helper columns are added so
    # that they only cover the run columns.
    fitness_std = list(df.std(axis=1))
    fitness_mean = list(df.mean(axis=1))

    df["Generation"] = generations
    df["Fitness_STD"] = fitness_std
    df["Fitness_Mean"] = fitness_mean
    # Mean +/- 1.96 standard errors.
    half_width = 1.96 * df["Fitness_STD"] / (sample_size**0.5)
    df["Fitness_Lower"] = df["Fitness_Mean"] - half_width
    df["Fitness_Upper"] = df["Fitness_Mean"] + half_width

    # Exporting summary of configuration with best solution.
    # No ``encoding`` argument: to_excel no longer accepts it in pandas 2.x.
    with pd.ExcelWriter(resume_name) as writer:
        df.to_excel(writer, sheet_name='Fitness', index=False)
        pd.DataFrame(
            [[overall_best_solution.representation,
              overall_best_solution.fitness]],
            columns=["Representation", "Fitness"],
        ).to_excel(writer, sheet_name='Overall_Best_Solution', index=False)
        )

        df_gains_result_row = pd.DataFrame([[
            add, reduce, buy_and_hold_gain, buy_and_hold_rebalanced_gain,
            by_recommendations_gain, by_recommendations_rebalanced_gain
        ]],
                                           columns=df_gains_result.columns)
        df_gains_result = df_gains_result.append(df_gains_result_row)

        df_by_recommendations.loc[:, sum_col] = df_by_recommendations.sum(
            numeric_only=True, axis=1)
        df_by_recommendations_rebalanced.loc[:,
                                             sum_col] = df_by_recommendations_rebalanced.sum(
                                                 numeric_only=True, axis=1)

        date_string = datetime.now().strftime("%Y-%m-%d %H.%M")
        with pd.ExcelWriter(
                f'results/results {params} {date_string}.xlsx') as writer:
            df_buy_and_hold.to_excel(writer, sheet_name='buy-and-hold'[:30])
            df_buy_and_hold_rebalanced.to_excel(
                writer, sheet_name='buy-and-hold-monthly-rebalanced'[:30])
            df_by_recommendations.to_excel(writer,
                                           sheet_name='recommendations'[:30])
            df_by_recommendations_rebalanced.to_excel(
                writer, sheet_name='recommendations-monthly-rebalanced'[:30])
            df_positions.to_excel(writer,
                                  sheet_name='recommended-positions'[:30])
            df_analyzed.to_excel(writer, sheet_name='analyzed'[:30])

df_gains_result.to_excel(f'results/results summary {date_string}.xlsx')