示例#1
0
def adjust_hyper_param(df_X, df_Y, model_flag):
    """Grid-search gradient-boosting hyper-parameters and dump the CV results.

    Only ``model_flag == 3`` is handled; any other value makes the function
    return ``None`` without doing anything.

    Args:
        df_X: Feature matrix passed straight to ``GridSearchCV.fit``.
        df_Y: Target values passed straight to ``GridSearchCV.fit``.
        model_flag: Selector for the model to tune (3 = gradient boosting).

    Side effects:
        Prints the best parameters, best score and best estimator, and writes
        the full ``cv_results_`` table through ``output.write_csv``.
    """
    if model_flag != 3:
        return

    # Candidate values explored exhaustively (3 * 5 * 3 * 3 combinations).
    search_space = [
        dict(
            learning_rate=[0.05, 0.07, 0.1],
            n_estimators=[60, 70, 80, 90, 100],
            max_depth=[3, 4, 5],
            subsample=[0.6, 0.7, 0.8],
        )
    ]

    regressor = gradient_boosting.GradientBoostingRegressor(
        loss="ls",
        criterion="friedman_mse",
        warm_start=True,
    )

    search = GridSearchCV(
        regressor,
        param_grid=search_space,
        scoring="neg_mean_squared_error",
        n_jobs=-1,  # use every available core
        cv=5,
        return_train_score=True,
    )
    search.fit(df_X, df_Y)

    for summary in (search.best_params_, search.best_score_,
                    search.best_estimator_):
        print(summary)

    cv_table = pd.DataFrame(search.cv_results_)
    output.write_csv(cv_table, 2207, "2207_GDBT_grid_search")
示例#2
0
def get_stock_info(options, stock_code):
    """Scrape the 2009-2019 daily price table of one stock from cnyes.com.

    The history page of ``stock_code`` is opened in Chrome, the date range is
    forced to 2009/01/01 - 2019/12/31, and the resulting table is written out
    through ``output.write_csv(df, stock_code, "2207_info")``.

    Args:
        options: Chrome options object handed to ``webdriver.Chrome``.
        stock_code: Stock identifier used to build the page URL and passed on
            to ``output.write_csv``.

    Notes:
        * The browser session is always released, even when a Selenium call
          or the table parsing fails (previously it leaked on any error).
        * Parsing/writing failures are reported on stdout and swallowed, as
          before; Selenium lookup failures still propagate to the caller.
        * The table text is split on whitespace into rows of 10 tokens, so a
          cell containing a space would shift the columns.
    """
    base_url = "https://www.cnyes.com/twstock/ps_historyprice/"
    url = base_url + str(stock_code) + ".htm"
    table_xpath = "//*[@id='main3']/div[5]/div[3]/table/tbody"
    n_cols = 10

    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.get(url)
        driver.maximize_window()

        # Set the start date to 2009/01/01 (written via JS into the input).
        start_date = driver.find_element_by_xpath(
            "//*[@id='ctl00_ContentPlaceHolder1_startText']")
        driver.execute_script("arguments[0].value = '2009/01/01';",
                              start_date)

        # Set the end date to 2019/12/31.
        end_date = driver.find_element_by_xpath(
            "//*[@id='ctl00_ContentPlaceHolder1_endText']")
        driver.execute_script("arguments[0].value = '2019/12/31';", end_date)

        search_btn = driver.find_element_by_xpath(
            "//*[@id='ctl00_ContentPlaceHolder1_submitBut']")
        search_btn.click()
        driver.minimize_window()

        # Column names come from the <th> cells of the first table row.
        col_name = [
            driver.find_element_by_xpath(table_xpath + "/tr[1]/th[" + str(i) +
                                         "]").text
            for i in range(1, n_cols + 1)
        ]

        table_element = driver.find_element_by_xpath(table_xpath)

        try:
            rows = []
            current = []
            for line in table_element.text.split("\n"):
                for token in line.split(" "):
                    if len(current) == n_cols:
                        rows.append(current)
                        current = []
                    current.append(token)

            # Flush the final row: the loop above only emits a row when the
            # next token arrives, so the last one used to be lost.
            if len(current) == n_cols:
                rows.append(current)

            # rows[0] is the header line repeated inside the table text.
            df = pd.DataFrame(rows[1:], columns=col_name)
            output.write_csv(df, stock_code, "2207_info")
        except Exception as e:
            print(e)
            print("Fail to get stock info,stock_code : {}".format(stock_code))
    finally:
        # Always end the session so no Chrome/chromedriver process is left.
        driver.quit()
示例#3
0
def KD_value(df):
    """Compute the 9-day stochastic K/D values and write them to CSV.

    The rows of ``df`` are walked from the last row to the first, so the frame
    is presumably ordered newest-first (TODO confirm against the caller).
    Positional columns 2, 3 and 4 are used as the window maximum source, the
    window minimum source and the current value respectively -- presumably
    high, low and close.

    For each day::

        RSV = 100 * (close - lowest_low_9) / (highest_high_9 - lowest_low_9)
        K   = 0.67 * previous_K + 0.33 * RSV
        D   = 0.67 * previous_D + 0.33 * K

    K and D are both seeded with 50.  The output lists are reversed before
    being written, so the seed ends up in the last row.

    Args:
        df: Price frame with at least 5 positional columns.

    Side effects:
        Writes a ``K_value`` / ``D_value`` frame through
        ``output.write_csv(KD_df, 2207, "2207_KD_value")``.
    """
    window = 9
    K_lst = [50]
    D_lst = [50]

    for i in reversed(range(df.shape[0] - (window - 1))):
        lowest = min(df.iloc[i:i + window, 3])
        highest = max(df.iloc[i:i + window, 2])
        spread = highest - lowest

        if spread == 0:
            # Flat window: RSV is undefined, fall back to the neutral level
            # instead of letting NaN/inf poison every later value.
            RSV = 50
        else:
            # Scaled to 0-100 so it is commensurate with the 50 seed.
            RSV = 100 * (df.iloc[i, 4] - lowest) / spread

        # Always smooth against the most recently computed value; the old
        # ``i - (n - 9)`` index drifted and kept re-reading element 1.
        today_K = K_lst[-1] * 0.67 + RSV * 0.33
        K_lst.append(today_K)
        today_D = D_lst[-1] * 0.67 + today_K * 0.33
        D_lst.append(today_D)

    KD_df = pd.DataFrame(
        dict(
            zip(["K_value", "D_value"],
                [list(reversed(K_lst)),
                 list(reversed(D_lst))])))
    output.write_csv(KD_df, 2207, "2207_KD_value")
示例#4
0
def _rsi_window_means(values, width):
    """Return the simple mean of every full *width*-long window of *values*."""
    return [
        sum(values[start:start + width]) / width
        for start in range(len(values) - (width - 1))
    ]


def _rsi_chain_length(values, ascending):
    """Count the leading adjacent pairs of *values* that are strictly ordered.

    With ``ascending=False`` a pair counts when ``left > right``; with
    ``ascending=True`` when ``right > left``.  Counting stops at the first
    pair that breaks the chain.
    """
    length = 0
    for left, right in zip(values, values[1:]):
        ordered = right > left if ascending else left > right
        if not ordered:
            break
        length += 1
    return length


def RSI_value(df):
    """Compute 3/5/7/10-day RSI plus trend-strength counters and write a CSV.

    For each window size ``n`` the RSI of row ``j`` is::

        100 * mean(gains[j:j+n]) / (mean(gains[j:j+n]) + mean(losses[j:j+n]))

    where gains/losses are the positive / absolute negative parts of the
    ``漲跌`` column.  A window with no movement at all (both means zero) has no
    defined RSI; it is reported as the neutral value 50.0 instead of raising
    ``ZeroDivisionError``.

    All series are truncated to the length of the shortest one so that row
    ``j`` refers to the same starting row in every column.

    Two extra columns count how far the RSIs are strictly ordered, starting
    from the shortest window (0-3):

    * ``RSI_up_thrend``:   3-day > 5-day > 7-day > 10-day
    * ``RSI_down_thrend``: 3-day < 5-day < 7-day < 10-day

    Args:
        df: Frame containing a numeric ``漲跌`` (price change) column.

    Side effects:
        Writes the result through
        ``output.write_csv(RSI_df, 2207, "2207_RSI_value")``.
    """
    catch_range_lst = [3, 5, 7, 10]
    fluctuation_lst = df["漲跌"].to_list()

    # Split each change into its gain and loss magnitude (zero otherwise).
    gains = [change if change > 0 else 0 for change in fluctuation_lst]
    losses = [abs(change) if change < 0 else 0 for change in fluctuation_lst]

    RSI_lst = []
    for width in catch_range_lst:
        # Taking n items at a time leaves the last n - 1 rows without a full
        # window, so each series is n - 1 entries shorter than the input.
        mean_gain = _rsi_window_means(gains, width)
        mean_loss = _rsi_window_means(losses, width)
        RSI_lst.append([
            100 * up / (up + down) if (up + down) else 50.0
            for up, down in zip(mean_gain, mean_loss)
        ])

    # Align every window size to the shortest series.
    common_len = min(len(series) for series in RSI_lst)
    RSI_lst = [series[:common_len] for series in RSI_lst]

    RSI_df = pd.DataFrame(
        dict(zip(["3日RSI", "5日RSI", "7日RSI", "10日RSI"], RSI_lst)))

    rows = list(zip(*RSI_lst))
    RSI_df["RSI_up_thrend"] = [
        _rsi_chain_length(row, ascending=False) for row in rows
    ]
    RSI_df["RSI_down_thrend"] = [
        _rsi_chain_length(row, ascending=True) for row in rows
    ]

    output.write_csv(RSI_df, 2207, "2207_RSI_value")
示例#5
0
def _macd_ema(values, period):
    """Return the EMA of *values*, seeded with the SMA of the first *period*.

    The first element corresponds to ``values[period - 1]``.  An empty list is
    returned when there are fewer than *period* values.
    """
    if len(values) < period:
        return []
    smoothed = [sum(values[:period]) / period]
    for value in values[period:]:
        smoothed.append(smoothed[-1] * (period - 1) / (period + 1) +
                        value * 2 / (period + 1))
    return smoothed


def MACD_value(df):
    """Compute DIF / MACD and golden/death-cross flags, then write a CSV.

    ``df`` is read from its last row to its first, so it is presumably ordered
    newest-first (TODO confirm); all intermediate lists are chronological
    (oldest first).  Positional columns 2, 3 and 4 feed the demand index
    ``DI = (c2 + c3 + 2 * c4) / 4`` -- presumably high, low and close.

    Pipeline:
        1. 12- and 26-period EMAs of DI, each seeded with an SMA; the seed
           day itself is discarded.
        2. ``DIF = EMA12 - EMA26`` on the days both EMAs exist.
        3. ``MACD`` = 9-period EMA of DIF (``prev * 4/5 + DIF / 5``), seeded
           with the mean of the first 9 DIF values.  The first 9 DIF days are
           dropped so DIF and MACD cover exactly the same days.
        4. ``MACD_buy`` is 1 on a day where DIF moves from below MACD (the
           chronologically previous day) to above it; ``MACD_sell`` is the
           opposite crossing.  The oldest remaining day has no predecessor
           and is dropped.

    The output is newest-first with columns ``DIF``, ``MACD``, ``MACD_buy``,
    ``MACD_sell``.  When ``df`` is too short to produce any row an empty frame
    is written.

    Args:
        df: Price frame with at least 5 positional columns.

    Side effects:
        Writes the result through
        ``output.write_csv(df, 2207, "2207_MACD_value")``.
    """
    # Every list below runs forward in time (e.g. 2009 -> 2019).
    DI_lst = [(df.iloc[i, 2] + df.iloc[i, 3] + 2 * df.iloc[i, 4]) / 4
              for i in reversed(range(df.shape[0]))]

    # Drop the SMA seed (first entry) of each EMA.
    EMA12_lst = _macd_ema(DI_lst, 12)[1:]
    EMA26_lst = _macd_ema(DI_lst, 26)[1:]

    # Keep only the days on which the slower EMA exists as well.
    EMA12_lst = EMA12_lst[len(EMA12_lst) - len(EMA26_lst):]

    DIF_lst = [fast - slow for fast, slow in zip(EMA12_lst, EMA26_lst)]

    # Seed of the signal line: mean of the first 9 DIF values.  It is only a
    # starting point and is not emitted itself.
    signal = sum(DIF_lst[:9]) / 9

    # Drop the first 9 days so DIF lines up with the MACD values below.
    DIF_lst = DIF_lst[9:]

    MACD_lst = []
    for dif in DIF_lst:
        # Smooth against the immediately preceding signal value and the DIF
        # of the same day.
        signal = signal * 4 / 5 + dif / 5
        MACD_lst.append(signal)

    MACD_buy = []
    MACD_sell = []
    for day in range(1, len(DIF_lst)):
        was_below = DIF_lst[day - 1] < MACD_lst[day - 1]
        was_above = DIF_lst[day - 1] > MACD_lst[day - 1]
        is_above = DIF_lst[day] > MACD_lst[day]
        is_below = DIF_lst[day] < MACD_lst[day]

        MACD_buy.append(1 if was_below and is_above else 0)
        MACD_sell.append(1 if was_above and is_below else 0)

    # The first chronological day has no predecessor, hence the [1:].
    df = pd.DataFrame({
        "DIF": list(reversed(DIF_lst[1:])),
        "MACD": list(reversed(MACD_lst[1:])),
        "MACD_buy": list(reversed(MACD_buy)),
        "MACD_sell": list(reversed(MACD_sell)),
    })

    output.write_csv(df, 2207, "2207_MACD_value")
def hard_insert():
    """Append courses taught by other departments to each curriculum table.

    The four department tables returned by ``read_file()`` (finance, MIS, IE,
    accounting) are extended with "support" courses.  Those are rows whose
    ``Course Title`` contains a given keyword and whose ``Remarks`` mention
    the receiving department, taken from:

    * the math, business-administration and economics course pages stored
      under ``<cwd>/1081`` (semester 1) and ``<cwd>/1082`` (semester 2);
    * the MIS, finance and accounting tables themselves.

    Afterwards the ``Remarks`` column is removed, every table is sorted by
    ``Year Standing`` and ``semester``, and the four tables are handed to
    ``output.write_csv`` in the order IE, finance, accounting, MIS.

    Side effects:
        Reads six HTML files relative to the current working directory,
        drops ``Remarks`` in place from the frames returned by
        ``read_file()``, and writes output via ``output.write_csv``.
    """
    cwd = os.getcwd()
    financial_df, mis_df, ie_df, accounting_df = read_file()

    # Page codes, in order: 2104 = math, 5204 = business administration,
    # 5104 = economics.
    first_term_pages = []
    second_term_pages = []
    for dept_code in ("2104", "5204", "5104"):
        for term_dir, bucket in (("1081", first_term_pages),
                                 ("1082", second_term_pages)):
            page_path = cwd + "/" + term_dir + "/" + dept_code + "_e.html"
            with open(page_path, 'r', encoding='utf-8') as handle:
                bucket.append(handle.read())

    remine_cols = [
        "Year Standing", "Course ID", "Course Title", "Credit", "Credit type",
        "Day/Period",
        "Remarks(Might contain Chinese due to course remarks which cannot be translated afterwards)"
    ]

    def _parse_pages(pages, semester):
        """Turn each HTML page into a trimmed frame tagged with *semester*."""
        frames = []
        for page in pages:
            table = pd.read_html(page)[0]
            table = table[remine_cols]
            # Same columns, with the long remarks header shortened.
            table.columns = [
                "Year Standing", "Course ID", "Course Title", "Credit",
                "Credit type", "Day/Period", "Remarks"
            ]
            table['semester'] = semester
            frames.append(table)
        return frames

    def _pick(frame, title_part, remark_part):
        """Rows whose title contains *title_part* and remarks *remark_part*."""
        title_hit = frame["Course Title"].str.contains(title_part)
        remark_hit = frame["Remarks"].str.contains(remark_part)
        return frame.loc[title_hit & remark_hit]

    def _collect(requests):
        """Stack the picked rows of every (frame, title, remark) request."""
        head = requests[0]
        stacked = _pick(head[0], head[1], head[2])
        for frame, title_part, remark_part in requests[1:]:
            stacked = pd.concat(
                [stacked, _pick(frame, title_part, remark_part)],
                axis=0,
                ignore_index=True)
        return stacked

    def _extend(base, extra, **concat_options):
        """Drop ``Remarks`` from *base* in place and append *extra*."""
        base.drop("Remarks", axis=1, inplace=True)
        return pd.concat([base, extra],
                         axis=0,
                         ignore_index=True,
                         **concat_options)

    df_lst_1 = _parse_pages(first_term_pages, 1)
    df_lst_2 = _parse_pages(second_term_pages, 2)
    df_lst = concat_df_lst(df_lst_1, df_lst_2)

    math_df = df_lst[0]
    manage_df = df_lst[1]
    eco_df = df_lst[2]
    for dept_frame in (math_df, manage_df, eco_df):
        dept_frame.reset_index(inplace=True)

    fin_tag = "Finance and Banking"
    mis_tag = "Information Management"
    acc_tag = "Accounting and Information Technology"

    # Finance: math, business-administration and economics courses.
    financial_insert_df = _collect([
        (math_df, "Calculus", fin_tag),
        (manage_df, "Introduction to Business", fin_tag),
        (eco_df, "Principle of Economics", fin_tag),
        (eco_df, "Microeconomics", fin_tag),
    ])

    # Accounting: math, business-administration, economics, MIS and finance
    # courses.
    accounting_insert_df = _collect([
        (math_df, "Calculus", acc_tag),
        (manage_df, "Seminar on Humanistic and Business Ethics", acc_tag),
        (eco_df, "Principle of Economics", acc_tag),
        (mis_df, "Introduction to Computer", acc_tag),
        (financial_df, "Statistics", acc_tag),
    ])

    # MIS: math, business-administration, economics, finance and accounting
    # courses.
    mis_insert_df = _collect([
        (math_df, "Calculus", mis_tag),
        (manage_df, "Introduction to Business", mis_tag),
        (manage_df, "Business Ethics", mis_tag),
        (eco_df, "Principle of Economics", mis_tag),
        (financial_df, "Statistics", mis_tag),
        (accounting_df, "Accounting", mis_tag),
    ])

    # IE only receives the math department's calculus course.
    ie_insert_df = _collect([(math_df, "Calculus", "Computer Science")])

    financial_insert_df.drop(['Remarks', "index"], axis=1, inplace=True)
    financial_df = _extend(financial_df, financial_insert_df)

    mis_insert_df.drop(['Remarks', 'index'], axis=1, inplace=True)
    mis_df = _extend(mis_df, mis_insert_df)

    ie_insert_df.drop(['Remarks', 'index'], axis=1, inplace=True)
    ie_df = _extend(ie_df, ie_insert_df)

    accounting_insert_df.drop(['Remarks', 'index'], axis=1, inplace=True)

    # Manually remove Principle of Economics (II) from the accounting list.
    unwanted_rows = accounting_insert_df[accounting_insert_df["Course ID"] ==
                                         5101002].index
    accounting_insert_df.drop(unwanted_rows, axis=0, inplace=True)
    accounting_df = _extend(accounting_df, accounting_insert_df, sort=True)

    # Order every table by year standing, then semester.
    for table in (financial_df, mis_df, ie_df, accounting_df):
        table.sort_values(by=["Year Standing", "semester"],
                          ascending=True,
                          inplace=True)

    # Special handling for courses that can be credited against each other:
    # accounting's "Statistics" is renamed to "Statistics (I)" to ease
    # comparison.  (A similar rename of "Calculus (I)" to "Calculus" was
    # planned but cancelled.)
    plain_statistics = accounting_df["Course Title"] == "Statistics"
    accounting_df.loc[plain_statistics, "Course Title"] = "Statistics (I)"

    new_df_lst = [ie_df, financial_df, accounting_df, mis_df]
    output.write_csv(new_df_lst)