예제 #1
0
def save_module_view(request):
    """Save a module from the JSON body of a POST request.

    The request body is decoded with ``json.loads``, handed to
    ``utils.save_module`` and the result is wrapped by
    ``public.success_result_http``. Any non-POST request gets the
    ``public.fail_result_http`` response instead.
    """
    # Guard clause: only POST is accepted.
    if request.method != "POST":
        return public.fail_result_http("Only POST!")

    payload = json.loads(request.body)
    saved = utils.save_module(payload)
    return public.success_result_http(saved)
예제 #2
0
def Rating_encode(module, param):
    """Scaffold for per-``Rating`` aggregate features.

    When ``param["flag"]`` is truthy the frames are fetched with
    ``pick_data(module)``, the group-by helpers are prepared, and the combined
    frame is handed to ``save_module`` together with the number of training
    rows. Every candidate feature below is currently commented out, so no
    column is added to ``all_df``. When the flag is falsy, ``module`` is
    returned untouched.
    """
    target_col = "Rating"
    if param["flag"]:
        train_df, test_df, all_df = pick_data(module)
        vc = all_df[target_col].value_counts()
        g = all_df.groupby(target_col)

        # Share of titles that have reviews, plus summary statistics (User)
        col = "User_Count"
        #all_df["Rating_Reviewed_Rate"] = all_df[target_col].map(g[col].count() / vc)
        #all_df["Rating_User_Count_Sum"] = all_df[target_col].map(g[col].sum())
        #all_df["Rating_User_Count_Mean"] = all_df[target_col].map(g[col].mean())
        #all_df["Rating_User_Count_Median"] = all_df[target_col].map(g[col].median())
        #all_df["Rating_User_Count_Max"] = all_df[target_col].map(g[col].max())
        #all_df["Rating_User_Count_Min"] = all_df[target_col].map(g[col].min())
        #all_df["Rating_User_Count_Std"] = all_df[target_col].map(g[col].std())
        #all_df["Rating_User_Count_Skew"] = all_df[target_col].map(g[col].skew())

        # Review score statistics (User); rows whose score is "tbd" are excluded
        col = "User_Score"
        # Take an explicit copy: assigning into a boolean-filtered slice of
        # all_df is chained assignment and raises SettingWithCopyWarning.
        temp_g = all_df[all_df["User_Score"] != "tbd"].copy()
        temp_g["User_Score"] = temp_g["User_Score"].astype(float)
        temp_g = temp_g.groupby(target_col)
        #all_df["Rating_User_Score_Sum"] = all_df[target_col].map(temp_g[col].sum())
        #all_df["Rating_User_Score_Mean"] = all_df[target_col].map(temp_g[col].mean())
        #all_df["Rating_User_Score_Max"] = all_df[target_col].map(temp_g[col].max())
        #all_df["Rating_User_Score_Median"] = all_df[target_col].map(temp_g[col].median())
        #all_df["Rating_User_Score_Min"] = all_df[target_col].map(temp_g[col].min())
        #all_df["Rating_User_Score_Skew"] = all_df[target_col].map(temp_g[col].skew())

        # Review-count statistics (Critic)
        col = "Critic_Count"
        #all_df["Rating_Critic_Count_Sum"] = all_df[target_col].map(g[col].sum())
        #all_df["Rating_Critic_Count_Mean"] = all_df[target_col].map(g[col].mean())
        #all_df["Rating_Critic_Count_Median"] = all_df[target_col].map(g[col].median())
        #all_df["Rating_Critic_Count_Max"] = all_df[target_col].map(g[col].max())
        #all_df["Rating_Critic_Count_Min"] = all_df[target_col].map(g[col].min())
        #all_df["Rating_Critic_Count_Std"] = all_df[target_col].map(g[col].std())
        #all_df["Rating_Critic_Count_Skew"] = all_df[target_col].map(g[col].skew())

        # Review score statistics (Critic)
        col = "Critic_Score"
        #all_df["Rating_Critic_Score_Sum"] = all_df[target_col].map(g[col].sum())
        #all_df["Rating_Critic_Score_Mean"] = all_df[target_col].map(g[col].mean())
        #all_df["Rating_Critic_Score_Median"] = all_df[target_col].map(g[col].median())
        #all_df["Rating_Critic_Score_Max"] = all_df[target_col].map(g[col].max())
        #all_df["Rating_Critic_Score_Min"] = all_df[target_col].map(g[col].min())
        #all_df["Rating_Critic_Score_Std"] = all_df[target_col].map(g[col].std())
        #all_df["Rating_Critic_Score_Skew"] = all_df[target_col].map(g[col].skew())

        # Number of games per rating
        #all_df["Rating_Game_Num"] = all_df[target_col].map(vc)

        # Number of distinct platforms per rating
        #col = "Platform"
        #all_df["Rating_Plat_Num"] = all_df[target_col].map(g[col].unique().map(lambda x : len(x)))
        return save_module(module, all_df, train_df.shape[0])
    else:
        return module
예제 #3
0
def equal_Pub_Dev(module, param):
    """Add a 0/1 column flagging rows whose Publisher equals the Developer.

    When ``param["flag"]`` is truthy, ``all_df["equal_Pub_Dev"]`` is set to 1
    where ``Publisher == Developer`` and 0 otherwise (missing values never
    compare equal), and the frame is handed to ``save_module`` together with
    the number of training rows. When the flag is falsy, ``module`` is
    returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    # Vectorized comparison instead of a per-row Python lambda: same 0/1
    # result without one Python call per row.
    # NOTE(review): the row-wise apply looked like it would break on an empty
    # frame; the vectorized form should not - confirm if that case matters.
    same_company = all_df["Publisher"] == all_df["Developer"]
    all_df["equal_Pub_Dev"] = same_company.astype(int)
    return save_module(module, all_df, train_df.shape[0])
예제 #4
0
def save_module_view(request):
    """Save a module from a POST request, recording who created it.

    The JSON body is decoded, its ``"creator"`` key is set from
    ``public.get_user_mail(request)``, and the payload is passed to
    ``utils.save_module``; the result is wrapped by
    ``public.success_result_http``. Non-POST requests receive the
    ``public.fail_result_http`` response.
    """
    if request.method == "POST":
        payload = json.loads(request.body)
        # Stamp the payload with the requesting user before saving.
        payload["creator"] = public.get_user_mail(request)
        saved = utils.save_module(payload)
        return public.success_result_http(saved)

    return public.fail_result_http(u"Only support POST!")
예제 #5
0
def User_Count_tbd2Null(module, param):
    """Turn ``"tbd"`` entries of ``User_Score`` into NaN and cast to float.

    Despite the function name, the column that is rewritten is
    ``User_Score``. When ``param["flag"]`` is truthy the cleaned frame is
    handed to ``save_module`` together with the number of training rows;
    otherwise ``module`` is returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    # Disabled option: keep an indicator of which rows were "tbd".
    #all_df["is_tbd"] = all_df["User_Score"].map(lambda x : 1 if x == "tbd" else 0)
    scores = all_df["User_Score"]
    # `where` keeps values for which the condition holds and puts NaN
    # elsewhere. The previous `replace('tbd', None)` is version-dependent:
    # older pandas treats a None value as "not given" and forward-fills the
    # previous row's score instead of nulling the entry.
    all_df["User_Score"] = scores.where(scores != "tbd").astype(float)
    return save_module(module, all_df, train_df.shape[0])
예제 #6
0
def count_encode(module, param):
    """Add a frequency-encoded ``ce_<col>`` column for each requested column.

    For every name in ``param["cols"]`` the new column holds how many times
    the row's value occurs in ``all_df[col]``. Values that are absent from
    ``value_counts()`` (missing values, by its default) map to NaN. When
    ``param["flag"]`` is truthy the frame is handed to ``save_module`` together
    with the number of training rows; otherwise ``module`` is returned
    untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    for col in param["cols"]:
        counts = all_df[col].value_counts()
        all_df[f"ce_{col}"] = all_df[col].map(counts)
        # The former `all_df.drop(col_name, axis=1)` discarded its result, so
        # it changed nothing and only copied the frame on every iteration.
    return save_module(module, all_df, train_df.shape[0])
예제 #7
0
def fillna(module, param):
    """Fill missing values: ``-999`` in numeric columns, ``"missing value"`` elsewhere.

    Only columns that contain at least one null are touched. When
    ``param["flag"]`` is truthy the filled frame is handed to ``save_module``
    together with the number of training rows; otherwise ``module`` is
    returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    # Select the affected column labels directly; no need to transpose the
    # whole frame just to index it with the per-column null mask.
    null_cols = all_df.columns[all_df.isnull().any()].tolist()
    for col in null_cols:
        # dtype.kind covers every signed/unsigned integer and float width,
        # whereas `dtype in [int, float]` only matched the default widths and
        # sent e.g. float32 columns to the string branch.
        if all_df[col].dtype.kind in "iuf":
            fill_value = -999
        else:
            fill_value = "missing value"
        # Assign the result back: `all_df[col].fillna(..., inplace=True)` acts
        # on an intermediate object and is not guaranteed to update all_df
        # (it does not under pandas Copy-on-Write).
        all_df[col] = all_df[col].fillna(fill_value)
    return save_module(module, all_df, train_df.shape[0])
예제 #8
0
def is_Release_Year_of_Platform(module, param):
    """Flag rows released in the earliest year recorded for their platform.

    The earliest ``Year_of_Release`` per ``Platform`` is computed over
    ``all_df``; ``is_Release_Year_of_Platform`` is 1 where a row's own year
    equals that minimum and 0 otherwise. When ``param["flag"]`` is truthy the
    frame is handed to ``save_module`` together with the number of training
    rows; otherwise ``module`` is returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, test_df, all_df = pick_data(module)
    first_year_by_platform = all_df.groupby("Platform")["Year_of_Release"].min()
    # Broadcast each platform's first year back onto its rows.
    platform_first_year = all_df["Platform"].map(first_year_by_platform)
    released_in_first_year = platform_first_year == all_df["Year_of_Release"]
    all_df["is_Release_Year_of_Platform"] = released_in_first_year.astype(int)
    return save_module(module, all_df, train_df.shape[0])
예제 #9
0
def Prod(module, param):
    """Build string "product" (interaction) columns from groups of columns.

    ``param["cols"]`` is iterated as a sequence of column-name groups. For each
    group a new column named ``" * ".join(group)`` is created whose value is
    the row's values in those columns, converted to strings and joined with
    ``" * "``. Missing values are rendered as ``"Null"``.

    When ``param["flag"]`` is truthy the frame is handed to ``save_module``
    together with the number of training rows; otherwise ``module`` is
    returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    for cols in param["cols"]:
        name = " * ".join(cols)
        # Accumulate in a local and assign once at the end, so a group whose
        # joined name equals one of its own columns (a single-column group)
        # is read before it is overwritten.
        combined = ""
        for i, col in enumerate(cols):
            source = all_df[col]
            # Substitute "Null" using the original null mask: after
            # astype(str) a NaN is already the text "nan", so a later
            # fillna("Null") could never match anything.
            label = source.astype(str).where(source.notna(), "Null")
            combined = label if i == 0 else combined + " * " + label
        all_df[name] = combined
    return save_module(module, all_df, train_df.shape[0])
예제 #10
0
def onehot_encode(module, param):
    """One-hot encode the frequent categories of each requested column.

    For every name in ``param["cols"]``, categories that occur more than 80
    times in ``all_df`` get a dummy column named ``OH_<col>=<category>``.
    Rarer categories and missing values get no column of their own.

    When ``param["flag"]`` is truthy the widened frame is handed to
    ``save_module`` together with the number of training rows; otherwise
    ``module`` is returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    min_count = 80  # a category must occur more often than this to be encoded
    for col in param["cols"]:
        counts = all_df[col].dropna().value_counts()
        frequent = counts[counts > min_count].index

        # Values outside `frequent` become NaN in the Categorical and so
        # produce an all-zero dummy row (dummy_na=False).
        as_categorical = pd.Categorical(all_df[col], categories=frequent)
        out_df = pd.get_dummies(as_categorical, dummy_na=False)
        # Convert the categorical column index to plain labels.
        out_df.columns = out_df.columns.tolist()
        # add_prefix returns a new frame; the result used to be discarded,
        # leaving dummies unprefixed and free to collide across columns.
        out_df = out_df.add_prefix(f'OH_{col}=')
        # The dummies come back with a fresh 0..n-1 index; reuse all_df's
        # index so the column-wise concat lines rows up by position.
        out_df.index = all_df.index
        all_df = pd.concat([all_df, out_df], axis=1)
    return save_module(module, all_df, train_df.shape[0])
예제 #11
0
def Summarize_Sales(module, param):
    """Attach per-group sales summaries, computed on the training rows only.

    For every key in ``param["by"]`` the columns ``param["sales_cols"]`` of
    ``train_df`` are grouped by that key and three summaries are left-merged
    onto ``all_df``:

    * ``<col>_Mean_by_<key>``: group mean divided by the group's mean
      ``Global_Sales`` (``Global_Sales`` itself is dropped from this one, so
      it must be listed in ``sales_cols``);
    * ``<col>_Max_by_<key>``: group maximum;
    * ``<col>_Var_by_<key>``: group variance.

    When ``param["flag"]`` is truthy the merged frame is handed to
    ``save_module`` together with the number of training rows; otherwise
    ``module`` is returned untouched.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    sales_cols = param["sales_cols"]
    for by in param["by"]:
        grouped = train_df.groupby(by)[sales_cols]

        # Express each mean as a share of the group's mean Global_Sales.
        mean_df = grouped.mean()
        share_df = mean_df.div(mean_df["Global_Sales"], axis=0).drop(
            columns="Global_Sales")

        summaries = (
            (share_df, f"_Mean_by_{by}"),
            (grouped.max(), f"_Max_by_{by}"),
            (grouped.var(), f"_Var_by_{by}"),
        )
        # Sum and skew summaries were also tried and are left disabled.
        for summary_df, suffix in summaries:
            # Name the new columns explicitly. merge() applies `suffixes`
            # only to overlapping names, so relying on it made the output
            # names depend on which sales columns already exist in all_df.
            all_df = all_df.merge(summary_df.add_suffix(suffix),
                                  on=by,
                                  how="left")

    return save_module(module, all_df, train_df.shape[0])
예제 #12
0
def label_encode(module, param):
    """Label-encode the requested columns plus every non-numeric column.

    Args:
        module: data container understood by ``pick_data`` / ``save_module``.
        param: mapping with ``"flag"`` (whether to run this step) and
            ``"cols"`` (columns to encode in addition to those whose dtype is
            neither ``int`` nor ``float``).

    Returns:
        The result of ``save_module`` for the encoded frame when
        ``param["flag"]`` is truthy, otherwise ``module`` unchanged.
    """
    if not param["flag"]:
        return module

    train_df, _, all_df = pick_data(module)
    non_numeric = [
        col
        for col in all_df.columns if all_df[col].dtype not in [int, float]
    ]
    # A column can be both requested and non-numeric. dict.fromkeys drops the
    # duplicate while keeping order, so it is no longer encoded twice.
    cols = list(dict.fromkeys(list(param["cols"]) + non_numeric))
    for col in cols:
        # A fresh encoder per column: each column gets its own label space.
        all_df[col] = LabelEncoder().fit_transform(all_df[col].values)
    return save_module(module, all_df, train_df.shape[0])
예제 #13
0
def Developer_encode(module, param):
    """Add per-``Developer`` aggregate features to the combined frame.

    When ``param["flag"]`` is truthy the following columns are added to
    ``all_df`` (in this order) before it is handed to ``save_module`` together
    with the number of training rows:

    * ``is_PS`` / ``is_PC``: row-level platform flags;
    * ``Dev_PS_rate`` / ``Dev_PC_rate``: share of the developer's rows with
      those flags set;
    * ``Dev_Spread_of_Generation``: max minus min ``Year_of_Release``;
    * ``Dev_User_Score_Mean``: mean ``User_Score`` over rows whose score is
      not ``"tbd"``;
    * ``Dev_Critic_Score_{Sum,Mean,Median,Max,Min}``.

    The commented-out lines are further candidate features that are
    currently disabled. When the flag is falsy, ``module`` is returned
    untouched.
    """
    if param["flag"]:
        train_df, test_df, all_df = pick_data(module)
        # Platform flags: name contains "PS" / is exactly "PC"
        all_df["is_PS"] = all_df["Platform"].map(lambda x: int("PS" in str(x)))
        all_df["is_PC"] = all_df["Platform"].map(lambda x: int("PC" == str(x)))

        vc = all_df["Developer"].value_counts()
        g = all_df.groupby("Developer")

        # Share of the developer's games on a PS platform
        col = "is_PS"
        all_df["Dev_PS_rate"] = all_df["Developer"].map(g[col].sum() / vc)

        # Share of the developer's games on PC
        col = "is_PC"
        all_df["Dev_PC_rate"] = all_df["Developer"].map(g[col].sum() / vc)
        #all_df.drop(col, axis=1, inplace=True)
        # Span of release years present in the dataset
        col = "Year_of_Release"
        all_df["Dev_Spread_of_Generation"] = all_df["Developer"].map(
            g[col].max() - g[col].min())

        ########################################################################################
        # Review-count statistics over the developer's titles (User)
        col = "User_Count"
        #all_df["Dev_Reviewed_Rate"] = all_df["Developer"].map(g[col].count() / vc)#
        #all_df["Dev_User_Count_Sum"] = all_df["Developer"].map(g[col].sum())
        #all_df["Dev_User_Count_Mean"] = all_df["Developer"].map(g[col].mean())#
        #all_df["Dev_User_Count_Median"] = all_df["Developer"].map(g[col].median())#
        #all_df["Dev_User_Count_Max"] = all_df["Developer"].map(g[col].max())
        #all_df["Dev_User_Count_Min"] = all_df["Developer"].map(g[col].min())
        #all_df["Dev_User_Count_Std"] = all_df["Developer"].map(g[col].std())
        #all_df["Dev_User_Count_Skew"] = all_df["Developer"].map(g[col].skew())

        ########################################################################################
        # Review score statistics (User); rows whose score is "tbd" are excluded
        col = "User_Score"
        # Take an explicit copy: assigning into a boolean-filtered slice of
        # all_df is chained assignment and raises SettingWithCopyWarning.
        temp_g = all_df[all_df["User_Score"] != "tbd"].copy()
        temp_g["User_Score"] = temp_g["User_Score"].astype(float)
        temp_g = temp_g.groupby("Developer")
        #all_df["Dev_User_Score_Sum"] = all_df["Developer"].map(temp_g[col].sum())
        all_df["Dev_User_Score_Mean"] = all_df["Developer"].map(
            temp_g[col].mean())  #
        #all_df["Dev_User_Score_Max"] = all_df["Developer"].map(temp_g[col].max())
        #all_df["Dev_User_Score_Median"] = all_df["Developer"].map(temp_g[col].median())
        #all_df["Dev_User_Score_Min"] = all_df["Developer"].map(temp_g[col].min())
        #all_df["Dev_User_Score_Skew"] = all_df["Developer"].map(temp_g[col].skew())

        ########################################################################################
        # Review-count statistics over the developer's titles (Critic)
        col = "Critic_Count"
        #all_df["Dev_Critic_Count_Sum"] = all_df["Developer"].map(g[col].sum())
        #all_df["Dev_Critic_Count_Mean"] = all_df["Developer"].map(g[col].mean())
        #all_df["Dev_Critic_Count_Median"] = all_df["Developer"].map(g[col].median())#
        #all_df["Dev_Critic_Count_Max"] = all_df["Developer"].map(g[col].max())
        #all_df["Dev_Critic_Count_Min"] = all_df["Developer"].map(g[col].min())#
        #all_df["Dev_Critic_Count_Std"] = all_df["Developer"].map(g[col].std())#
        #all_df["Dev_Critic_Count_Skew"] = all_df["Developer"].map(g[col].skew())#

        ########################################################################################
        # Review score statistics over the developer's titles (Critic)
        col = "Critic_Score"
        all_df["Dev_Critic_Score_Sum"] = all_df["Developer"].map(
            g[col].sum())  #
        all_df["Dev_Critic_Score_Mean"] = all_df["Developer"].map(
            g[col].mean())  #
        all_df["Dev_Critic_Score_Median"] = all_df["Developer"].map(
            g[col].median())  #
        all_df["Dev_Critic_Score_Max"] = all_df["Developer"].map(
            g[col].max())  #
        all_df["Dev_Critic_Score_Min"] = all_df["Developer"].map(
            g[col].min())  #
        #all_df["Dev_Critic_Score_Std"] = all_df["Developer"].map(g[col].std())
        #all_df["Dev_Critic_Score_Skew"] = all_df["Developer"].map(g[col].skew())

        ########################################################################################
        # Number of games per developer
        #all_df["Dev_Game_Num"] = all_df["Developer"].map(vc)

        # Number of distinct platforms per developer
        col = "Platform"
        #all_df["Dev_Plat_Num"] = all_df["Developer"].map(g[col].unique().map(lambda x : len(x)))
        return save_module(module, all_df, train_df.shape[0])
    else:
        return module