# Fill the "period" column of the visit log from
# Grouping.lookup_coupon_element(visit_tr2, coupon_tr, "period").
visit_tr2["period"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "period"
)

# Collapse the raw frames with Grouping.to_group first to reduce the
# calculation load of the steps below.
# NOTE(review): the trailing boolean is defined by Grouping.to_group;
# presumably it toggles a "count" column - confirm against that helper.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genreprice", "period"], True
)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: a copy of the grouped visits passed through
# Grouping.to_group_count on (USER_ID, genreprice, period).
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genreprice", "period"]
)

# Denominator: candidate rows joined to active rows on "period", then
# passed through Grouping.to_group_count on the same keys.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genreprice", "period"],
)

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "genreprice" (overall and per period).
visit_pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
visit_pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Prefix every column name with "v_" (renamed in place).
visit_pivot_genreprice.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)
visit_pivot_genreprice_period.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)

# Persist both pivots as pickles under ../model/.
visit_pivot_genreprice.to_pickle("../model/visit_pivot_genreprice.pkl")
visit_pivot_genreprice_period.to_pickle(
    "../model/visit_pivot_genreprice_period.pkl"
)
LOG.info("finished")
# Numerator source: purchases joined to users and coupons, then grouped
# with Grouping.to_group on (USER_ID, small_area, genre, period).
bought2 = Grouping.to_group(
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "small_area", "genre", "period"],
    True,
)
numer = bought2[["USER_ID", "small_area", "genre", "count", "period"]].copy()

# Keep only rows whose genre is listed in Utility.spot_genre and whose
# small_area is NOT listed in Utility.prefs (i.e. drop non-spot genres and
# small areas that share a name with a prefecture), then rename the
# "small_area" column to "sarea".
# NOTE(review): `denom` is expected to exist already; it is built outside
# this fragment.
denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom[denom_mask].rename(columns={"small_area": "sarea"})

numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer[numer_mask].rename(columns={"small_area": "sarea"})

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "sarea" (overall and per period).
pivot_sarea = Grouping.to_pivotdf(numer, denom, "sarea")
pivot_sarea_period = Grouping.to_pivotdf_period(numer, denom, "sarea")

# Persist both pivots as pickles under ../model/.
pivot_sarea.to_pickle("../model/pivot_sarea.pkl")
pivot_sarea_period.to_pickle("../model/pivot_sarea_period.pkl")
LOG.info("finished")
# Numerator: a copy of `visit` passed through Grouping.to_group_count on
# (USER_ID, genre, pref, period).
# NOTE(review): `visit`, `candidate` and `active` are built outside this
# fragment.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genre", "pref", "period"]
)

# Denominator: candidate rows joined to active rows on "period", then
# passed through Grouping.to_group_count on the same keys.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genre", "pref", "period"],
)

# "spotpref" carries the row's pref when its genre is listed in
# Utility.spot_genre, and the literal "NN" otherwise.
numer["spotpref"] = np.where(
    numer["genre"].isin(Utility.spot_genre), numer["pref"], "NN"
)
denom["spotpref"] = np.where(
    denom["genre"].isin(Utility.spot_genre), denom["pref"], "NN"
)

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "genre" and by "spotpref"
# (overall and per period).
visit_pivot_genre = Grouping.to_pivotdf(numer, denom, "genre")
visit_pivot_pref = Grouping.to_pivotdf(numer, denom, "spotpref")
visit_pivot_genre_period = Grouping.to_pivotdf_period(numer, denom, "genre")
visit_pivot_pref_period = Grouping.to_pivotdf_period(
    numer, denom, "spotpref"
)

# Prefix every column name with "v_" (renamed in place).
visit_pivot_genre.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)
visit_pivot_pref.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)
visit_pivot_genre_period.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)
visit_pivot_pref_period.rename(
    columns=lambda col: "v_{}".format(col), inplace=True
)

# Persist all four pivots as pickles under ../model/.
visit_pivot_genre.to_pickle("../model/visit_pivot_genre.pkl")
visit_pivot_pref.to_pickle("../model/visit_pivot_pref.pkl")
visit_pivot_genre_period.to_pickle("../model/visit_pivot_genre_period.pkl")
visit_pivot_pref_period.to_pickle("../model/visit_pivot_pref_period.pkl")
# Numerator: purchases joined to users and coupons, grouped with
# Grouping.to_group on (USER_ID, small_area, genre, period), then reduced
# to the columns needed for the pivot.
bought2 = Grouping.to_group(
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "small_area", "genre", "period"],
    True,
)
numer = bought2[["USER_ID", "small_area", "genre", "count", "period"]].copy()

# Drop rows whose genre is not in Utility.spot_genre, and rows whose
# small_area appears in Utility.prefs (same name as a prefecture); the
# surviving "small_area" column is renamed to "sarea".
# NOTE(review): `denom` must already be defined before this point; its
# construction is not part of this fragment.
denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom[denom_mask].rename(columns={"small_area": "sarea"})

numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer[numer_mask].rename(columns={"small_area": "sarea"})

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "sarea" (overall and per period).
pivot_sarea = Grouping.to_pivotdf(numer, denom, "sarea")
pivot_sarea_period = Grouping.to_pivotdf_period(numer, denom, "sarea")

# Write both pivots to ../model/ as pickles.
pivot_sarea.to_pickle("../model/pivot_sarea.pkl")
pivot_sarea_period.to_pickle("../model/pivot_sarea_period.pkl")
LOG.info("finished")
# Group the active-user frame and the coupon frame with Grouping.to_group.
# NOTE(review): the trailing boolean is defined by Grouping.to_group;
# presumably True adds the "count" column selected below - confirm.
active = Grouping.to_group(active, ["USER_ID", "period"], False)
cpntr2 = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)

# Denominator: active rows that also appear in `users` (join on USER_ID),
# reduced to (USER_ID, period), joined to the grouped coupons on "period",
# and finally trimmed to the columns the pivot needs.
denom = (
    active.copy()
    .merge(users, on="USER_ID")[["USER_ID", "period"]]
    .merge(cpntr2, on=["period"])[
        ["USER_ID", "genreprice", "count", "period"]
    ]
    .copy()
)

# Numerator: purchases joined to users and coupons, grouped with
# Grouping.to_group on (USER_ID, genreprice, period).
bought2 = Grouping.to_group(
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "genreprice", "period"],
    True,
)
numer = bought2[["USER_ID", "genreprice", "count", "period"]].copy()

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "genreprice" (overall and per period).
pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Persist both pivots as pickles under ../model/.
pivot_genreprice.to_pickle("../model/pivot_genreprice.pkl")
pivot_genreprice_period.to_pickle("../model/pivot_genreprice_period.pkl")
LOG.info("finished")
# Finish the denominator by joining it to `cpntr2` on "period".
# NOTE(review): `denom` and `cpntr2` are prepared outside this fragment.
denom = denom.merge(cpntr2, on=["period"])

# Numerator: purchases joined to users and coupons, grouped with
# Grouping.to_group on (USER_ID, user_pref, pref, genre, period); the
# "user_pref" key is dropped again by the column selection below.
bought2 = Grouping.to_group(
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "user_pref", "pref", "genre", "period"],
    True,
)
numer = bought2[["USER_ID", "pref", "genre", "count", "period"]].copy()

# "spotpref" carries the row's pref when its genre is listed in
# Utility.spot_genre, and the literal "NN" otherwise.
denom["spotpref"] = np.where(
    denom["genre"].isin(Utility.spot_genre), denom["pref"], "NN"
)
numer["spotpref"] = np.where(
    numer["genre"].isin(Utility.spot_genre), numer["pref"], "NN"
)

# Probability dataframes -------------------------------------
# Pivot numerator/denominator by "genre" and by "spotpref"
# (overall and per period).
pivot_genre = Grouping.to_pivotdf(numer, denom, "genre")
pivot_pref = Grouping.to_pivotdf(numer, denom, "spotpref")
pivot_genre_period = Grouping.to_pivotdf_period(numer, denom, "genre")
pivot_pref_period = Grouping.to_pivotdf_period(numer, denom, "spotpref")

# Persist all four pivots as pickles under ../model/.
pivot_genre.to_pickle("../model/pivot_genre.pkl")
pivot_pref.to_pickle("../model/pivot_pref.pkl")
pivot_genre_period.to_pickle("../model/pivot_genre_period.pkl")
pivot_pref_period.to_pickle("../model/pivot_pref_period.pkl")
LOG.info("finished")