# Store the "period" value returned by the coupon lookup as a new column on
# the visit table.
visit_tr2["period"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "period"
)

# Pre-group the inputs so the later steps have less to compute.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genreprice", "period"], True
)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: grouped visits (on a copy, so `visit` itself is not handed to
# the counting helper).
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genreprice", "period"]
)

# Denominator: candidate groups joined to the active rows on "period", then
# counted over the same keys as the numerator.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genreprice", "period"],
)

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "genreprice" (overall and per period).
visit_pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
visit_pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Prefix every column label with "v_" (in place).
for _pivot in (visit_pivot_genreprice, visit_pivot_genreprice_period):
    _pivot.rename(columns="v_{}".format, inplace=True)

# Persist both tables.
for _pivot, _path in (
    (visit_pivot_genreprice, "../model/visit_pivot_genreprice.pkl"),
    (
        visit_pivot_genreprice_period,
        "../model/visit_pivot_genreprice_period.pkl",
    ),
):
    _pivot.to_pickle(_path)

LOG.info("finished")
Exemplo n.º 2
0
# Purchases joined with the user table and the coupon table, then grouped
# by user / small area / genre / period.
bought2 = (
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"])
)
bought2 = Grouping.to_group(
    bought2, ["USER_ID", "small_area", "genre", "period"], True
)

# Numerator: an independent copy holding only the columns used below.
numer = bought2.loc[
    :, ["USER_ID", "small_area", "genre", "count", "period"]
].copy()

# Keep only rows whose genre is listed in Utility.spot_genre and whose
# small_area is NOT listed in Utility.prefs (per the original note: areas
# sharing a name with a prefecture), then expose the area column as "sarea".
# `denom` is expected to exist already; it is built outside this fragment.
denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom.loc[denom_mask].rename(columns={"small_area": "sarea"})

# Same filter and rename for the numerator.
numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer.loc[numer_mask].rename(columns={"small_area": "sarea"})

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "sarea" (overall and per period).
pivot_sarea = Grouping.to_pivotdf(numer, denom, "sarea")
pivot_sarea_period = Grouping.to_pivotdf_period(numer, denom, "sarea")

# Persist both tables.
for _pivot, _path in (
    (pivot_sarea, "../model/pivot_sarea.pkl"),
    (pivot_sarea_period, "../model/pivot_sarea_period.pkl"),
):
    _pivot.to_pickle(_path)

LOG.info("finished")
# Numerator: grouped visits (on a copy), counted over user / genre / pref /
# period. `visit`, `candidate` and `active` come from outside this fragment.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genre", "pref", "period"]
)

# Denominator: candidate groups joined to the active rows on "period", then
# counted over the same keys as the numerator.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genre", "pref", "period"],
)

# Derive "spotpref": the row's pref when its genre is listed in
# Utility.spot_genre, otherwise the placeholder "NN".
numer["spotpref"] = np.where(
    numer["genre"].isin(Utility.spot_genre), numer["pref"], "NN"
)
denom["spotpref"] = np.where(
    denom["genre"].isin(Utility.spot_genre), denom["pref"], "NN"
)

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "genre" and on "spotpref", each in an
# overall and a per-period variant.
visit_pivot_genre = Grouping.to_pivotdf(numer, denom, "genre")
visit_pivot_pref = Grouping.to_pivotdf(numer, denom, "spotpref")
visit_pivot_genre_period = Grouping.to_pivotdf_period(numer, denom, "genre")
visit_pivot_pref_period = Grouping.to_pivotdf_period(
    numer, denom, "spotpref"
)

# Prefix every column label with "v_" (in place).
for _pivot in (
    visit_pivot_genre,
    visit_pivot_pref,
    visit_pivot_genre_period,
    visit_pivot_pref_period,
):
    _pivot.rename(columns="v_{}".format, inplace=True)

# Persist all four tables.
for _pivot, _path in (
    (visit_pivot_genre, "../model/visit_pivot_genre.pkl"),
    (visit_pivot_pref, "../model/visit_pivot_pref.pkl"),
    (visit_pivot_genre_period, "../model/visit_pivot_genre_period.pkl"),
    (visit_pivot_pref_period, "../model/visit_pivot_pref_period.pkl"),
):
    _pivot.to_pickle(_path)
# Numerator source: purchases joined with the user table and the coupon
# table, then grouped by user / small area / genre / period.
bought2 = (
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"])
)
bought2 = Grouping.to_group(
    bought2, ["USER_ID", "small_area", "genre", "period"], True
)

# Independent copy holding only the columns used below.
numer = bought2.loc[
    :, ["USER_ID", "small_area", "genre", "count", "period"]
].copy()

# Keep only rows whose genre is listed in Utility.spot_genre and whose
# small_area is NOT listed in Utility.prefs (per the original note: areas
# sharing a name with a prefecture), then expose the area column as "sarea".
# `denom` is expected to exist already; it is built outside this fragment.
denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom.loc[denom_mask].rename(columns={"small_area": "sarea"})

# Same filter and rename for the numerator.
numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer.loc[numer_mask].rename(columns={"small_area": "sarea"})

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "sarea" (overall and per period).
pivot_sarea = Grouping.to_pivotdf(numer, denom, "sarea")
pivot_sarea_period = Grouping.to_pivotdf_period(numer, denom, "sarea")

# Persist both tables.
for _pivot, _path in (
    (pivot_sarea, "../model/pivot_sarea.pkl"),
    (pivot_sarea_period, "../model/pivot_sarea_period.pkl"),
):
    _pivot.to_pickle(_path)

LOG.info("finished")

Exemplo n.º 5
0
# Regroup the active rows by user and period. `active` is expected to exist
# already; it is built outside this fragment.
active = Grouping.to_group(active, ["USER_ID", "period"], False)

# Training coupons grouped by genreprice and period.
cpntr2 = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)

# Denominator: active rows restricted to users present in `users` (default
# merge), reduced to (USER_ID, period), joined to the grouped coupons on
# "period", and trimmed to the columns used downstream.
denom = (
    active.copy()
    .merge(users, on="USER_ID")[["USER_ID", "period"]]
    .merge(cpntr2, on=["period"])[
        ["USER_ID", "genreprice", "count", "period"]
    ]
    .copy()
)

# Numerator: purchases joined with users and coupons, grouped by
# user / genreprice / period.
bought2 = (
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"])
)
bought2 = Grouping.to_group(
    bought2, ["USER_ID", "genreprice", "period"], True
)

numer = bought2.loc[:, ["USER_ID", "genreprice", "count", "period"]].copy()

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "genreprice" (overall and per period).
pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Persist both tables.
for _pivot, _path in (
    (pivot_genreprice, "../model/pivot_genreprice.pkl"),
    (pivot_genreprice_period, "../model/pivot_genreprice_period.pkl"),
):
    _pivot.to_pickle(_path)

LOG.info("finished")
Exemplo n.º 6
0
# Join the denominator to the grouped coupons on "period". Both `denom` and
# `cpntr2` are built outside this fragment.
denom = denom.merge(cpntr2, on=["period"])

# Numerator: purchases joined with users and coupons, grouped by
# user / user_pref / pref / genre / period.
bought2 = (
    bought.copy()
    .merge(users, on=["USER_ID"])
    .merge(coupon_tr, on=["COUPON_ID"])
)
bought2 = Grouping.to_group(
    bought2, ["USER_ID", "user_pref", "pref", "genre", "period"], True
)

# Independent copy holding only the columns used below.
numer = bought2.loc[:, ["USER_ID", "pref", "genre", "count", "period"]].copy()

# Derive "spotpref": the row's pref when its genre is listed in
# Utility.spot_genre, otherwise the placeholder "NN".
denom["spotpref"] = np.where(
    denom["genre"].isin(Utility.spot_genre), denom["pref"], "NN"
)
numer["spotpref"] = np.where(
    numer["genre"].isin(Utility.spot_genre), numer["pref"], "NN"
)

# Probability dataframes ------------------------------------

# Pivoted probability tables keyed on "genre" and on "spotpref", each in an
# overall and a per-period variant.
pivot_genre = Grouping.to_pivotdf(numer, denom, "genre")
pivot_pref = Grouping.to_pivotdf(numer, denom, "spotpref")
pivot_genre_period = Grouping.to_pivotdf_period(numer, denom, "genre")
pivot_pref_period = Grouping.to_pivotdf_period(numer, denom, "spotpref")

# Persist all four tables.
for _pivot, _path in (
    (pivot_genre, "../model/pivot_genre.pkl"),
    (pivot_pref, "../model/pivot_pref.pkl"),
    (pivot_genre_period, "../model/pivot_genre_period.pkl"),
    (pivot_pref_period, "../model/pivot_pref_period.pkl"),
):
    _pivot.to_pickle(_path)

LOG.info("finished")