visit_tr2, coupon_tr, "small_area") visit_tr2["genre"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr, "genre") visit_tr2["period"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr, "period") # group to reduce calculation load visit = Grouping.to_group(visit_tr2, ["USER_ID", "genre", "small_area", "period"], True) candidate = Grouping.to_group(coupon_tr, ["genre", "small_area", "period"], True) active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False) # numer numer = visit.copy() numer = Grouping.to_group_count(numer, ["USER_ID", "genre", "small_area", "period"]) # denom denom = candidate.merge(active, on="period") denom = Grouping.to_group_count(denom, ["USER_ID", "genre", "small_area", "period"]) # exclude non-spot genre and same name with prefecture numer_mask = (numer.genre.isin(Utility.spot_genre) & ~(numer.small_area.isin(Utility.prefs))) numer = numer[numer_mask] numer = numer.rename(columns={"small_area": "sarea"}) denom_mask = (denom.genre.isin(Utility.spot_genre) & ~(denom.small_area.isin(Utility.prefs))) denom = denom[denom_mask]
# Keep one row per (USER_ID, COUPON_ID) pair: count the rows of each pair,
# then discard the count column (labelled 0 after reset_index()).
pair_sizes = visit_tr.groupby(["USER_ID", "COUPON_ID"]).size()
visit_tr2 = pair_sizes.reset_index().drop(0, axis=1)

# Attach the coupon attributes used as grouping keys below.
for element in ("genreprice", "period"):
    visit_tr2[element] = Grouping.lookup_coupon_element(
        visit_tr2, coupon_tr, element
    )

# Group first to reduce the calculation load.
# NOTE(review): the meaning of the boolean third argument of
# Grouping.to_group is not visible here -- confirm against its definition.
visit = Grouping.to_group(visit_tr2, ["USER_ID", "genreprice", "period"], True)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: counts taken over a copy of the grouped visits.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genreprice", "period"]
)

# Denominator: counts taken over the candidate groups joined, on period,
# with the per-user active periods.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genreprice", "period"],
)

# probability dataframe ------------------------------------
# Create the pivoted probability dataframes.
visit_pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
visit_pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Prefix every column name with "v_", modifying both frames in place.
as_visit_column = "v_{}".format
for pivot in (visit_pivot_genreprice, visit_pivot_genreprice_period):
    pivot.rename(columns=as_visit_column, inplace=True)
# Keep one row per (USER_ID, COUPON_ID) pair: count the rows of each pair,
# then discard the count column (labelled 0 after reset_index()).
pair_sizes = visit_tr.groupby(["USER_ID", "COUPON_ID"]).size()
visit_tr2 = pair_sizes.reset_index().drop(0, axis=1)

# Attach the coupon attributes used as grouping keys below.
for element in ("small_area", "genre", "period"):
    visit_tr2[element] = Grouping.lookup_coupon_element(
        visit_tr2, coupon_tr, element
    )

# Group first to reduce the calculation load.
# NOTE(review): the meaning of the boolean third argument of
# Grouping.to_group is not visible here -- confirm against its definition.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genre", "small_area", "period"], True
)
candidate = Grouping.to_group(coupon_tr, ["genre", "small_area", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: counts taken over a copy of the grouped visits.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genre", "small_area", "period"]
)

# Denominator: counts taken over the candidate groups joined, on period,
# with the per-user active periods.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genre", "small_area", "period"],
)

# Keep only rows whose genre is listed in Utility.spot_genre and whose
# small_area is not listed in Utility.prefs (i.e. drop non-spot genres and
# small areas that share a name with a prefecture), then expose the
# small_area column under the name "sarea".
numer_mask = numer["genre"].isin(Utility.spot_genre) & ~numer["small_area"].isin(
    Utility.prefs
)
numer = numer[numer_mask].rename(columns={"small_area": "sarea"})

denom_mask = denom["genre"].isin(Utility.spot_genre) & ~denom["small_area"].isin(
    Utility.prefs
)
denom = denom[denom_mask].rename(columns={"small_area": "sarea"})

# probability dataframe ------------------------------------