Пример #1
0
 # Features that depend only on the user are computed once, before the
 # per-coupon loop below.
 user_features, user_features_header = getUserFeatures(
     user_dict[user_id], prefecture_location_dict, model_start_date)
 # Look up this user's purchase details; fall back to an empty list when the
 # lookup fails.
 # NOTE(review): the bare except also hides errors other than a missing key
 # (presumably only KeyError is expected here) - confirm and narrow.
 try:
     userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[
         user_id]
 except:
     userid_coupon_purchase_detail_list = []
 # model_start_date is passed for both trailing arguments of
 # getUserHistFeatures. master_list is returned here and later handed to
 # getCouponFeatures for every coupon.
 user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
     userid_coupon_purchase_detail_list, coupon_list_feature_dict,
     model_start_date, model_start_date)
 user_visit_features, user_visit_features_header = getUserVisitFeatures(
     user_id, coupon_visit_dict, model_start_date)
 # One output row is assembled for every coupon in coupon_list_train_dict.
 for coupon_id in coupon_list_train_dict.keys():
     coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
         user_dict[user_id], coupon_area_dict, coupon_id)
     coupon_features, coupon_features_header = getCouponFeatures(
         coupon_list_train_dict[coupon_id], master_list)
     dv_value = getDV(user_id, coupon_id, dv_dict)
     # The header row is built and written only while out_row_count is still
     # 0; its length is remembered so later rows can be checked against it.
     if out_row_count == 0:
         out_header = [
             "USER_ID_hash", "COUPON_ID_hash"
         ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [
             "DV"
         ]
         out_header_len = len(out_header)
         out_file.writerow(out_header)
     # Row layout mirrors the header: ids, user, history, visit, area and
     # coupon features, then the DV value last.
     out_row = [
         user_id, coupon_id
     ] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [
         dv_value
     ]
     # Sanity check: every row must have exactly as many columns as the header.
     assert len(out_row) == out_header_len
Пример #2
0
                                                                             ind]:
                if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'],
                                              "%Y-%m-%d %H:%M:%S").date(
                                              ) < date_before_purchase_date:
                    new_coupon_purchase_detail_userid_list.append(
                        coupon_purchase_detail)
            #print new_coupon_purchase_detail_userid_list
            user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
                new_coupon_purchase_detail_userid_list,
                coupon_list_feature_dict, model_start_date,
                date_before_purchase_date)
            user_visit_features, user_visit_features_header = getUserVisitFeatures(
                user_id, coupon_visit_dict, date_before_purchase_date)

            # getting coupon for dv = 1 #
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_feature_dict[purchase_coupon], master_list)
            dv_value = 1
            if out_row_count == 0:
                out_header = [
                    "USER_ID_hash", "COUPON_ID_hash"
                ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_features_header + [
                    "DV"
                ]
                out_header_len = len(out_header)
                out_file.writerow(out_header)
            out_row = [
                user_id, purchase_coupon
            ] + user_features + user_hist_features + user_visit_features + coupon_features + [
                dv_value
            ]
            assert len(out_row) == out_header_len
                ):
                    new_coupon_purchase_detail_userid_list.append(coupon_purchase_detail)
                    # print new_coupon_purchase_detail_userid_list
            user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
                new_coupon_purchase_detail_userid_list,
                coupon_list_feature_dict,
                model_start_date,
                date_before_purchase_date,
            )
            user_visit_features, user_visit_features_header = getUserVisitFeatures(
                user_id, coupon_visit_dict, date_before_purchase_date
            )

            # getting coupon for dv = 1 #
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_feature_dict[purchase_coupon], master_list
            )
            dv_value = 1
            if out_row_count == 0:
                out_header = (
                    ["USER_ID_hash", "COUPON_ID_hash"]
                    + user_features_header
                    + user_hist_features_header
                    + user_visit_features_header
                    + coupon_features_header
                    + ["DV"]
                )
                out_header_len = len(out_header)
                out_file.writerow(out_header)
            out_row = (
                [user_id, purchase_coupon]
	

	print "Preparing the data.."
	user_count = 0
	out_row_count = 0
	for user_id in user_dict.keys():
		user_features, user_features_header = getUserFeatures(user_dict[user_id], prefecture_location_dict, model_start_date)
		try:
			userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[user_id]
		except:
			userid_coupon_purchase_detail_list = []
		user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(userid_coupon_purchase_detail_list, coupon_list_feature_dict, model_start_date, model_start_date)
		user_visit_features, user_visit_features_header = getUserVisitFeatures(user_id, coupon_visit_dict, model_start_date)
		for coupon_id in coupon_list_train_dict.keys():
			coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(user_dict[user_id], coupon_area_dict, coupon_id)
			coupon_features, coupon_features_header = getCouponFeatures(coupon_list_train_dict[coupon_id], master_list)
			dv_value = getDV(user_id, coupon_id, dv_dict)
			if out_row_count == 0:
				out_header = ["USER_ID_hash", "COUPON_ID_hash"]+ user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + ["DV"]
				out_header_len = len(out_header)
				out_file.writerow( out_header )
			out_row = [user_id, coupon_id] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [dv_value]
			assert len(out_row) == out_header_len
			out_file.writerow( out_row )
			out_row_count += 1
			#break
		user_count +=1
		#break
		if user_count % 200 == 0:
				print "Processed Users : ", user_count