Пример #1
0
    print "Preparing the data.."
    user_count = 0
    out_row_count = 0
    for user_id in user_dict.keys():
        user_features, user_features_header = getUserFeatures(
            user_dict[user_id], prefecture_location_dict, model_start_date)
        try:
            userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[
                user_id]
        except:
            userid_coupon_purchase_detail_list = []
        user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
            userid_coupon_purchase_detail_list, coupon_list_feature_dict,
            model_start_date, model_start_date)
        user_visit_features, user_visit_features_header = getUserVisitFeatures(
            user_id, coupon_visit_dict, model_start_date)
        for coupon_id in coupon_list_train_dict.keys():
            coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
                user_dict[user_id], coupon_area_dict, coupon_id)
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_train_dict[coupon_id], master_list)
            dv_value = getDV(user_id, coupon_id, dv_dict)
            if out_row_count == 0:
                out_header = [
                    "USER_ID_hash", "COUPON_ID_hash"
                ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [
                    "DV"
                ]
                out_header_len = len(out_header)
                out_file.writerow(out_header)
            out_row = [
Пример #2
0
            # getting the new history list and getting user_hist features based on it #
            new_coupon_purchase_detail_userid_list = []
            for coupon_purchase_detail in coupon_purchase_detail_userid_list[:
                                                                             ind]:
                if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'],
                                              "%Y-%m-%d %H:%M:%S").date(
                                              ) < date_before_purchase_date:
                    new_coupon_purchase_detail_userid_list.append(
                        coupon_purchase_detail)
            #print new_coupon_purchase_detail_userid_list
            user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
                new_coupon_purchase_detail_userid_list,
                coupon_list_feature_dict, model_start_date,
                date_before_purchase_date)
            user_visit_features, user_visit_features_header = getUserVisitFeatures(
                user_id, coupon_visit_dict, date_before_purchase_date)

            # getting coupon for dv = 1 #
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_feature_dict[purchase_coupon], master_list)
            dv_value = 1
            if out_row_count == 0:
                out_header = [
                    "USER_ID_hash", "COUPON_ID_hash"
                ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_features_header + [
                    "DV"
                ]
                out_header_len = len(out_header)
                out_file.writerow(out_header)
            out_row = [
                user_id, purchase_coupon
            new_coupon_purchase_detail_userid_list = []
            for coupon_purchase_detail in coupon_purchase_detail_userid_list[:ind]:
                if (
                    datetime.datetime.strptime(coupon_purchase_detail["I_DATE"], "%Y-%m-%d %H:%M:%S").date()
                    < date_before_purchase_date
                ):
                    new_coupon_purchase_detail_userid_list.append(coupon_purchase_detail)
                    # print new_coupon_purchase_detail_userid_list
            user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
                new_coupon_purchase_detail_userid_list,
                coupon_list_feature_dict,
                model_start_date,
                date_before_purchase_date,
            )
            user_visit_features, user_visit_features_header = getUserVisitFeatures(
                user_id, coupon_visit_dict, date_before_purchase_date
            )

            # getting coupon for dv = 1 #
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_feature_dict[purchase_coupon], master_list
            )
            dv_value = 1
            if out_row_count == 0:
                out_header = (
                    ["USER_ID_hash", "COUPON_ID_hash"]
                    + user_features_header
                    + user_hist_features_header
                    + user_visit_features_header
                    + coupon_features_header
                    + ["DV"]
        # Build the users' coupon visit history by passing
        # coupon_visit_feature_file to getCouponVisitDict. The result is
        # expected to be a dict (per the original note) -- its exact key/value
        # layout is not visible here; confirm against getCouponVisitDict.
        print "Getting the visit history of users.."
        coupon_visit_dict = getCouponVisitDict(coupon_visit_feature_file)
	

	# Progress message for the row-generation stage that follows.
	print "Preparing the data.."
	# Counters: users fully processed so far, and data rows written so far
	# (the header row is not counted in out_row_count).
	user_count = 0
	out_row_count = 0
	# Write one output row for every (user, training coupon) pair. The header
	# row is written once, immediately before the very first data row.
	for user_id in user_dict:
		user_row = user_dict[user_id]
		user_features, user_features_header = getUserFeatures(
			user_row, prefecture_location_dict, model_start_date)
		# A user with no entry in the purchase dict gets an empty history.
		# Only the missing-key case is defaulted; any other error propagates
		# instead of being silently swallowed by a bare ``except``.
		try:
			userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[user_id]
		except KeyError:
			userid_coupon_purchase_detail_list = []
		# model_start_date is passed for both date arguments of
		# getUserHistFeatures here (same as the original call).
		user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
			userid_coupon_purchase_detail_list, coupon_list_feature_dict,
			model_start_date, model_start_date)
		user_visit_features, user_visit_features_header = getUserVisitFeatures(
			user_id, coupon_visit_dict, model_start_date)
		for coupon_id in coupon_list_train_dict:
			coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
				user_row, coupon_area_dict, coupon_id)
			# master_list comes from this user's history features, so the
			# coupon features are recomputed for every (user, coupon) pair.
			coupon_features, coupon_features_header = getCouponFeatures(
				coupon_list_train_dict[coupon_id], master_list)
			dv_value = getDV(user_id, coupon_id, dv_dict)
			if out_row_count == 0:
				# Column order here must match the out_row concatenation below.
				out_header = (
					["USER_ID_hash", "COUPON_ID_hash"]
					+ user_features_header
					+ user_hist_features_header
					+ user_visit_features_header
					+ coupon_area_features_header
					+ coupon_features_header
					+ ["DV"]
				)
				out_header_len = len(out_header)
				out_file.writerow(out_header)
			out_row = (
				[user_id, coupon_id]
				+ user_features
				+ user_hist_features
				+ user_visit_features
				+ coupon_area_features
				+ coupon_features
				+ [dv_value]
			)
			# Internal invariant: every row has exactly as many fields as the header.
			assert len(out_row) == out_header_len, (
				"row has %d fields but header has %d (user %s, coupon %s)"
				% (len(out_row), out_header_len, user_id, coupon_id))
			out_file.writerow(out_row)
			out_row_count += 1
		user_count += 1