user_features, user_features_header = getUserFeatures( user_dict[user_id], prefecture_location_dict, model_start_date) try: userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[ user_id] except: userid_coupon_purchase_detail_list = [] user_hist_features, user_hist_features_header, master_list = getUserHistFeatures( userid_coupon_purchase_detail_list, coupon_list_feature_dict, model_start_date, model_start_date) user_visit_features, user_visit_features_header = getUserVisitFeatures( user_id, coupon_visit_dict, model_start_date) for coupon_id in coupon_list_train_dict.keys(): coupon_area_features, coupon_area_features_header = getCouponAreaFeatures( user_dict[user_id], coupon_area_dict, coupon_id) coupon_features, coupon_features_header = getCouponFeatures( coupon_list_train_dict[coupon_id], master_list) dv_value = getDV(user_id, coupon_id, dv_dict) if out_row_count == 0: out_header = [ "USER_ID_hash", "COUPON_ID_hash" ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [ "DV" ] out_header_len = len(out_header) out_file.writerow(out_header) out_row = [ user_id, coupon_id ] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [ dv_value ] assert len(out_row) == out_header_len
ind]: if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'], "%Y-%m-%d %H:%M:%S").date( ) < date_before_purchase_date: new_coupon_purchase_detail_userid_list.append( coupon_purchase_detail) #print new_coupon_purchase_detail_userid_list user_hist_features, user_hist_features_header, master_list = getUserHistFeatures( new_coupon_purchase_detail_userid_list, coupon_list_feature_dict, model_start_date, date_before_purchase_date) user_visit_features, user_visit_features_header = getUserVisitFeatures( user_id, coupon_visit_dict, date_before_purchase_date) # getting coupon for dv = 1 # coupon_features, coupon_features_header = getCouponFeatures( coupon_list_feature_dict[purchase_coupon], master_list) dv_value = 1 if out_row_count == 0: out_header = [ "USER_ID_hash", "COUPON_ID_hash" ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_features_header + [ "DV" ] out_header_len = len(out_header) out_file.writerow(out_header) out_row = [ user_id, purchase_coupon ] + user_features + user_hist_features + user_visit_features + coupon_features + [ dv_value ] assert len(out_row) == out_header_len
): new_coupon_purchase_detail_userid_list.append(coupon_purchase_detail) # print new_coupon_purchase_detail_userid_list user_hist_features, user_hist_features_header, master_list = getUserHistFeatures( new_coupon_purchase_detail_userid_list, coupon_list_feature_dict, model_start_date, date_before_purchase_date, ) user_visit_features, user_visit_features_header = getUserVisitFeatures( user_id, coupon_visit_dict, date_before_purchase_date ) # getting coupon for dv = 1 # coupon_features, coupon_features_header = getCouponFeatures( coupon_list_feature_dict[purchase_coupon], master_list ) dv_value = 1 if out_row_count == 0: out_header = ( ["USER_ID_hash", "COUPON_ID_hash"] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_features_header + ["DV"] ) out_header_len = len(out_header) out_file.writerow(out_header) out_row = ( [user_id, purchase_coupon]
# Build the model-input table: one output row per (user, coupon) pair.
#
# For every user in user_dict the user-level feature groups (profile,
# purchase history, visits) are computed once, then combined with the
# per-coupon feature groups for every coupon in coupon_list_train_dict.
# Each row is laid out as:
#   [USER_ID_hash, COUPON_ID_hash] + user + user-history + user-visit
#   + coupon-area + coupon features + [DV]
# and emitted through out_file.writerow (presumably a csv.writer -- confirm
# where out_file is created). The header row is written once, immediately
# before the first data row, from the *_header lists returned by the feature
# helpers.
print("Preparing the data..")
user_count = 0
out_row_count = 0
for user_id, user_record in user_dict.items():
    user_features, user_features_header = getUserFeatures(
        user_record, prefecture_location_dict, model_start_date)

    # Users without an entry in coupon_purchase_detail_dict get an empty
    # history. Only a missing key is expected here; any other error should
    # surface instead of being silently swallowed.
    try:
        userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[user_id]
    except KeyError:
        userid_coupon_purchase_detail_list = []

    # model_start_date is deliberately passed for both date arguments, as in
    # the original call.
    user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
        userid_coupon_purchase_detail_list,
        coupon_list_feature_dict,
        model_start_date,
        model_start_date)
    user_visit_features, user_visit_features_header = getUserVisitFeatures(
        user_id, coupon_visit_dict, model_start_date)

    for coupon_id, coupon_record in coupon_list_train_dict.items():
        coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
            user_record, coupon_area_dict, coupon_id)
        coupon_features, coupon_features_header = getCouponFeatures(
            coupon_record, master_list)
        # DV is presumably the dependent variable (target label) for this
        # user/coupon pair -- confirm against getDV.
        dv_value = getDV(user_id, coupon_id, dv_dict)

        if out_row_count == 0:
            # The header is only known once the first set of feature headers
            # has been produced, so it is written lazily with the first row.
            out_header = (
                ["USER_ID_hash", "COUPON_ID_hash"]
                + user_features_header
                + user_hist_features_header
                + user_visit_features_header
                + coupon_area_features_header
                + coupon_features_header
                + ["DV"]
            )
            out_header_len = len(out_header)
            out_file.writerow(out_header)

        out_row = (
            [user_id, coupon_id]
            + user_features
            + user_hist_features
            + user_visit_features
            + coupon_area_features
            + coupon_features
            + [dv_value]
        )
        # Sanity check: every row must line up with the header that was
        # written for the first row.
        assert len(out_row) == out_header_len, (
            "row has %d columns but header has %d (user %s, coupon %s)"
            % (len(out_row), out_header_len, user_id, coupon_id))
        out_file.writerow(out_row)
        out_row_count += 1

    user_count += 1
    if user_count % 200 == 0:
        # Single-argument print(...) yields the same text as the original
        # two-item print statement (items joined by one space) and parses
        # under both Python 2 and Python 3.
        print("%s %s" % ("Processed Users : ", user_count))