# Streaming mapper: tags every input record with each date window that
# contains its timestamp.
#
# Input  (stdin):  comma-separated records; field 0 is read as the user,
#                  field 4 as the browser and field 7 as an integer time.
# Output (stdout): one line per matching window, formatted as
#                  <window index>#<user>#<browser><TAB><stripped input line>
#
# The windows overlap, so a single record can be emitted several times.
import sys

# Presumably needed so the project-local time_stamp module resolves when the
# script is launched from its own directory -- confirm against the job setup.
sys.path.append('./')
import time_stamp

# Window i runs from start_date[i] to end_date[i], both ends inclusive.
# Every window spans 28 days. The first nine start 14 days apart; the last
# one (20150526-20150623) is detached from that sequence.
start_date = [
    '20150101', '20150115', '20150129', '20150212', '20150226',
    '20150312', '20150326', '20150409', '20150423', '20150526',
]
end_date = [
    '20150129', '20150212', '20150226', '20150312', '20150326',
    '20150409', '20150423', '20150507', '20150521', '20150623',
]

# Convert once up front. The lists are sized from the date tables instead of
# a hard-coded 10, so adding or removing a window needs no other change.
start_time = [time_stamp.get_seconds(day) for day in start_date]
end_time = [time_stamp.get_seconds(day) for day in end_date]

for line in sys.stdin:
    record = line.strip()
    arr = record.split(',')
    user = arr[0]
    browser = arr[4]
    # Compared directly against time_stamp.get_seconds() results, so this
    # field is presumably in the same unit -- confirm against the input spec.
    event_time = int(arr[7])
    for i, (window_start, window_end) in enumerate(zip(start_time, end_time)):
        if window_start <= event_time <= window_end:
            # Single-argument parenthesised print emits the same bytes under
            # Python 2 (print statement) and Python 3 (print function).
            print(str(i) + '#' + user + '#' + browser + '\t' + record)
def _load_feature_map(path):
    """Read a feature file into a dict mapping key -> feature string.

    Each line is stripped and split at its first space. The key is the
    text before that space with its first two characters dropped; the value
    is everything after the space, kept verbatim.

    Raises ValueError (from str.index) when a line contains no space,
    exactly as the original inline loops did.
    """
    features = {}
    # `with` guarantees the handle is closed; the original iterated over a
    # bare open() and never closed the file.
    with open(path) as handle:
        for raw in handle:
            entry = raw.strip()
            split_at = entry.index(' ')
            features[entry[2:split_at]] = entry[split_at + 1:]
    return features


caid_feature = _load_feature_map('feature_caid_9')
browser_feature = _load_feature_map('feature_browser_9')

# Three date arguments from the command line, converted with
# time_stamp.get_seconds. Only begin_time and test_end_time are used in the
# part of the script visible here.
begin_time = time_stamp.get_seconds(sys.argv[1])
end_time = time_stamp.get_seconds(sys.argv[2])
test_end_time = time_stamp.get_seconds(sys.argv[3])

# State for the per-user processing. None of it is used in the visible part
# of the loop; presumably the remainder of the loop body consumes it.
last_user = ''
cur_user = ''
user_list = []
for line in sys.stdin:
    line = line.strip()
    # Input is "<user key>\t<comma-separated record>".
    arr = line.split('\t')
    cur_user = arr[0]
    temp = arr[1]
    arr = temp.split(',')
    time = int(arr[7])
    # Drop records outside [begin_time, test_end_time] (inclusive bounds).
    if time < begin_time or time > test_end_time:
        continue
# Fragment of a streaming reduce step: initialises per-key accumulators and
# begins the loop over tab-separated "<key>\t<record>" lines from stdin.
# NOTE(review): last_spid, all_click_num, all_exposure_num, end_date,
# time_stamp and my_format are not defined in this view; they must come from
# earlier in the file. The loop body also continues past the end of this view.
exposure = [0] * 5
# Large sentinels, presumably "no click / exposure seen yet" -- confirm
# against the code that updates them.
large_click_time = 999999999
large_exposure_time = 999999999
click_user_dict = {}
# Five-slot counters; what the five slots stand for is not visible here.
click_week = [0] * 5
exposure_week = [0] * 5
click_5days = [0] * 5
exposure_5days = [0] * 5
click_week_set = set()
exposure_week_set = set()
for line in sys.stdin:
    arr = line.strip().split('\t')
    cur_spid = arr[0]
    # The first character of the key is used as an index into end_date, so
    # this only works while that index is a single digit.
    end_time = time_stamp.get_seconds(end_date[int(cur_spid[0])])
    # Re-split the payload (second tab field) on commas; field 0 is read as
    # the user and field 7 is passed to the time_stamp helpers.
    arr = arr[1].split(',')
    user = arr[0]
    time = time_stamp.get_date(arr[7])
    day_diff = time_stamp.get_date_diff(arr[7], end_time)
    week = time_stamp.get_week(arr[7])
    # Key changed (and this is not the very first key): build the output
    # string for the key that just finished.
    if cur_spid != last_spid and last_spid != '':
        #print the last_spid
        spid_str = last_spid
        # The click count is folded into the exposure total before output.
        all_exposure_num = all_click_num + all_exposure_num
        spid_str = spid_str + ' 67:' + str(all_exposure_num)
        # Features 68 and 69 are appended only when there was at least one
        # click. string_cf is an opaque project helper called with
        # (clicks, exposures) -- presumably a formatted ratio, TODO confirm.
        if all_click_num > 0:
            spid_str = spid_str + ' 68:' + str(
                all_click_num) + ' 69:' + my_format.string_cf(
                    all_click_num, all_exposure_num)