def process_file(fn): _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) y, m = int('20' + yymm[:2]), int(yymm[2:]) # find the next month's first day if m == 12: next_y, next_m = y + 1, 1 else: next_y, next_m = y, m + 1 next_m_first_day = datetime.datetime(next_y, next_m, 1, 0) cur_m_last_day = next_m_first_day - datetime.timedelta(days=1) dd = '%2d' % cur_m_last_day.day last_day_timestamp = time.mktime(cur_m_last_day.timetuple()) with open('%s/%s' % (logs_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_time = headers.index('time') with open('%s/log-last-day-%s%s.csv' % (log_last_day_dir, yymm, dd), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) writer.writerow(headers) for row in reader: t = eval(row[id_time]) if t <= last_day_timestamp: continue writer.writerow(row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # is_driver_vehicle = load_picle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm)) full_drivers = set() with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_yy, id_mm, id_dd, id_hh = headers.index('yy'), headers.index('mm'), headers.index('dd'), headers.index('hh') id_vid, id_did = headers.index('vid'), headers.index('did') id_pro_dur, id_x_pro_dur = headers.index('pro-dur'), headers.index('x-pro-dur') with open('%s/shift-full-time-%s.csv' % (full_shift_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) new_headers = ['year', 'month', 'day', 'hour', 'vehicle-id', 'driver-id', 'productive-duration', 'x-productive-duration'] writer.writerow(new_headers) for row in reader: if len(is_driver_vehicle[row[id_vid]]) > 1: continue writer.writerow([row[id_yy], row[id_mm], row[id_dd], row[id_hh], row[id_vid], row[id_did], row[id_pro_dur], row[id_x_pro_dur]]) full_drivers.add(row[id_did]) save_pickle_file('%s/full-time-drivers-%s.pkl' % (full_shift_dir, yymm), full_drivers) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(path_to_csv_file):
    # Reduce a raw GPS log CSV to (time, vid, did, in-airport?, in-night-
    # safari?) rows written to '<logs_dir>/log-<yymm>.csv'.
    # File name convention: '<prefix>-<yymm>-<suffix>.csv'.
    print path_to_csv_file
    ori_log_fn = path_to_csv_file.split('/')[-1]
    _, yymm, _ = ori_log_fn.split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    with open(path_to_csv_file, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_time, id_vid, id_did = headers.index('time'), headers.index(
            'vehicle-id'), headers.index('driver-id')
        index_long, index_lat = headers.index('longitude'), headers.index(
            'latitude')
        with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['time', 'vid', 'did', 'ap-or-not', 'np-or-not']
            writer.writerow(new_headers)
            #
            for row in reader:
                # NOTE(review): eval() on CSV fields — presumably numeric
                # longitude/latitude; float() would be safer. Left as-is.
                ap_or_not = is_in_airport(eval(row[index_long]), eval(row[index_lat]))
                np_or_not = is_in_night_safari(eval(row[index_long]), eval(row[index_lat]))
                new_row = [
                    row[id_time], row[id_vid], row[id_did], ap_or_not, np_or_not
                ]
                writer.writerow(new_row)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_file(fn): _, yymm = fn[:-len('.csv')].split('-') # print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not = {}, {} vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = {}, {} if yymm not in ['0901', '1001', '1011']: path_to_last_day_csv_file = None temp_csv_files = get_all_files(log_last_day_dir, '', '.csv') prev_fn = None y, m = int(yymm[:2]), int(yymm[2:]) prev_m = m - 1 prev_yymm = '%02d%02d' % (y, prev_m) for temp_fn in temp_csv_files: if temp_fn.startswith('log-last-day-%s' % prev_yymm): prev_fn = temp_fn break assert prev_fn, yymm path_to_last_day_csv_file = '%s/%s' % (log_last_day_dir, prev_fn) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = \ record_crossing_time(path_to_last_day_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) path_to_csv_file = '%s/%s' % (logs_dir, fn) vehicle_ap_crossing_time_from_out_to_in, _, vehicle_ns_crossing_time_from_out_to_in, _ = \ record_crossing_time(path_to_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) # save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ap_crossing_time_from_out_to_in) save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ns_crossing_time_from_out_to_in) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm)) # yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:]) dd, hh = 1, 0 cur_day_time = datetime.datetime(yyyy, mm, dd, hh) if mm == 12: next_yyyy, next_mm = yyyy + 1, 1 else: next_yyyy, next_mm = yyyy, mm + 1 last_day_time = datetime.datetime(next_yyyy, next_mm, dd, hh) # st_label = 'start-time' ap_tm_lable, ns_tm_lable = 'ap-trip-mode', 'ns-trip-mode' dur_lable, fare_label = 'duration', 'fare' # ap_tm = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP] ns_tm = [DInNS_PInNS, DInNS_POutNS, DOutNS_PInNS, DOutNS_POutNS] # while cur_day_time != last_day_time: next_day_time = cur_day_time + datetime.timedelta(hours=1) st_timestamp, et_timestamp = time.mktime(cur_day_time.timetuple()), time.mktime(next_day_time.timetuple()) # yyyy, mm, dd, hh = cur_day_time.year, cur_day_time.month, cur_day_time.day, cur_day_time.hour # filtered_trip = trip_df[(st_timestamp <= trip_df[st_label]) & (trip_df[st_label] < et_timestamp)] # gp_f_trip = filtered_trip.groupby([ap_tm_lable]) tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ap_tm] tm_num_df = gp_f_trip.count()[fare_label].to_frame('total_tm_num').reset_index() for tm, num in tm_num_df.values: tm_num_totalDuration_totalFare[tm][1] += num tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame('total_tm_dur').reset_index() for tm, dur in tm_dur_df.values: tm_num_totalDuration_totalFare[tm][2] += dur tm_fare_df = gp_f_trip.sum()[fare_label].to_frame('total_tm_fare').reset_index() for tm, fare in tm_fare_df.values: tm_num_totalDuration_totalFare[tm][3] += fare save_as_csv(ap_fn, yymm, dd, hh, tm_num_totalDuration_totalFare) # gp_f_trip = filtered_trip.groupby([ns_tm_lable]) tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ns_tm] tm_num_df = gp_f_trip.count()[fare_label].to_frame('total_tm_num').reset_index() for tm, num in tm_num_df.values: 
tm_num_totalDuration_totalFare[tm][1] += num tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame('total_tm_dur').reset_index() for tm, dur in tm_dur_df.values: tm_num_totalDuration_totalFare[tm][2] += dur tm_fare_df = gp_f_trip.sum()[fare_label].to_frame('total_tm_fare').reset_index() for tm, fare in tm_fare_df.values: tm_num_totalDuration_totalFare[tm][3] += fare save_as_csv(ns_fn, yymm, dd, hh, tm_num_totalDuration_totalFare) # cur_day_time = next_day_time print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm)
def process_file(fn):
    # Tag each merged trip with an airport trip mode and a night-safari trip
    # mode, derived from where the same vehicle's PREVIOUS trip ended vs
    # where the current trip starts, and write the result to
    # '<trips_dir>/whole-trip-<yymm>.csv'.
    _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    with open('%s/%s' % (merged_trip_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        #
        id_tid, id_vid, id_did = headers.index('trip-id'), headers.index('vehicle-id'), headers.index('driver-id')
        id_st, id_et = headers.index('start-time'), headers.index('end-time')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        id_s_long, id_s_lat = headers.index('start-long'), headers.index('start-lat')
        id_e_long, id_e_lat = headers.index('end-long'), headers.index('end-lat')
        #
        # vid -> (end terminal, end in/out night safari, end time) of the
        # vehicle's most recent trip seen so far
        vehicle_prev_trip_position_time = {}
        with open('%s/whole-trip-%s.csv' % (trips_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['tid', 'vid', 'did', 'start-time', 'end-time', 'duration', 'fare', 'ap-trip-mode', 'ns-trip-mode', 'prev-trip-end-time']
            writer.writerow(new_headers)
            for row in reader:
                vid = row[id_vid]
                start_time, end_time = eval(row[id_st]), eval(row[id_et]),
                s_long, s_lat = eval(row[id_s_long]), eval(row[id_s_lat])
                e_long, e_lat = eval(row[id_e_long]), eval(row[id_e_lat])
                #
                # -1 terminal number means "not at the airport"
                c_start_ter, c_end_ter = check_terminal_num(s_long, s_lat), check_terminal_num(e_long, e_lat)
                c_sl_ns, c_el_ns = is_in_night_safari(s_long, s_lat), is_in_night_safari(e_long, e_lat)
                #
                if not vehicle_prev_trip_position_time.has_key(vid):
                    # ASSUMPTION
                    # If this trip is the driver's first trip in a month,
                    # let's assume that the previous trip occurred out of the airport and out of the night safari
                    # and also assume that the previous trip's end time is the current trip's start time
                    # -1 represents out of airport zone
                    vehicle_prev_trip_position_time[vid] = (-1, OUT_NS, start_time)
                prev_trip_end_ter, prev_trip_end_loc_ns, prev_trip_time = vehicle_prev_trip_position_time[vid]
                ap_trip_mode, ns_trip_mode = None, None
                # airport mode: Drop-off location x Pick-up location
                if prev_trip_end_ter != -1 and c_start_ter != -1 :
                    ap_trip_mode = DInAP_PInAP
                elif prev_trip_end_ter != -1 and c_start_ter == -1:
                    ap_trip_mode = DInAP_POutAP
                elif prev_trip_end_ter == -1 and c_start_ter != -1:
                    ap_trip_mode = DOutAP_PInAP
                elif prev_trip_end_ter == -1 and c_start_ter == -1:
                    ap_trip_mode = DOutAP_POutAP
                else:
                    assert False
                #
                # night-safari mode: same four-way split on in/out NS
                if prev_trip_end_loc_ns == IN_NS and c_sl_ns == IN_NS:
                    ns_trip_mode = DInNS_PInNS
                elif prev_trip_end_loc_ns == IN_NS and c_sl_ns == OUT_NS:
                    ns_trip_mode = DInNS_POutNS
                elif prev_trip_end_loc_ns == OUT_NS and c_sl_ns == IN_NS:
                    ns_trip_mode = DOutNS_PInNS
                elif prev_trip_end_loc_ns == OUT_NS and c_sl_ns == OUT_NS:
                    ns_trip_mode = DOutNS_POutNS
                else:
                    assert False
                new_row = [row[id_tid], vid, row[id_did], start_time, end_time, row[id_dur], row[id_fare], ap_trip_mode, ns_trip_mode, prev_trip_time]
                writer.writerow(new_row)
                #
                vehicle_prev_trip_position_time[vid] = (c_end_ter, c_el_ns, end_time)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def run():
    """Fan out process_file over every 'airport-trip-*.csv' via the pool."""
    init_multiprocessor()
    num_jobs = 0
    for fn in get_all_files(airport_trips_dir, 'airport-trip-', '.csv'):
        try:
            put_task(process_file, [fn])
        except Exception as _:
            # log with traceback, then let the failure propagate
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def process_files(yymm):
    # Per-driver monthly detail rows for full-time drivers: overall
    # productivity (fare per productive second) plus airport-trip breakdowns
    # for trips whose previous drop-off was inside vs outside the airport.
    print "handle the file; %s" % yymm
    logging_msg("handle the file; %s" % yymm)
    #
    init_csv_files(yymm)
    #
    full_dids = sorted(
        [eval(x) for x in load_picle_file("%s/%s%s.pkl" % (full_shift_dir, monthly_full_did_prefix, yymm))]
    )
    s_df = pd.read_csv("%s/%s%s.csv" % (full_shift_dir, sh_full_prefix, yymm))
    trip_df = pd.read_csv("%s/%s%s.csv" % (trips_dir, trip_prefix, yymm))
    ap_trip_df = pd.read_csv("%s/%s%s.csv" % (airport_trips_dir, ap_trip_op_ep_prefix, yymm))
    #
    yy, mm = int(yymm[:2]), int(yymm[2:])
    for did in full_dids:
        # General: fare earned per productive second across the whole month
        did_sh = s_df[(s_df["driver-id"] == did)]
        pro_dur = sum(did_sh["productive-duration"]) * SEC
        did_wt = trip_df[(trip_df["did"] == did)]
        total_fare = sum(did_wt["fare"])
        if pro_dur > 0 and total_fare != 0:
            total_prod = total_fare / pro_dur
            with open("%s/%s%s.csv" % (individual_detail_dir, general_prefix, yymm), "a") as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow([yy, mm, did, pro_dur, total_fare, total_prod])
        #
        did_ap = ap_trip_df[(ap_trip_df["did"] == did)]
        prev_in_ap_trip = did_ap[(did_ap["trip-mode"] == DInAP_PInAP)]
        prev_out_ap_trip = did_ap[(did_ap["trip-mode"] == DOutAP_PInAP)]
        #
        if len(did_ap) != 0:
            # prev in ap trip: pick-ups at the airport whose previous
            # drop-off was also at the airport
            ap_qu, ap_dur = sum(prev_in_ap_trip["queue-time"]), sum(prev_in_ap_trip["duration"])
            ap_fare = sum(prev_in_ap_trip["fare"])
            ap_op_cost, ap_eco_profit = sum(prev_in_ap_trip["op-cost"]), sum(prev_in_ap_trip["economic"])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                # productivity = fare per second of (queueing + driving)
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open("%s/%s%s.csv" % (individual_detail_dir, prev_in_ap_prefix, yymm), "a") as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod, ap_op_cost, ap_eco_profit])
            #
            # prev out ap trip: pick-ups at the airport whose previous
            # drop-off was outside the airport
            ap_qu, ap_dur = sum(prev_out_ap_trip["queue-time"]), sum(prev_out_ap_trip["duration"])
            ap_fare = sum(prev_out_ap_trip["fare"])
            ap_op_cost, ap_eco_profit = sum(prev_out_ap_trip["op-cost"]), sum(prev_out_ap_trip["economic"])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open("%s/%s%s.csv" % (individual_detail_dir, prev_out_ap_prefix, yymm), "a") as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod, ap_op_cost, ap_eco_profit])
    print "End the file; %s" % yymm
    logging_msg("End the file; %s" % yymm)
def run():
    """Rebuild full_shift_dir, then queue one process_file job per shift CSV."""
    remove_creat_dir(full_shift_dir)
    init_multiprocessor()
    num_jobs = 0
    for fn in get_all_files(shifts_dir, '', '.csv'):
        try:
            put_task(process_file, [fn])
        except Exception as _:
            # log with traceback, then let the failure propagate
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Rebuild individual_detail_dir and queue one job per month 0901-1012,
    skipping the months that have no data (0912, 1010)."""
    remove_creat_dir(individual_detail_dir)
    # process_files('1007')
    init_multiprocessor()
    num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = "%02d%02d" % (y, m)
            if yymm in ["0912", "1010"]:
                continue
            try:
                put_task(process_files, [yymm])
            except Exception as _:
                logging_msg("Algorithm runtime exception (%s)\n" % (yymm) + format_exc())
                raise
            num_jobs += 1
    end_multiprocessor(num_jobs)
def process_file(fn):
    # Hourly airport trip-mode summary for one month: per hour slot, count
    # trips and sum fares per trip-mode and append
    # [yy, mm, dow, hh, trip-mode, total-num, total-fare] rows to
    # '<hourly_summary>/hourly-summary-<yymm>.csv'.
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    new_fn = '%s/hourly-summary-%s.csv' % (hourly_summary, yymm)
    with open(new_fn, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        headers = [
            'yy', 'mm', 'dow', 'hh', 'trip-mode', 'total-num', 'total-fare'
        ]
        writer.writerow(headers)
    #
    trip_df = pd.read_csv('%s/%s' % (trips_dir, fn))
    yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:])
    dd, hh = 1, 0
    cur_datetime = datetime.datetime(yyyy, mm, dd, hh)
    # first hour of the following month = loop sentinel
    if mm == 12:
        next_yyyy, next_mm = yyyy + 1, 1
    else:
        next_yyyy, next_mm = yyyy, mm + 1
    last_day_time = datetime.datetime(next_yyyy, next_mm, dd, hh)
    #
    st_label = 'start-time'
    fare_label = 'fare'
    tms = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP]
    while cur_datetime != last_day_time:
        next_datetime = cur_datetime + datetime.timedelta(hours=1)
        cur_timestamp, next_timestamp = time.mktime(
            cur_datetime.timetuple()), time.mktime(next_datetime.timetuple())
        # trips starting within this hour slot
        filtered_trip = trip_df[(cur_timestamp <= trip_df[st_label]) & (trip_df[st_label] < next_timestamp)]
        #
        tm_grouped = filtered_trip.groupby(['trip-mode'], sort=True)
        yy, mm = yymm[:2], yymm[2:]
        dow, hh = cur_datetime.strftime("%a"), cur_datetime.hour
        with open(new_fn, 'a') as csvFile:
            writer = csv.writer(csvFile)
            # NOTE(review): assumes every trip-mode in tms appears in the
            # group-by result, in sorted order — verify against the data
            tm_totalNum_totalFare = zip(tms, list(tm_grouped.count()[fare_label]), list(tm_grouped.sum()[fare_label]))
            for tm, totalNum, totalFare in tm_totalNum_totalFare:
                writer.writerow([yy, mm, dow, hh, tm, totalNum, totalFare])
        cur_datetime = next_datetime
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_files(yymm):
    # Compare whole-fleet economic profit against the subset of decisions
    # that agree with the learned greedy policy (argmax over Q values) for
    # month yymm; pickles [whole_rev, whole_count, sub_rev, sub_count].
    Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-0.10-GAMMA-0.50/ALPHA-0.10-GAMMA-0.50-q-value-fare-dur-%s.pkl'%(for_learning_dir,yymm))
    # greedy action per state (s1=day-of-week, s2=hour, s3=location)
    argmax_as = {}
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
    #
    whole_rev, sub_rev = 0, 0
    whole_count, sub_count = 0,0
    count = 0
    with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            #
            # state = (day of week, hour, location) at the previous trip's end
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            # action = where the driver chose to pick up next
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # opportunity cost: the alternative action's average fare rate
            # applied over this trip's setup + drive time
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            if argmax_as[(s1, s2, s3)] == a:
                # this decision matches the learned greedy policy
                sub_rev += economic_profit
                sub_count += 1
            count += 1
            if count % MOD_STAN == 0:
                # periodic progress heartbeat
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
    save_pickle_file('%s/comparision-%s.pkl'%(for_full_driver_dir, yymm), [whole_rev, whole_count, sub_rev, sub_count])
def run():
    """Rebuild individual_detail_dir and queue a process_files job per month."""
    remove_creat_dir(individual_detail_dir)
    # process_files('1007')
    init_multiprocessor()
    months = ['%02d%02d' % (y, m)
              for y in xrange(9, 11) for m in xrange(1, 13)]
    num_jobs = 0
    for yymm in months:
        if yymm in ['0912', '1010']:
            # months with no usable data
            continue
        try:
            put_task(process_files, [yymm])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (yymm) + format_exc())
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def process_file(fn):
    # Build the learning input CSV: pair every trip with the same vehicle's
    # previous trip end (airport in/out flag, end time) so each output row
    # is one state/action sample for the Q-learning step.
    _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    with open('%s/%s' % (merged_trip_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        #
        id_vid = headers.index('vehicle-id')
        id_st, id_et = headers.index('start-time'), headers.index('end-time')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        id_s_long, id_s_lat = headers.index('start-long'), headers.index('start-lat')
        id_e_long, id_e_lat = headers.index('end-long'), headers.index('end-lat')
        #
        # vid -> (end location flag, end time) of the most recent trip seen
        vehicle_prev_trip_position_time = {}
        with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['prev-trip-end-time', 'prev-trip-end-location', 'start-time', 'start-location', 'end-time', 'end-location', 'duration', 'fare']
            writer.writerow(new_headers)
            for row in reader:
                vid = row[id_vid]
                start_time, end_time = eval(row[id_st]), eval(row[id_et]),
                s_long, s_lat = eval(row[id_s_long]), eval(row[id_s_lat])
                e_long, e_lat = eval(row[id_e_long]), eval(row[id_e_lat])
                # IN_AP / OUT_AP flags for the trip's start and end points
                s_location, e_location = is_in_airport(s_long, s_lat), is_in_airport(e_long, e_lat)
                #
                if not vehicle_prev_trip_position_time.has_key(vid):
                    # ASSUMPTION
                    # If this trip is the driver's first trip in a month,
                    # let's assume that the previous trip occurred out of the airport
                    # and also assume that the previous trip's end time is the current trip's start time
                    vehicle_prev_trip_position_time[vid] = (OUT_AP, start_time)
                prev_trip_end_location, prev_trip_end_time = vehicle_prev_trip_position_time[vid]
                #
                new_row = [prev_trip_end_time, prev_trip_end_location, start_time, s_location, end_time, e_location, row[id_dur], row[id_fare]]
                writer.writerow(new_row)
                #
                vehicle_prev_trip_position_time[vid] = (e_location, end_time)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) new_fn = '%s/hourly-summary-%s.csv' % (hourly_summary, yymm) with open(new_fn, 'wt') as w_csvfile: writer = csv.writer(w_csvfile) headers = ['yy', 'mm', 'dow', 'hh', 'trip-mode', 'total-num', 'total-fare'] writer.writerow(headers) # trip_df = pd.read_csv('%s/%s' % (trips_dir, fn)) yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:]) dd, hh = 1, 0 cur_datetime = datetime.datetime(yyyy, mm, dd, hh) if mm == 12: next_yyyy, next_mm = yyyy + 1, 1 else: next_yyyy, next_mm = yyyy, mm + 1 last_day_time = datetime.datetime(next_yyyy, next_mm, dd, hh) # st_label = 'start-time' fare_label = 'fare' tms = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP] while cur_datetime != last_day_time: next_datetime = cur_datetime + datetime.timedelta(hours=1) cur_timestamp, next_timestamp = time.mktime(cur_datetime.timetuple()), time.mktime(next_datetime.timetuple()) filtered_trip = trip_df[(cur_timestamp <= trip_df[st_label]) & (trip_df[st_label] < next_timestamp)] # tm_grouped = filtered_trip.groupby(['trip-mode'], sort=True) yy, mm = yymm[:2], yymm[2:] dow, hh = cur_datetime.strftime("%a"), cur_datetime.hour with open(new_fn, 'a') as csvFile: writer = csv.writer(csvFile) tm_totalNum_totalFare = zip(tms, list(tm_grouped.count()[fare_label]), list(tm_grouped.sum()[fare_label])) for tm, totalNum, totalFare in tm_totalNum_totalFare: writer.writerow([yy, mm, dow, hh, tm, totalNum, totalFare]) cur_datetime = next_datetime print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(path_to_csv_file): print path_to_csv_file ori_log_fn = path_to_csv_file.split('/')[-1] _, yymm, _ = ori_log_fn.split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) with open(path_to_csv_file, 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_time, id_vid, id_did = headers.index('time'), headers.index('vehicle-id'), headers.index('driver-id') index_long, index_lat = headers.index('longitude'), headers.index('latitude') with open('%s/log-%s.csv' % (logs_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) new_headers = ['time', 'vid', 'did', 'ap-or-not', 'np-or-not'] writer.writerow(new_headers) # for row in reader: ap_or_not = is_in_airport(eval(row[index_long]), eval(row[index_lat])) np_or_not = is_in_night_safari(eval(row[index_long]), eval(row[index_lat])) new_row = [row[id_time], row[id_vid], row[id_did], ap_or_not, np_or_not] writer.writerow(new_row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(fn): _, _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # driver_vehicle = {} productive_state = ['dur%d' % x for x in [0, 3, 4, 5, 6, 7, 8, 9, 10]] with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} with open('%s/shift-pro-dur-%s.csv' % (shift_pro_dur_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) new_headers = ['yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur'] writer.writerow(new_headers) for row in reader: vid, did = row[hid['vehicle-id']], row[hid['driver-id']] productive_duration = sum(int(row[hid[dur]]) for dur in productive_state) writer.writerow([row[hid['year']][-2:], row[hid['month']], row[hid['day']], row[hid['hour']], vid, did, productive_duration]) driver_vehicle.setdefault(vid, set()).add(did) save_pickle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm), driver_vehicle) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): old_time = time.time() print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) begin_timestamp = datetime.datetime(2009, 1, 1, 0) last_timestamp = datetime.datetime(2011, 2, 1, 0) hourly_total, time_period_order = {}, [] while begin_timestamp < last_timestamp: yyyy, mm, dd, hh = begin_timestamp.year, begin_timestamp.month, begin_timestamp.day, begin_timestamp.hour k = (yyyy, mm, dd, hh) hourly_total[k] = [0 for _ in range(len([NS_DUR, NS_FARE, NS_QUEUE]))] time_period_order.append(k) begin_timestamp += datetime.timedelta(hours=1) # st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare' ns_qt_label = 'ns-queue-time' # Night Safari fare, duration and queue time with open('%s/%s%s.csv' % (nightsafari_trips_dir, ns_trip_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]]) dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]]) ns_qt = eval(row[hid[ns_qt_label]]) # st_dt, et_dt = datetime.datetime.fromtimestamp( st_ts), datetime.datetime.fromtimestamp(et_ts) # Duration and queue time if st_dt.hour == et_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][NS_DUR] += dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][NS_FARE] += fare else: next_ts_dt = datetime.datetime( st_dt.year, st_dt.month, st_dt.day, st_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) dur_within_slot = tg_ts - st_ts prop = dur_within_slot / dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][NS_DUR] += dur_within_slot hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][NS_FARE] += fare * prop 
while True: if tg_dt.hour == et_dt.hour: dur_within_slot = et_ts - tg_ts prop = dur_within_slot / dur hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][NS_DUR] += dur_within_slot hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][NS_FARE] += fare * prop break prop = HOUR / dur hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][NS_DUR] += HOUR hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][NS_DUR] += fare * prop tg_dt += datetime.timedelta(hours=1) # Queue time if ns_qt < Q_LIMIT_MIN: ns_qt = Q_LIMIT_MIN q_jt_ts = st_ts - ns_qt q_jt_dt = datetime.datetime.fromtimestamp(q_jt_ts) if q_jt_dt.hour == st_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][NS_QUEUE] += ns_qt else: next_ts_dt = datetime.datetime( q_jt_dt.year, q_jt_dt.month, q_jt_dt.day, q_jt_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) hourly_total[(q_jt_dt.year, q_jt_dt.month, q_jt_dt.day, q_jt_dt.hour)][NS_QUEUE] += tg_ts - q_jt_ts while True: if tg_dt.hour == st_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][NS_QUEUE] += st_ts - tg_ts break hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][NS_QUEUE] += HOUR tg_dt += datetime.timedelta(hours=1) if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) with open( '%s/%s%s.csv' % (ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = [ 'yy', 'mm', 'dd', 'hh', 'ns-duration', 'ns-fare', 'ns-queue-time' ] writer.writerow(header) for yyyy, mm, dd, hh in time_period_order: ns_dur, ns_fare, ns_qt = hourly_total[(yyyy, mm, dd, hh)] writer.writerow([yyyy - 2000, mm, dd, hh, ns_dur, ns_fare, ns_qt]) print 'end the file; 
%s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) with open('%s/%s' % (airport_trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_tid, id_vid, id_did = headers.index('tid'), headers.index('vid'), headers.index('did') id_st, id_et, id_dur = headers.index('start-time'), headers.index('end-time'), headers.index('duration') id_fare = headers.index('fare') id_tm, id_pt_et = headers.index('trip-mode'), headers.index('prev-trip-end-time') id_jqt, id_qt = headers.index('join-queue-time'), headers.index('queue-time') with open('%s/ap-trip-op-ep-%s.csv' % (airport_trips_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) new_headers = ['tid', 'vid', 'did', 'start-time', 'end-time', 'duration', 'fare', 'trip-mode', 'prev-trip-end-time', 'join-queue-time', 'queue-time', 'op-cost', 'economic', 'yy', 'mm', 'dd', 'hh'] writer.writerow(new_headers) for row in reader: jqt, st, et = eval(row[id_jqt]), eval(row[id_st]), eval(row[id_et]) dur, fare = eval(row[id_dur]), eval(row[id_fare]) if st - jqt < Q_LIMIT_MIN: qt = Q_LIMIT_MIN elif Q_LIMIT_MAX < st - jqt: qt = Q_LIMIT_MAX else: qt = st - jqt modi_jqt = st - qt jqt_datetime = datetime.datetime.fromtimestamp(modi_jqt) st_datetime = datetime.datetime.fromtimestamp(st) et_datetime = datetime.datetime.fromtimestamp(et) op_cost = 0 st_yyyy, st_mm, st_dd, st_hh = st_datetime.year, st_datetime.month, st_datetime.day, st_datetime.hour if jqt_datetime.hour == st_datetime.hour: try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += qt * op_cost_per_sec else: tp = datetime.datetime(st_datetime.year, 
st_datetime.month, st_datetime.day, st_datetime.hour) tp_timestamp = time.mktime(tp.timetuple()) p_jqt_st = (tp_timestamp - modi_jqt) / qt prev_dt = st_datetime - datetime.timedelta(hours=1) try: op_cost_per_sec = op_costs[(prev_dt.year, prev_dt.month, prev_dt.day, prev_dt.hour)] except KeyError: alternative_datetime = prev_dt - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * p_jqt_st # try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * (1 - p_jqt_st) if st_datetime.hour == et_datetime.hour: try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += dur * op_cost_per_sec else: # This part don't regards cases when duration is more than a hour tp = datetime.datetime(et_datetime.year, et_datetime.month, et_datetime.day, et_datetime.hour) tp_timestamp = time.mktime(tp.timetuple()) p_st_et = (tp_timestamp - st) / dur next_dt = st_datetime + datetime.timedelta(hours=1) try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = 
op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * p_st_et # try: op_cost_per_sec = op_costs[(next_dt.year, next_dt.month, next_dt.day, next_dt.hour)] except KeyError: alternative_datetime = next_dt - datetime.timedelta(hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * (1 - p_st_et) economic_profit = fare - op_cost # writer.writerow([row[id_tid], row[id_vid], row[id_did], st, et, dur, fare, row[id_tm], row[id_pt_et], modi_jqt, qt, op_cost, economic_profit, st_yyyy, st_mm, st_dd, st_hh]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm): print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # print yymm if yymm == '0901': prev_yymm = None elif yymm == '1001': prev_yymm = '0911' elif yymm == '1011': prev_yymm = '1009' else: yy, mm = int(yymm[:2]), int(yymm[2:]) prev_yymm = '%02d%02d' % (yy, mm - 1) # if not prev_yymm: Qsa_value, state_action_fare_dur = {}, {} locations = [IN_AP, OUT_AP] actions = [IN_AP, OUT_AP] for s1 in DAY_OF_WEEK: for s2 in TIME_SLOTS: for s3 in locations: for a in actions: Qsa_value[(s1, s2, s3, a)] = 0 state_action_fare_dur[(s1, s2, s3, a)] = [0, 0] else: Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm)) # with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location') id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'), id_etime, id_eloc = headers.index('end-time'), headers.index('end-location'), id_dur, id_fare = headers.index('duration'), headers.index('fare') # count = 0 for row in reader: prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(row[id_stime]), eval(row[id_etime]) setup_time = stime - prev_tetime # if setup_time < 0 or HOUR * 2 < setup_time: continue # prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime) s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour s3 = row[id_prev_teloc] # etime_datetime = datetime.datetime.fromtimestamp(etime) new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour new_s3 = row[id_eloc] # a = row[id_sloc] dur, fare = eval(row[id_dur]), eval(row[id_fare]) # state_action_fare_dur[(s1, s2, s3, a)][0] += fare state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur # if Qsa_value[(new_s1, new_s2, 
new_s3, IN_AP)] > Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)] : future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] else: future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)] # alter_a = OUT_AP if a == IN_AP else IN_AP if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0: op_cost = 0 else: op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1] qrs = fare - op_cost + GAMMA * future_max_q_value Qsa_value[(s1, s2, s3, a)] = \ (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs count += 1 if count % MOD_STAN == 0: print '%s, %d' % (yymm, count) logging_msg('%s, %d' % (yymm, count)) save_pickle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur]) save_pickle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): old_time = time.time() print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) begin_timestamp = datetime.datetime(2009, 1, 1, 0) last_timestamp = datetime.datetime(2011, 2, 1, 0) hourly_total, time_period_order = {}, [] while begin_timestamp < last_timestamp: yyyy, mm, dd, hh = begin_timestamp.year, begin_timestamp.month, begin_timestamp.day, begin_timestamp.hour k = (yyyy, mm, dd, hh) hourly_total[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE]))] time_period_order.append(k) begin_timestamp += datetime.timedelta(hours=1) # st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare' # Productive duration yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:]) with open('%s/%s%s.csv' % (shift_pro_dur_dir, shift_pro_dur_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: dd, hh = eval(row[hid['dd']]), eval(row[hid['hh']]) hourly_total[(yyyy, mm, dd, hh)][GEN_DUR] += eval(row[hid['pro-dur']]) * 60 # unit change; Minute -> Second if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) # Total fare with open('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]]) dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]]) # st_dt, et_dt = datetime.datetime.fromtimestamp(st_ts), datetime.datetime.fromtimestamp(et_ts) # if st_dt.hour == et_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][GEN_FARE] += fare else: next_ts_dt = datetime.datetime(st_dt.year, st_dt.month, st_dt.day, st_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, 
next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) prop = (tg_ts - st_ts) / dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][GEN_FARE] += fare * prop while True: if tg_dt.hour == et_dt.hour: prop = (et_ts - tg_ts) / dur hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][GEN_FARE] += fare * prop break prop = HOUR / dur hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][GEN_FARE] += fare * prop tg_dt += datetime.timedelta(hours=1) if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) with open('%s/%s%s.csv' % (general_dur_fare_dir, general_dur_fare_prefix, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare'] writer.writerow(header) for yyyy, mm, dd, hh in time_period_order: gen_dur, gen_fare = hourly_total[(yyyy, mm, dd, hh)] writer.writerow([yyyy - 2000, mm, dd, hh, gen_dur, gen_fare]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): old_time = time.time() print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) begin_timestamp = datetime.datetime(2009, 1, 1, 0) last_timestamp = datetime.datetime(2011, 2, 1, 0) hourly_total, time_period_order = {}, [] while begin_timestamp < last_timestamp: yyyy, mm, dd, hh = begin_timestamp.year, begin_timestamp.month, begin_timestamp.day, begin_timestamp.hour k = (yyyy, mm, dd, hh) hourly_total[k] = [0 for _ in range(len([AP_DUR, AP_FARE, AP_QUEUE]))] time_period_order.append(k) begin_timestamp += datetime.timedelta(hours=1) # st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare' ap_qt_label = 'ap-queue-time' # Airport fare, duration and queue time with open('%s/%s%s.csv' % (airport_trips_dir, ap_trip_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]]) dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]]) ap_qt = eval(row[hid[ap_qt_label]]) # st_dt, et_dt = datetime.datetime.fromtimestamp(st_ts), datetime.datetime.fromtimestamp(et_ts) # Duration and queue time if st_dt.hour == et_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][AP_DUR] += dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][AP_FARE] += fare else: next_ts_dt = datetime.datetime(st_dt.year, st_dt.month, st_dt.day, st_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) dur_within_slot = tg_ts - st_ts prop = dur_within_slot / dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][AP_DUR] += dur_within_slot hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][AP_FARE] += fare * prop while True: 
if tg_dt.hour == et_dt.hour: dur_within_slot = et_ts - tg_ts prop = dur_within_slot / dur hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][AP_DUR] += dur_within_slot hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][AP_FARE] += fare * prop break prop = HOUR / dur hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][AP_DUR] += HOUR hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][AP_DUR] += fare * prop tg_dt += datetime.timedelta(hours=1) # Queue time if ap_qt < Q_LIMIT_MIN: ap_qt = Q_LIMIT_MIN q_jt_ts = st_ts - ap_qt q_jt_dt = datetime.datetime.fromtimestamp(q_jt_ts) if q_jt_dt.hour == st_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][AP_QUEUE] += ap_qt else: next_ts_dt = datetime.datetime(q_jt_dt.year, q_jt_dt.month, q_jt_dt.day, q_jt_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) hourly_total[(q_jt_dt.year, q_jt_dt.month, q_jt_dt.day, q_jt_dt.hour)][AP_QUEUE] += tg_ts - q_jt_ts while True: if tg_dt.hour == st_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][AP_QUEUE] += st_ts - tg_ts break hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][AP_QUEUE] += HOUR tg_dt += datetime.timedelta(hours=1) if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) with open('%s/%s%s.csv' % (ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = ['yy', 'mm', 'dd', 'hh', 'ap-duration', 'ap-fare', 'ap-queue-time'] writer.writerow(header) for yyyy, mm, dd, hh in time_period_order: ap_dur, ap_fare, ap_qt = hourly_total[(yyyy, mm, dd, hh)] writer.writerow([yyyy - 2000, mm, dd, hh, ap_dur, ap_fare, ap_qt]) print 'end the file; %s' % yymm 
logging_msg('end the file; %s' % yymm)
def process_files(yymm, q_lerning_ended_dir): candi_pkl_files = [] for dn in q_lerning_ended_dir: if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)): continue candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm)) result_pkls = [ os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm for pkl_path in candi_pkl_files ] # list_argmax_as = [] state_action_fare_dur = None for pkl_file_path in candi_pkl_files: Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path) argmax_as = {} for s1 in DAY_OF_WEEK: for s2 in TIME_SLOTS: for s3 in [IN_AP, OUT_AP]: argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[( s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP list_argmax_as.append(argmax_as) # whole_rev, whole_count = 0, 0 list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [ 0 for _ in xrange(len(candi_pkl_files)) ] count = 0 with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_prev_tetime, id_prev_teloc = headers.index( 'prev-trip-end-time'), headers.index('prev-trip-end-location') id_stime, id_sloc = headers.index('start-time'), headers.index( 'start-location'), id_dur, id_fare = headers.index('duration'), headers.index('fare') for row in reader: prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime]) setup_time = stime - prev_tetime # prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime) s1, s2 = prev_tetime_datetime.strftime( "%a"), prev_tetime_datetime.hour s3 = row[id_prev_teloc] # a = row[id_sloc] # dur, fare = eval(row[id_dur]), eval(row[id_fare]) alter_a = OUT_AP if a == IN_AP else IN_AP if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0: op_cost = 0 else: op_cost = (setup_time + dur) * state_action_fare_dur[ (s1, s2, s3, alter_a)][0] / state_action_fare_dur[ (s1, s2, s3, alter_a)][1] economic_profit = fare - op_cost # whole_rev += economic_profit 
whole_count += 1 for i, argmax_as in enumerate(list_argmax_as): if argmax_as[(s1, s2, s3)] == a: list_sub_rev[i] += economic_profit list_sub_count[i] += 1 count += 1 if count % MOD_STAN == 0: print '%s, %d' % (yymm, count) logging_msg('%s, %d' % (yymm, count)) for i in xrange(len(result_pkls)): result_fn = result_pkls[i] save_pickle_file(result_fn, [ whole_rev, whole_count, list_sub_rev[i], list_sub_count[i] ]) for i in xrange(len(result_pkls)): result_fn = result_pkls[i] save_pickle_file( result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) with open('%s/%s' % (airport_trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_tid, id_vid, id_did = headers.index('tid'), headers.index( 'vid'), headers.index('did') id_st, id_et, id_dur = headers.index('start-time'), headers.index( 'end-time'), headers.index('duration') id_fare = headers.index('fare') id_tm, id_pt_et = headers.index('trip-mode'), headers.index( 'prev-trip-end-time') id_jqt, id_qt = headers.index('join-queue-time'), headers.index( 'queue-time') with open('%s/ap-trip-op-ep-%s.csv' % (airport_trips_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) new_headers = [ 'tid', 'vid', 'did', 'start-time', 'end-time', 'duration', 'fare', 'trip-mode', 'prev-trip-end-time', 'join-queue-time', 'queue-time', 'op-cost', 'economic', 'yy', 'mm', 'dd', 'hh' ] writer.writerow(new_headers) for row in reader: jqt, st, et = eval(row[id_jqt]), eval(row[id_st]), eval( row[id_et]) dur, fare = eval(row[id_dur]), eval(row[id_fare]) if st - jqt < Q_LIMIT_MIN: qt = Q_LIMIT_MIN elif Q_LIMIT_MAX < st - jqt: qt = Q_LIMIT_MAX else: qt = st - jqt modi_jqt = st - qt jqt_datetime = datetime.datetime.fromtimestamp(modi_jqt) st_datetime = datetime.datetime.fromtimestamp(st) et_datetime = datetime.datetime.fromtimestamp(et) op_cost = 0 st_yyyy, st_mm, st_dd, st_hh = st_datetime.year, st_datetime.month, st_datetime.day, st_datetime.hour if jqt_datetime.hour == st_datetime.hour: try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += qt * op_cost_per_sec else: tp = datetime.datetime(st_datetime.year, 
st_datetime.month, st_datetime.day, st_datetime.hour) tp_timestamp = time.mktime(tp.timetuple()) p_jqt_st = (tp_timestamp - modi_jqt) / qt prev_dt = st_datetime - datetime.timedelta(hours=1) try: op_cost_per_sec = op_costs[(prev_dt.year, prev_dt.month, prev_dt.day, prev_dt.hour)] except KeyError: alternative_datetime = prev_dt - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * p_jqt_st # try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * (1 - p_jqt_st) if st_datetime.hour == et_datetime.hour: try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += dur * op_cost_per_sec else: # This part don't regards cases when duration is more than a hour tp = datetime.datetime(et_datetime.year, et_datetime.month, et_datetime.day, et_datetime.hour) tp_timestamp = time.mktime(tp.timetuple()) p_st_et = (tp_timestamp - st) / dur next_dt = st_datetime + datetime.timedelta(hours=1) try: op_cost_per_sec = op_costs[(st_yyyy, st_mm, st_dd, st_hh)] except KeyError: alternative_datetime = st_datetime - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = 
op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * p_st_et # try: op_cost_per_sec = op_costs[(next_dt.year, next_dt.month, next_dt.day, next_dt.hour)] except KeyError: alternative_datetime = next_dt - datetime.timedelta( hours=1) a_yyyy, a_mm, a_dd, a_hh = \ alternative_datetime.year, alternative_datetime.month, alternative_datetime.day, alternative_datetime.hour op_cost_per_sec = op_costs[(a_yyyy, a_mm, a_dd, a_hh)] op_cost += op_cost_per_sec * qt * (1 - p_st_et) economic_profit = fare - op_cost # writer.writerow([ row[id_tid], row[id_vid], row[id_did], st, et, dur, fare, row[id_tm], row[id_pt_et], modi_jqt, qt, op_cost, economic_profit, st_yyyy, st_mm, st_dd, st_hh ]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm, q_lerning_ended_dir): candi_pkl_files = [] for dn in q_lerning_ended_dir: if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)): continue candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm)) result_pkls = [os.path.dirname(pkl_path) + '/results-%s.pkl'% yymm for pkl_path in candi_pkl_files] # list_argmax_as = [] state_action_fare_dur = None for pkl_file_path in candi_pkl_files: Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path) argmax_as = {} for s1 in DAY_OF_WEEK: for s2 in TIME_SLOTS: for s3 in [IN_AP, OUT_AP]: argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP list_argmax_as.append(argmax_as) # whole_rev, whole_count = 0, 0 list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [0 for _ in xrange(len(candi_pkl_files))] count = 0 with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location') id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'), id_dur, id_fare = headers.index('duration'), headers.index('fare') for row in reader: prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime]) setup_time = stime - prev_tetime # prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime) s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour s3 = row[id_prev_teloc] # a = row[id_sloc] # dur, fare = eval(row[id_dur]), eval(row[id_fare]) alter_a = OUT_AP if a == IN_AP else IN_AP if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0: op_cost = 0 else: op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1] economic_profit = fare - op_cost # whole_rev += economic_profit whole_count += 1 
for i, argmax_as in enumerate(list_argmax_as): if argmax_as[(s1, s2, s3)] == a: list_sub_rev[i] += economic_profit list_sub_count[i] += 1 count += 1 if count % MOD_STAN == 0: print '%s, %d' % (yymm, count) logging_msg('%s, %d' % (yymm, count)) for i in xrange(len(result_pkls)): result_fn = result_pkls[i] save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]]) for i in xrange(len(result_pkls)): result_fn = result_pkls[i] save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
def process_files(yymm): print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # init_csv_files(yymm) # full_dids = sorted([ eval(x) for x in load_picle_file('%s/%s%s.pkl' % (full_shift_dir, monthly_full_did_prefix, yymm)) ]) s_df = pd.read_csv('%s/%s%s.csv' % (full_shift_dir, sh_full_prefix, yymm)) trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm)) ap_trip_df = pd.read_csv('%s/%s%s.csv' % (airport_trips_dir, ap_trip_op_ep_prefix, yymm)) # yy, mm = int(yymm[:2]), int(yymm[2:]) for did in full_dids: # General did_sh = s_df[(s_df['driver-id'] == did)] pro_dur = sum(did_sh['productive-duration']) * SEC did_wt = trip_df[(trip_df['did'] == did)] total_fare = sum(did_wt['fare']) if pro_dur > 0 and total_fare != 0: total_prod = total_fare / pro_dur with open( '%s/%s%s.csv' % (individual_detail_dir, general_prefix, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) writer.writerow([yy, mm, did, pro_dur, total_fare, total_prod]) # did_ap = ap_trip_df[(ap_trip_df['did'] == did)] prev_in_ap_trip = did_ap[(did_ap['trip-mode'] == DInAP_PInAP)] prev_out_ap_trip = did_ap[(did_ap['trip-mode'] == DOutAP_PInAP)] # if len(did_ap) != 0: # prev in ap trip ap_qu, ap_dur = sum(prev_in_ap_trip['queue-time']), sum( prev_in_ap_trip['duration']) ap_fare = sum(prev_in_ap_trip['fare']) ap_op_cost, ap_eco_profit = sum(prev_in_ap_trip['op-cost']), sum( prev_in_ap_trip['economic']) if ap_qu + ap_dur > 0 and ap_fare != 0: ap_prod = ap_fare / (ap_qu + ap_dur) with open( '%s/%s%s.csv' % (individual_detail_dir, prev_in_ap_prefix, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) writer.writerow([ yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod, ap_op_cost, ap_eco_profit ]) # # prev out ap trip ap_qu, ap_dur = sum(prev_out_ap_trip['queue-time']), sum( prev_out_ap_trip['duration']) ap_fare = sum(prev_out_ap_trip['fare']) ap_op_cost, ap_eco_profit = sum(prev_out_ap_trip['op-cost']), sum( prev_out_ap_trip['economic']) if ap_qu + ap_dur > 0 and ap_fare 
!= 0: ap_prod = ap_fare / (ap_qu + ap_dur) with open( '%s/%s%s.csv' % (individual_detail_dir, prev_out_ap_prefix, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) writer.writerow([ yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod, ap_op_cost, ap_eco_profit ]) print 'End the file; %s' % yymm logging_msg('End the file; %s' % yymm)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl') ap_pkl_file_path = None for pkl_fn in ap_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ap_crossing_times = load_picle_file(ap_pkl_file_path) # ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl') ns_pkl_file_path = None for pkl_fn in ns_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ns_crossing_times = load_picle_file(ns_pkl_file_path) # init_csv_files(yymm) with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() header_id = {h : i for i, h in enumerate(headers)} for row in reader: tid, did = row[header_id['tid']], row[header_id['did']] et, duration = row[header_id['end-time']], row[header_id['duration']] fare = row[header_id['fare']] # ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(row[header_id['ns-trip-mode']]) vid, st, prev_tet = row[header_id['vid']], eval(row[header_id['start-time']]), eval(row[header_id['prev-trip-end-time']]) # is_ap_trip, is_ns_trip = False, False # if ap_tm == DInAP_PInAP: is_ap_trip = True ap_join_queue_time = prev_tet elif ap_tm == DOutAP_PInAP: is_ap_trip = True try: i = bisect(ap_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ap_join_queue_time = ap_crossing_times[vid][i - 1] if i != 0 else ap_crossing_times[vid][0] if is_ap_trip: with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ap_queue_time = st - ap_join_queue_time new_row = [tid, vid, did, st, et, duration, fare, prev_tet, ap_tm, 
ap_join_queue_time, ap_queue_time] writer.writerow(new_row) # if ns_tm == DInNS_PInNS: is_ns_trip = True ns_join_queue_time = prev_tet elif ns_tm == DOutNS_PInNS: is_ns_trip = True try: i = bisect(ns_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ns_join_queue_time = ns_crossing_times[vid][i - 1] if i != 0 else ns_crossing_times[vid][0] if is_ns_trip: with open('%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ns_queue_time = st - ns_join_queue_time new_row = [tid, vid, did, st, et, duration, fare, prev_tet, ns_tm, ns_join_queue_time, ns_queue_time] writer.writerow(new_row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm)) # yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:]) dd, hh = 1, 0 cur_day_time = datetime.datetime(yyyy, mm, dd, hh) if mm == 12: next_yyyy, next_mm = yyyy + 1, 1 else: next_yyyy, next_mm = yyyy, mm + 1 last_day_time = datetime.datetime(next_yyyy, next_mm, dd, hh) # st_label = 'start-time' ap_tm_lable, ns_tm_lable = 'ap-trip-mode', 'ns-trip-mode' dur_lable, fare_label = 'duration', 'fare' # ap_tm = [DInAP_PInAP, DInAP_POutAP, DOutAP_PInAP, DOutAP_POutAP] ns_tm = [DInNS_PInNS, DInNS_POutNS, DOutNS_PInNS, DOutNS_POutNS] # while cur_day_time != last_day_time: next_day_time = cur_day_time + datetime.timedelta(hours=1) st_timestamp, et_timestamp = time.mktime( cur_day_time.timetuple()), time.mktime(next_day_time.timetuple()) # yyyy, mm, dd, hh = cur_day_time.year, cur_day_time.month, cur_day_time.day, cur_day_time.hour # filtered_trip = trip_df[(st_timestamp <= trip_df[st_label]) & (trip_df[st_label] < et_timestamp)] # gp_f_trip = filtered_trip.groupby([ap_tm_lable]) tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ap_tm] tm_num_df = gp_f_trip.count()[fare_label].to_frame( 'total_tm_num').reset_index() for tm, num in tm_num_df.values: tm_num_totalDuration_totalFare[tm][1] += num tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame( 'total_tm_dur').reset_index() for tm, dur in tm_dur_df.values: tm_num_totalDuration_totalFare[tm][2] += dur tm_fare_df = gp_f_trip.sum()[fare_label].to_frame( 'total_tm_fare').reset_index() for tm, fare in tm_fare_df.values: tm_num_totalDuration_totalFare[tm][3] += fare save_as_csv(ap_fn, yymm, dd, hh, tm_num_totalDuration_totalFare) # gp_f_trip = filtered_trip.groupby([ns_tm_lable]) tm_num_totalDuration_totalFare = [[tm, 0, 0, 0] for tm in ns_tm] tm_num_df = gp_f_trip.count()[fare_label].to_frame( 'total_tm_num').reset_index() for tm, num in tm_num_df.values: 
tm_num_totalDuration_totalFare[tm][1] += num tm_dur_df = gp_f_trip.sum()[dur_lable].to_frame( 'total_tm_dur').reset_index() for tm, dur in tm_dur_df.values: tm_num_totalDuration_totalFare[tm][2] += dur tm_fare_df = gp_f_trip.sum()[fare_label].to_frame( 'total_tm_fare').reset_index() for tm, fare in tm_fare_df.values: tm_num_totalDuration_totalFare[tm][3] += fare save_as_csv(ns_fn, yymm, dd, hh, tm_num_totalDuration_totalFare) # cur_day_time = next_day_time print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm)
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm): print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # print yymm if yymm == '0901': prev_yymm = None elif yymm == '1001': prev_yymm = '0911' elif yymm == '1011': prev_yymm = '1009' else: yy, mm = int(yymm[:2]), int(yymm[2:]) prev_yymm = '%02d%02d' % (yy, mm - 1) # if not prev_yymm: Qsa_value, state_action_fare_dur = {}, {} locations = [IN_AP, OUT_AP] actions = [IN_AP, OUT_AP] for s1 in DAY_OF_WEEK: for s2 in TIME_SLOTS: for s3 in locations: for a in actions: Qsa_value[(s1, s2, s3, a)] = 0 state_action_fare_dur[(s1, s2, s3, a)] = [0, 0] else: Qsa_value, state_action_fare_dur = load_picle_file( '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm)) # with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_prev_tetime, id_prev_teloc = headers.index( 'prev-trip-end-time'), headers.index('prev-trip-end-location') id_stime, id_sloc = headers.index('start-time'), headers.index( 'start-location'), id_etime, id_eloc = headers.index('end-time'), headers.index( 'end-location'), id_dur, id_fare = headers.index('duration'), headers.index('fare') # count = 0 for row in reader: prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval( row[id_stime]), eval(row[id_etime]) setup_time = stime - prev_tetime # if setup_time < 0 or HOUR * 2 < setup_time: continue # prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime) s1, s2 = prev_tetime_datetime.strftime( "%a"), prev_tetime_datetime.hour s3 = row[id_prev_teloc] # etime_datetime = datetime.datetime.fromtimestamp(etime) new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour new_s3 = row[id_eloc] # a = row[id_sloc] dur, fare = eval(row[id_dur]), eval(row[id_fare]) # state_action_fare_dur[(s1, s2, s3, a)][0] += fare state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur # if Qsa_value[(new_s1, new_s2, 
new_s3, IN_AP)] > Qsa_value[( new_s1, new_s2, new_s3, OUT_AP)]: future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] else: future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)] # alter_a = OUT_AP if a == IN_AP else IN_AP if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0: op_cost = 0 else: op_cost = (setup_time + dur) * state_action_fare_dur[ (s1, s2, s3, alter_a)][0] / state_action_fare_dur[ (s1, s2, s3, alter_a)][1] qrs = fare - op_cost + GAMMA * future_max_q_value Qsa_value[(s1, s2, s3, a)] = \ (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs count += 1 if count % MOD_STAN == 0: print '%s, %d' % (yymm, count) logging_msg('%s, %d' % (yymm, count)) save_pickle_file( '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur]) save_pickle_file( '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl') ap_pkl_file_path = None for pkl_fn in ap_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ap_crossing_times = load_picle_file(ap_pkl_file_path) # ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl') ns_pkl_file_path = None for pkl_fn in ns_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ns_crossing_times = load_picle_file(ns_pkl_file_path) # init_csv_files(yymm) with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() header_id = {h: i for i, h in enumerate(headers)} for row in reader: tid, did = row[header_id['tid']], row[header_id['did']] et, duration = row[header_id['end-time']], row[ header_id['duration']] fare = row[header_id['fare']] # ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int( row[header_id['ns-trip-mode']]) vid, st, prev_tet = row[header_id['vid']], eval( row[header_id['start-time']]), eval( row[header_id['prev-trip-end-time']]) # is_ap_trip, is_ns_trip = False, False # if ap_tm == DInAP_PInAP: is_ap_trip = True ap_join_queue_time = prev_tet elif ap_tm == DOutAP_PInAP: is_ap_trip = True try: i = bisect(ap_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ap_join_queue_time = ap_crossing_times[vid][ i - 1] if i != 0 else ap_crossing_times[vid][0] if is_ap_trip: with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ap_queue_time = st - ap_join_queue_time new_row = [ tid, vid, did, st, et, duration, fare, prev_tet, ap_tm, 
ap_join_queue_time, ap_queue_time ] writer.writerow(new_row) # if ns_tm == DInNS_PInNS: is_ns_trip = True ns_join_queue_time = prev_tet elif ns_tm == DOutNS_PInNS: is_ns_trip = True try: i = bisect(ns_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ns_join_queue_time = ns_crossing_times[vid][ i - 1] if i != 0 else ns_crossing_times[vid][0] if is_ns_trip: with open( '%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ns_queue_time = st - ns_join_queue_time new_row = [ tid, vid, did, st, et, duration, fare, prev_tet, ns_tm, ns_join_queue_time, ns_queue_time ] writer.writerow(new_row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_files(yymm): old_time = time.time() print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) begin_timestamp = datetime.datetime(2009, 1, 1, 0) last_timestamp = datetime.datetime(2011, 2, 1, 0) hourly_total, time_period_order = {}, [] while begin_timestamp < last_timestamp: yyyy, mm, dd, hh = begin_timestamp.year, begin_timestamp.month, begin_timestamp.day, begin_timestamp.hour k = (yyyy, mm, dd, hh) hourly_total[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE]))] time_period_order.append(k) begin_timestamp += datetime.timedelta(hours=1) # st_label, et_label, dur_label, fare_label = 'start-time', 'end-time', 'duration', 'fare' # Productive duration yyyy, mm = 2000 + int(yymm[:2]), int(yymm[2:]) with open('%s/%s%s.csv' % (shift_pro_dur_dir, shift_pro_dur_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: dd, hh = eval(row[hid['dd']]), eval(row[hid['hh']]) hourly_total[(yyyy, mm, dd, hh)][GEN_DUR] += eval( row[hid['pro-dur']]) * 60 # unit change; Minute -> Second if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) # Total fare with open('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: st_ts, et_ts = eval(row[hid[st_label]]), eval(row[hid[et_label]]) dur, fare = eval(row[hid[dur_label]]), eval(row[hid[fare_label]]) # st_dt, et_dt = datetime.datetime.fromtimestamp( st_ts), datetime.datetime.fromtimestamp(et_ts) # if st_dt.hour == et_dt.hour: hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][GEN_FARE] += fare else: next_ts_dt = datetime.datetime( st_dt.year, st_dt.month, st_dt.day, st_dt.hour) + datetime.timedelta(hours=1) tg_year, tg_month, tg_day, tg_hour = \ next_ts_dt.year, next_ts_dt.month, next_ts_dt.day, 
next_ts_dt.hour tg_dt = datetime.datetime(tg_year, tg_month, tg_day, tg_hour) tg_ts = time.mktime(tg_dt.timetuple()) prop = (tg_ts - st_ts) / dur hourly_total[(st_dt.year, st_dt.month, st_dt.day, st_dt.hour)][GEN_FARE] += fare * prop while True: if tg_dt.hour == et_dt.hour: prop = (et_ts - tg_ts) / dur hourly_total[(et_dt.year, et_dt.month, et_dt.day, et_dt.hour)][GEN_FARE] += fare * prop break prop = HOUR / dur hourly_total[(tg_dt.year, tg_dt.month, tg_dt.day, tg_dt.hour)][GEN_FARE] += fare * prop tg_dt += datetime.timedelta(hours=1) if (time.time() - old_time) > TIME_ALARM == 0: old_time = time.time() print 'handling; %s' % yymm logging_msg('handling; %s' % yymm) with open( '%s/%s%s.csv' % (general_dur_fare_dir, general_dur_fare_prefix, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare'] writer.writerow(header) for yyyy, mm, dd, hh in time_period_order: gen_dur, gen_fare = hourly_total[(yyyy, mm, dd, hh)] writer.writerow([yyyy - 2000, mm, dd, hh, gen_dur, gen_fare]) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)