def process_file(fn): _, yymm = fn[:-len('.csv')].split('-') # print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not = {}, {} vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = {}, {} if yymm not in ['0901', '1001', '1011']: path_to_last_day_csv_file = None temp_csv_files = get_all_files(log_last_day_dir, '', '.csv') prev_fn = None y, m = int(yymm[:2]), int(yymm[2:]) prev_m = m - 1 prev_yymm = '%02d%02d' % (y, prev_m) for temp_fn in temp_csv_files: if temp_fn.startswith('log-last-day-%s' % prev_yymm): prev_fn = temp_fn break assert prev_fn, yymm path_to_last_day_csv_file = '%s/%s' % (log_last_day_dir, prev_fn) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = \ record_crossing_time(path_to_last_day_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) path_to_csv_file = '%s/%s' % (logs_dir, fn) vehicle_ap_crossing_time_from_out_to_in, _, vehicle_ns_crossing_time_from_out_to_in, _ = \ record_crossing_time(path_to_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) # save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ap_crossing_time_from_out_to_in) save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ns_crossing_time_from_out_to_in) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def drivers():
    """Aggregate extreme drivers' post-trip choices into a policy table.

    For each state (weekday name, hour, prev-trip-end-location) counts how
    often the next trip started in vs. out of the airport, then pickles the
    normalized (ratio, ratio) pair per state.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir,
                            'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet = headers.index('prev-trip-end-time')
            id_ptel = headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(
                    int(row[id_ptet]))
                s1 = prev_tetime_datetime.strftime("%a")
                s2 = prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: float() guards against Python 2 integer division, which
        # would truncate every ratio to 0.00 unless the defining module has
        # `from __future__ import division`. Harmless when it does.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
def run():
    """Rebuild the shift-profile output dir and fan shift csvs out to workers."""
    remove_creat_dir(shift_pro_dur_dir)
    init_multiprocessor()
    num_jobs = 0
    for fn in get_all_files(shifts_dir, 'shift-hour-state-', '.csv'):
        put_task(process_file, [fn])
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Rebuild the full-driver output dir and queue each merged-trip csv."""
    remove_creat_dir(for_full_driver_dir)
    init_multiprocessor()
    num_jobs = 0
    for fn in get_all_files(merged_trip_dir, 'trips', '.csv'):
        put_task(process_file, [fn])
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Rebuild the airport/night-safari trip dirs and queue each whole-trip csv."""
    remove_creat_dir(airport_trips_dir)
    remove_creat_dir(nightsafari_trips_dir)
    csv_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    init_multiprocessor()
    num_jobs = 0
    for fn in csv_files:
        put_task(process_file, [fn])
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Rebuild trips_dir and dispatch every merged-trip csv to the worker pool."""
    remove_creat_dir(trips_dir)
    csv_files = get_all_files(merged_trip_dir, 'trips', '.csv')
    init_multiprocessor()
    counter = 0
    for fn in csv_files:
        counter += 1
        put_task(process_file, [fn])
    end_multiprocessor(counter)
def run():
    """Rebuild the hourly-summary dir and queue every whole-trip csv."""
    remove_creat_dir(hourly_summary)
    csv_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    # init_multiprocessor()
    # NOTE(review): init_multiprocessor() is disabled in the original while
    # put_task/end_multiprocessor below are live -- confirm this is intended.
    num_jobs = 0
    for fn in csv_files:
        put_task(process_file, [fn])
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Recreate both venue trip dirs, then hand each whole-trip csv to a worker."""
    remove_creat_dir(airport_trips_dir)
    remove_creat_dir(nightsafari_trips_dir)
    monthly_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    init_multiprocessor()
    jobs = 0
    for fn in monthly_files:
        put_task(process_file, [fn])
        jobs += 1
    end_multiprocessor(jobs)
def run():
    """Rebuild the learning-input dir and queue every merged-trip csv."""
    remove_creat_dir(for_learning_dir)
    csv_files = get_all_files(merged_trip_dir, 'trips', '.csv')
    # init_multiprocessor()
    # NOTE(review): init_multiprocessor() is disabled in the original while
    # put_task/end_multiprocessor below are live -- confirm this is intended.
    num_jobs = 0
    for fn in csv_files:
        put_task(process_file, [fn])
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Append per-month whole-vs-subset average fare rows from comparison pickles."""
    dir_path = '/Users/JerryHan88/taxi/full_drivers_trips_q_comparision'
    for pkl_file in get_all_files(dir_path, 'comparision-', '.pkl'):
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = \
            load_picle_file('%s/comparision-%s.pkl' % (dir_path, yymm))
        # NOTE(review): 'fn' is not defined in this function; it presumably
        # names a module-level output csv path -- confirm before running.
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([yy, mm,
                             whole_rev / whole_count,
                             sub_rev / sub_count])
def run():
    """Append per-month whole-vs-subset average fare rows from comparison pickles."""
    dir_path = '/Users/JerryHan88/taxi/full_drivers_trips_q_comparision'
    pickle_files = get_all_files(dir_path, 'comparision-', '.pkl')
    for pkl_file in pickle_files:
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        payload = load_picle_file('%s/comparision-%s.pkl' % (dir_path, yymm))
        whole_rev, whole_count, sub_rev, sub_count = payload
        # NOTE(review): 'fn' is not defined in this function; it presumably
        # names a module-level output csv path -- confirm before running.
        with open(fn, 'a') as w_csvfile:
            csv.writer(w_csvfile).writerow(
                [yy, mm, whole_rev / whole_count, sub_rev / sub_count])
def process_files(ALPHA, GAMMA):
    """Append per-month whole/sub average revenue rows for one (ALPHA, GAMMA)
    learning run; silently returns None when the run's directory is absent."""
    ALPHA_GAMMA_dir = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA)
    if not os.path.exists(ALPHA_GAMMA_dir):
        return None
    run_prefix = 'ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA)
    for pkl_file in get_all_files(ALPHA_GAMMA_dir, run_prefix, '.pkl'):
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = \
            load_picle_file('%s/results-%s.pkl' % (ALPHA_GAMMA_dir, yymm))
        # NOTE(review): 'fn' is not defined in this function; it presumably
        # names a module-level output csv path -- confirm before running.
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([ALPHA, GAMMA, yy, mm,
                             whole_rev / whole_count,
                             sub_rev / sub_count])
def run():
    """Queue every airport-trip csv for processing, logging dispatch failures."""
    init_multiprocessor()
    num_jobs = 0
    for fn in get_all_files(airport_trips_dir, 'airport-trip-', '.csv'):
        try:
            put_task(process_file, [fn])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def test(): def difference(data0, data1): diff = {} for k, v in data0.iteritems(): diff[k] = data1[k] - v return diff def ordering(dids_values): order_v_did = [] for did, v in dids_values.iteritems(): order_v_did.append([v, did]) order_v_did.sort() order_v_did.reverse() return order_v_did def find_extreme_range(order_v_did): # more than mean's 50 percent values = [v for v, _ in order_v_did] mu, std = np.mean(values), np.std(values) i = 0 while order_v_did[i][0] > mu + std * 2.0: i += 1 return (0, i / len(order_v_did)) both_years_full_drivers, \ Y09_driver_genprod_hour, Y10_driver_genprod_hour, \ Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour, \ Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour, \ Y09_pin_driver_epro_month, Y10_pin_driver_epro_month, \ Y09_pout_driver_epro_month, Y10_pout_driver_epro_month = load_picle_file('%s/productivities_ext.pkl' % (individual_detail_dir)) # diff_general_prod = difference(Y09_driver_genprod_hour, Y10_driver_genprod_hour) diff_pin_prod = difference(Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour) diff_pout_prod = difference(Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour) diff_pin_eco = difference(Y09_pin_driver_epro_month, Y10_pin_driver_epro_month) diff_pout_eco = difference(Y09_pout_driver_epro_month, Y10_pout_driver_epro_month) order_v_did = ordering(diff_pin_eco) print len(diff_pin_eco) r1, r2 = find_extreme_range(order_v_did) extreme_drivers = [int(did) for _, did in order_v_did[int(r1 * len(order_v_did)):int(r2 * len(order_v_did))]] for fn in get_all_files(for_full_driver_dir, 'full-drivers-trips-', '.csv'): _, _, _, yymm = fn[:-len('.csv')].split('-') with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() id_did = headers.index('did') with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'wt') as w_csvfile: writer = csv.writer(w_csvfile) writer.writerow(headers) for row in reader: did = 
int(row[id_did]) if did not in extreme_drivers: continue writer.writerow(row)
def run():
    """Rebuild full_shift_dir and queue every shift csv, logging dispatch failures."""
    remove_creat_dir(full_shift_dir)
    csv_files = get_all_files(shifts_dir, '', '.csv')
    init_multiprocessor()
    num_jobs = 0
    for fn in csv_files:
        try:
            put_task(process_file, [fn])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (fn) + format_exc())
            raise
        num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Dispatch one task per month, passing the list of finished learning dirs
    (those holding all 22 'ALPHA-*' pickles)."""
    candi_dirs = get_all_directories(for_learning_dir)
    q_lerning_ended_dir = [
        dn for dn in candi_dirs
        if len(get_all_files(for_learning_dir + '/%s' % (dn),
                             'ALPHA-', '.pkl')) == 22
    ]
    init_multiprocessor()
    counter = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            # months missing from the data set
            if yymm in ['0912', '1010']:
                continue
            put_task(process_files, [yymm, q_lerning_ended_dir])
            counter += 1
    end_multiprocessor(counter)
def q_learning():
    """Derive a majority policy from learned Q values.

    Across all pickled Q tables of the ALPHA-0.10/GAMMA-0.50 run, votes per
    state (day, time slot, in/out airport) for whichever action (stay in vs.
    leave) has the larger Q value, then pickles the normalized vote shares.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    # ties vote for staying in the airport (>=)
                    if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                        i = index_IN_OUT_AP[IN_AP]
                    else:
                        i = index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: float() guards against Python 2 integer division, which
        # would truncate every share to 0.00 unless the defining module has
        # `from __future__ import division`. Harmless when it does.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
def q_learning():
    """Derive a majority policy from learned Q values (see sibling copy).

    Votes, per state (day, time slot, in/out airport), for the action with
    the larger Q value across all pickled Q tables of the
    ALPHA-0.10/GAMMA-0.50 run, then pickles the normalized vote shares.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    # ties vote for staying in the airport (>=)
                    in_q = Qsa_value[(s1, s2, s3, IN_AP)]
                    out_q = Qsa_value[(s1, s2, s3, OUT_AP)]
                    i = index_IN_OUT_AP[IN_AP] if in_q >= out_q \
                        else index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: float() guards against Python 2 integer division, which
        # would truncate every share to 0.00 unless the defining module has
        # `from __future__ import division`. Harmless when it does.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
def process_files(ALPHA, GAMMA):
    """Append per-month whole/sub average revenue rows for one (ALPHA, GAMMA)
    learning run; silently returns None when the run's directory is absent."""
    ALPHA_GAMMA_dir = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA)
    if not os.path.exists(ALPHA_GAMMA_dir):
        return None
    pickle_files = get_all_files(ALPHA_GAMMA_dir,
                                 'ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA),
                                 '.pkl')
    for pkl_file in pickle_files:
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        payload = load_picle_file('%s/results-%s.pkl' % (ALPHA_GAMMA_dir, yymm))
        whole_rev, whole_count, sub_rev, sub_count = payload
        # NOTE(review): 'fn' is not defined in this function; it presumably
        # names a module-level output csv path -- confirm before running.
        with open(fn, 'a') as w_csvfile:
            csv.writer(w_csvfile).writerow(
                [ALPHA, GAMMA, yy, mm,
                 whole_rev / whole_count, sub_rev / sub_count])
def run():
    """Dispatch one task per month with the list of finished learning dirs
    (those holding all 22 'ALPHA-*' pickles)."""
    finished_dirs = []
    for dn in get_all_directories(for_learning_dir):
        pkl_count = len(get_all_files(for_learning_dir + '/%s' % (dn),
                                      'ALPHA-', '.pkl'))
        if pkl_count == 22:
            finished_dirs.append(dn)
    init_multiprocessor()
    counter = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            # months missing from the data set
            if yymm in ['0912', '1010']:
                continue
            put_task(process_files, [yymm, finished_dirs])
            counter += 1
    end_multiprocessor(counter)
def drivers():
    """Aggregate extreme drivers' post-trip choices into a policy table
    (sibling copy of drivers() above).

    Counts, per state (weekday name, hour, prev-trip-end-location), how often
    the next trip started in vs. out of the airport, then pickles the
    normalized ratios per state.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir,
                            'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet = headers.index('prev-trip-end-time')
            id_ptel = headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = \
                    datetime.datetime.fromtimestamp(int(row[id_ptet]))
                s1 = prev_tetime_datetime.strftime("%a")
                s2 = prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: float() guards against Python 2 integer division, which
        # would truncate every ratio to 0.00 unless the defining module has
        # `from __future__ import division`. Harmless when it does.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl') ap_pkl_file_path = None for pkl_fn in ap_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ap_crossing_times = load_picle_file(ap_pkl_file_path) # ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl') ns_pkl_file_path = None for pkl_fn in ns_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ns_crossing_times = load_picle_file(ns_pkl_file_path) # init_csv_files(yymm) with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() header_id = {h: i for i, h in enumerate(headers)} for row in reader: tid, did = row[header_id['tid']], row[header_id['did']] et, duration = row[header_id['end-time']], row[ header_id['duration']] fare = row[header_id['fare']] # ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int( row[header_id['ns-trip-mode']]) vid, st, prev_tet = row[header_id['vid']], eval( row[header_id['start-time']]), eval( row[header_id['prev-trip-end-time']]) # is_ap_trip, is_ns_trip = False, False # if ap_tm == DInAP_PInAP: is_ap_trip = True ap_join_queue_time = prev_tet elif ap_tm == DOutAP_PInAP: is_ap_trip = True try: i = bisect(ap_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ap_join_queue_time = ap_crossing_times[vid][ i - 1] if i != 0 else ap_crossing_times[vid][0] if is_ap_trip: with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ap_queue_time = st - ap_join_queue_time new_row = [ tid, vid, did, st, et, duration, fare, prev_tet, ap_tm, 
ap_join_queue_time, ap_queue_time ] writer.writerow(new_row) # if ns_tm == DInNS_PInNS: is_ns_trip = True ns_join_queue_time = prev_tet elif ns_tm == DOutNS_PInNS: is_ns_trip = True try: i = bisect(ns_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ns_join_queue_time = ns_crossing_times[vid][ i - 1] if i != 0 else ns_crossing_times[vid][0] if is_ns_trip: with open( '%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ns_queue_time = st - ns_join_queue_time new_row = [ tid, vid, did, st, et, duration, fare, prev_tet, ns_tm, ns_join_queue_time, ns_queue_time ] writer.writerow(new_row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def run(): check_dir_create(summary_dir) # cur_timestamp = datetime.datetime(2008, 12, 31, 23) last_timestamp = datetime.datetime(2011, 1, 1, 0) hp_summary, time_period_order = {}, [] while cur_timestamp < last_timestamp: cur_timestamp += datetime.timedelta(hours=1) yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour if yyyy == 2009 and mm == 12: continue if yyyy == 2010 and mm == 10: continue k = (str(yyyy - 2000), str(mm), str(dd), str(hh)) hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \ AP_DUR, AP_FARE, AP_QUEUE, \ NS_DUR, NS_FARE, NS_QUEUE]))] time_period_order.append(k) # yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh' # General for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'): print fn with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']]) hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']]) # Aiport for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']]) hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']]) hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']]) # Night Safari for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as 
r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']]) hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']]) hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']]) # Summary print 'summary' zero_dur = [] with open(hourly_productivities, 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = [ 'yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration', 'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare', 'ns-queue-time', 'gen-productivity', 'ap-productivity', 'ap-out-productivity', 'ns-productivity', 'ns-out-productivity' ] writer.writerow(header) for k in time_period_order: gen_dur, gen_fare, \ ap_dur, ap_fare, ap_queue, \ ns_dur, ns_fare, ns_queue = hp_summary[k] yy, mm, dd, hh = k # try: gen_prod = gen_fare / gen_dur except ZeroDivisionError: gen_prod = -1 zero_dur.append([GENERAL, k]) try: ap_prod = ap_fare / (ap_dur + ap_queue) except ZeroDivisionError: ap_prod = -1 zero_dur.append([AIRPORT, k]) ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue)) try: ns_prod = ns_fare / (ns_dur + ns_queue) except ZeroDivisionError: ns_prod = -1 zero_dur.append([NIGHTSAFARI, k]) ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue)) # writer.writerow([ yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue, ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod, ns_prod, ns_out_prod ]) # save_pickle_file(zero_duration_time_slots, zero_dur)
def run(): check_dir_create(summary_dir) # cur_timestamp = datetime.datetime(2008, 12, 31, 23) last_timestamp = datetime.datetime(2011, 1, 1, 0) hp_summary, time_period_order = {}, [] while cur_timestamp < last_timestamp: cur_timestamp += datetime.timedelta(hours=1) yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour if yyyy == 2009 and mm == 12: continue if yyyy == 2010 and mm == 10: continue k = (str(yyyy - 2000), str(mm), str(dd), str(hh)) hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \ AP_DUR, AP_FARE, AP_QUEUE, \ NS_DUR, NS_FARE, NS_QUEUE]))] time_period_order.append(k) # yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh' # General for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'): print fn with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']]) hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']]) # Aiport for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']]) hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']]) hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']]) # Night Safari for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as 
r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']]) hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']]) hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']]) # Summary print 'summary' zero_dur = [] with open(hourly_productivities, 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration', 'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare', 'ns-queue-time', 'gen-productivity', 'ap-productivity', 'ap-out-productivity', 'ns-productivity', 'ns-out-productivity'] writer.writerow(header) for k in time_period_order: gen_dur, gen_fare, \ ap_dur, ap_fare, ap_queue, \ ns_dur, ns_fare, ns_queue = hp_summary[k] yy, mm, dd, hh = k # try: gen_prod = gen_fare / gen_dur except ZeroDivisionError: gen_prod = -1 zero_dur.append([GENERAL, k]) try: ap_prod = ap_fare / (ap_dur + ap_queue) except ZeroDivisionError: ap_prod = -1 zero_dur.append([AIRPORT, k]) ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue)) try: ns_prod = ns_fare / (ns_dur + ns_queue) except ZeroDivisionError: ns_prod = -1 zero_dur.append([NIGHTSAFARI, k]) ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue)) # writer.writerow([yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue, ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod, ns_prod, ns_out_prod]) # save_pickle_file(zero_duration_time_slots, zero_dur)
def process_file(fn): _, _, yymm = fn[:-len('.csv')].split('-') print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) # ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl') ap_pkl_file_path = None for pkl_fn in ap_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ap_crossing_times = load_picle_file(ap_pkl_file_path) # ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl') ns_pkl_file_path = None for pkl_fn in ns_pkl_files: _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-') if pkl_yymm == yymm: ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn) break else: assert False, yymm ns_crossing_times = load_picle_file(ns_pkl_file_path) # init_csv_files(yymm) with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() header_id = {h : i for i, h in enumerate(headers)} for row in reader: tid, did = row[header_id['tid']], row[header_id['did']] et, duration = row[header_id['end-time']], row[header_id['duration']] fare = row[header_id['fare']] # ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(row[header_id['ns-trip-mode']]) vid, st, prev_tet = row[header_id['vid']], eval(row[header_id['start-time']]), eval(row[header_id['prev-trip-end-time']]) # is_ap_trip, is_ns_trip = False, False # if ap_tm == DInAP_PInAP: is_ap_trip = True ap_join_queue_time = prev_tet elif ap_tm == DOutAP_PInAP: is_ap_trip = True try: i = bisect(ap_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ap_join_queue_time = ap_crossing_times[vid][i - 1] if i != 0 else ap_crossing_times[vid][0] if is_ap_trip: with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ap_queue_time = st - ap_join_queue_time new_row = [tid, vid, did, st, et, duration, fare, prev_tet, ap_tm, 
ap_join_queue_time, ap_queue_time] writer.writerow(new_row) # if ns_tm == DInNS_PInNS: is_ns_trip = True ns_join_queue_time = prev_tet elif ns_tm == DOutNS_PInNS: is_ns_trip = True try: i = bisect(ns_crossing_times[vid], st) except KeyError: logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']])) continue ns_join_queue_time = ns_crossing_times[vid][i - 1] if i != 0 else ns_crossing_times[vid][0] if is_ns_trip: with open('%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile: writer = csv.writer(w_csvfile) ns_queue_time = st - ns_join_queue_time new_row = [tid, vid, did, st, et, duration, fare, prev_tet, ns_tm, ns_join_queue_time, ns_queue_time] writer.writerow(new_row) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
from __future__ import division # Add the root path for packages I made import os, sys sys.path.append(os.getcwd() + '/..') # from supports._setting import for_learning_dir from supports.etc_functions import get_all_files # for i in xrange(11): for j in xrange(11): ALPHA, GAMMA = i / 10, j / 10 dn = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA) print ALPHA, GAMMA, if not os.path.exists(dn): print 'None' continue print len(get_all_files(dn, 'ALPHA-', '.pkl')), print len(get_all_files(dn, 'results-', '.pkl'))