Exemplos de save_pickle_file em Python, exemplos de supports.handling_pkl.save_pickle_file em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: find_zone_clossing_time.py Projeto: jerryhan88/workspace_SMU

def process_file(fn):
    """Build and pickle the AP / NS crossing-time tables for one log file.

    ``fn`` must split into exactly two '-'-separated parts once its '.csv'
    suffix is removed; the second part is used as the ``yymm`` tag.

    For every ``yymm`` other than '0901', '1001' and '1011' the four working
    tables are first passed through ``record_crossing_time`` together with the
    previous month's 'log-last-day-<yymm>' file found in ``log_last_day_dir``.
    The month's own log in ``logs_dir`` is then processed and the two
    crossing-time tables are pickled next to it.

    Raises AssertionError (carrying ``yymm``) when the previous month's
    last-day file cannot be found.
    """
    _prefix, yymm = fn[:-len('.csv')].split('-')
    #
    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    ap_crossing, ap_last_log = {}, {}
    ns_crossing, ns_last_log = {}, {}
    seed_from_previous_month = yymm not in ['0901', '1001', '1011']
    if seed_from_previous_month:
        candidates = get_all_files(log_last_day_dir, '', '.csv')
        year, month = int(yymm[:2]), int(yymm[2:])
        wanted_prefix = 'log-last-day-%s' % ('%02d%02d' % (year, month - 1))
        # First file whose name starts with the wanted prefix, else None.
        prev_fn = next((name for name in candidates
                        if name.startswith(wanted_prefix)), None)
        assert prev_fn, yymm
        last_day_path = '%s/%s' % (log_last_day_dir, prev_fn)
        ap_crossing, ap_last_log, ns_crossing, ns_last_log = record_crossing_time(
            last_day_path, ap_crossing, ap_last_log, ns_crossing, ns_last_log)
    month_path = '%s/%s' % (logs_dir, fn)
    # Only the two crossing-time tables are needed after the final pass.
    ap_crossing, _unused_ap, ns_crossing, _unused_ns = record_crossing_time(
        month_path, ap_crossing, ap_last_log, ns_crossing, ns_last_log)
    #
    save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm), ap_crossing)
    save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm), ns_crossing)
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)

Exemplo n.º 2

0

Exibir arquivo

def process_file(fn):
    """Copy the shift rows of vehicles that map to at most one entry.

    ``fn`` must split into exactly three '-'-separated parts once its '.csv'
    suffix is removed; the last part is used as the ``yymm`` tag.

    The 'driver-vehicle-<yymm>.pkl' mapping is loaded from ``shifts_dir`` and
    indexed by each row's 'vid'.  Rows whose mapped collection holds more than
    one element are skipped; the remaining rows are written to
    'shift-full-time-<yymm>.csv' in ``full_shift_dir`` under renamed headers,
    and the set of their 'did' values is pickled as
    'full-time-drivers-<yymm>.pkl'.
    """
    _part0, _part1, yymm = fn[:-len('.csv')].split('-')
    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    #
    vehicle_to_drivers = load_picle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm))
    kept_drivers = set()
    # Input columns, listed in the order they are written out.
    source_columns = ['yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur', 'x-pro-dur']
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        positions = [headers.index(column) for column in source_columns]
        vid_pos, did_pos = positions[4], positions[5]
        # The output file is opened only after the header has been validated.
        with open('%s/shift-full-time-%s.csv' % (full_shift_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['year', 'month', 'day', 'hour', 'vehicle-id',
                             'driver-id', 'productive-duration',
                             'x-productive-duration'])
            for row in reader:
                if len(vehicle_to_drivers[row[vid_pos]]) <= 1:
                    writer.writerow([row[pos] for pos in positions])
                    kept_drivers.add(row[did_pos])
    save_pickle_file('%s/full-time-drivers-%s.pkl' % (full_shift_dir, yymm), kept_drivers)
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: policies_summary.py Projeto: jerryhan88/workspace_SMU

def drivers():
    """Summarise the observed start-location choices of the logged drivers.

    Every 'diff-pin-eco-extreme-drivers-trip-*.csv' file in
    ``for_full_driver_dir`` is read.  Each row is keyed by the state
    (weekday abbreviation, hour, 'prev-trip-end-location'), where weekday and
    hour come from the 'prev-trip-end-time' timestamp, and is counted in the
    slot given by ``index_IN_OUT_AP[row['start-location']]``.

    The two counts of every state are turned into shares formatted with
    '%.2f' and the resulting dict is pickled to 'extreme_drivers_policy.pkl'.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir,
                            'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        # The unpacking only checks that the name has exactly seven
        # '-'-separated parts; the yymm value itself is not used here.
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = next(reader)
            id_ptet = headers.index('prev-trip-end-time')
            id_ptel = headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(
                    int(row[id_ptet]))
                state = (prev_tetime_datetime.strftime("%a"),
                         prev_tetime_datetime.hour,
                         row[id_ptel])
                counts = policies.setdefault(state, [0, 0])
                counts[index_IN_OUT_AP[row[id_sl]]] += 1
    op_policies = {}
    for state, counts in policies.items():
        # float() forces true division: the counts are ints, and Python 2's
        # classic '/' would floor every share to 0 or 1 unless the module
        # enables ``from __future__ import division``.
        total = float(counts[0] + counts[1])
        op_policies[state] = ('%.2f' % (counts[0] / total),
                              '%.2f' % (counts[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: driver_monthly_fare_ap_trips.py Projeto: jerryhan88/workspace_SMU

def run():
    """Pickle the per-(month, driver) airport-trip fare totals of both years.

    For 'Y09-airport-trip.csv' and 'Y10-airport-trip.csv' in
    ``airport_trips_dir``: rows with ``did == -1`` are dropped, fares are
    summed per ('mm', 'did') group and each sum is divided by ``CENT``.
    The two resulting lists are saved, in that order, to
    ``driver_monthly_fare_ap_trips``.
    """
    check_dir_create(summary_dir)
    #
    fares_per_year = []
    for csv_name in ("Y09-airport-trip.csv", "Y10-airport-trip.csv"):
        trips = pd.read_csv("%s/%s" % (airport_trips_dir, csv_name))
        trips = trips[(trips["did"] != -1)]
        fare_sums = trips.groupby(["mm", "did"]).sum()["fare"]
        fares_per_year.append([fare_sum / CENT for fare_sum in list(fare_sums)])
    #
    save_pickle_file(driver_monthly_fare_ap_trips, fares_per_year)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: driver_monthly_fare_ap_trips.py Projeto: jerryhan88/workspace_SMU

def run():
    """Save the monthly airport-trip fare totals per driver for both years.

    Each yearly CSV in ``airport_trips_dir`` is filtered to rows whose 'did'
    is not -1, grouped by ('mm', 'did'), and the summed 'fare' of every group
    is divided by ``CENT``.  The 2009 and 2010 lists are pickled together to
    ``driver_monthly_fare_ap_trips``.
    """
    check_dir_create(summary_dir)

    def monthly_driver_fares(csv_name):
        # One fare total per (month, driver) group of a single yearly file.
        trips = pd.read_csv('%s/%s' % (airport_trips_dir, csv_name))
        with_driver = trips[(trips['did'] != -1)]
        grouped = with_driver.groupby(['mm', 'did'])
        return [amount / CENT for amount in list(grouped.sum()['fare'])]

    Y09_ap_fares = monthly_driver_fares('Y09-airport-trip.csv')
    Y10_ap_fares = monthly_driver_fares('Y10-airport-trip.csv')
    #
    save_pickle_file(driver_monthly_fare_ap_trips,
                     [Y09_ap_fares, Y10_ap_fares])

Exemplo n.º 6

0

Exibir arquivo

Arquivo: policies_summary.py Projeto: jerryhan88/workspace_SMU

def q_learning():
    """Summarise which action the learned Q-values prefer in every state.

    Each 'ALPHA-*.pkl' file under '<for_learning_dir>/ALPHA-0.10-GAMMA-0.50'
    casts one vote per state (s1 from DAY_OF_WEEK, s2 from TIME_SLOTS,
    s3 in [IN_AP, OUT_AP]): the vote goes to IN_AP when its Q-value is greater
    than or equal to OUT_AP's, otherwise to OUT_AP.  The vote counts are
    turned into shares formatted with '%.2f' and pickled to
    'q_learning_policy.pkl'.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    votes = policies.setdefault((s1, s2, s3), [0, 0])
                    if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                        i = index_IN_OUT_AP[IN_AP]
                    else:
                        i = index_IN_OUT_AP[OUT_AP]
                    votes[i] += 1
    op_policies = {}
    for k, v in policies.items():
        # float() forces true division: the votes are ints, and Python 2's
        # classic '/' would floor every share to 0 or 1 unless the module
        # enables ``from __future__ import division``.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: compare_Q_D.py Projeto: jerryhan88/workspace_SMU

def process_files(yymm):
    """Compare the profit of all trips with that of policy-following trips.

    Loads the Q-values and the per-(state, action) statistics for ``yymm``,
    derives the preferred action of every state (IN_AP wins ties), then walks
    'diff-pin-eco-extreme-drivers-trip-<yymm>.csv'.  For each row an economic
    profit (fare minus an opportunity cost based on the alternative action's
    statistics) is added to the overall totals, and also to the "sub" totals
    when the row's start location equals the preferred action.

    The list [whole_rev, whole_count, sub_rev, sub_count] is pickled to
    'comparision-<yymm>.pkl' in ``for_full_driver_dir``.
    """
    pkl_path = '%s/ALPHA-0.10-GAMMA-0.50/ALPHA-0.10-GAMMA-0.50-q-value-fare-dur-%s.pkl' % (for_learning_dir, yymm)
    Qsa_value, state_action_fare_dur = load_picle_file(pkl_path)
    preferred_action = {}
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                q_in = Qsa_value[(s1, s2, s3, IN_AP)]
                q_out = Qsa_value[(s1, s2, s3, OUT_AP)]
                if q_in >= q_out:
                    preferred_action[(s1, s2, s3)] = IN_AP
                else:
                    preferred_action[(s1, s2, s3)] = OUT_AP
    #
    whole_rev = whole_count = 0
    sub_rev = sub_count = 0
    rows_seen = 0
    trips_path = '%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm)
    with open(trips_path, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        col_prev_end_time = headers.index('prev-trip-end-time')
        col_prev_end_loc = headers.index('prev-trip-end-location')
        col_start_time = headers.index('start-time')
        col_start_loc = headers.index('start-location')
        col_dur = headers.index('duration')
        col_fare = headers.index('fare')
        for row in reader:
            # NOTE(review): eval() runs whatever expression the CSV cell
            # contains; only safe while these files are trusted.
            prev_end_time = eval(row[col_prev_end_time])
            start_time = eval(row[col_start_time])
            setup_time = start_time - prev_end_time
            #
            prev_end_dt = datetime.datetime.fromtimestamp(prev_end_time)
            state = (prev_end_dt.strftime("%a"), prev_end_dt.hour,
                     row[col_prev_end_loc])
            #
            action = row[col_start_loc]
            #
            dur, fare = eval(row[col_dur]), eval(row[col_fare])
            if action == IN_AP:
                other_action = OUT_AP
            else:
                other_action = IN_AP
            # presumably [fare total, duration total] of the other action --
            # TODO confirm against the code that builds the pickle
            other_stats = state_action_fare_dur[state + (other_action,)]
            if other_stats[1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * other_stats[0] / other_stats[1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            if preferred_action[state] == action:
                sub_rev += economic_profit
                sub_count += 1
            rows_seen += 1
            if rows_seen % MOD_STAN == 0:
                progress = '%s, %d' % (yymm, rows_seen)
                print(progress)
                logging_msg(progress)
    save_pickle_file('%s/comparision-%s.pkl' % (for_full_driver_dir, yymm),
                     [whole_rev, whole_count, sub_rev, sub_count])

Exemplo n.º 8

0

Exibir arquivo

Arquivo: driver_monthly_fare_general_trips.py Projeto: jerryhan88/workspace_SMU

def run():
    """Pickle every driver's total monthly fare, split by year.

    Iterates months 1-12 of years 9 and 10, skipping the tags "0912" and
    "1010".  For each remaining month 'whole-trip-<yymm>.csv' is read from
    ``trips_dir``, rows with ``did == -1`` are dropped, and the per-'did' fare
    sums (each divided by ``CENT``) are appended to that year's list.  Both
    lists are saved, year 9 first, to ``monthly_fare_summary``.
    """
    check_dir_create(summary_dir)
    #
    fares_of_year = {9: [], 10: []}
    for y in range(9, 11):
        for m in range(1, 13):
            yymm = "%02d%02d" % (y, m)
            if yymm in ["0912", "1010"]:
                continue
            trip_df = pd.read_csv("%s/whole-trip-%s.csv" % (trips_dir, yymm))

            with_driver = trip_df[(trip_df["did"] != -1)]
            fare_sums = with_driver.groupby(["did"]).sum()["fare"]
            fares_of_year[y].extend([fare_sum / CENT for fare_sum in list(fare_sums)])
    save_pickle_file(monthly_fare_summary, [fares_of_year[9], fares_of_year[10]])

Exemplo n.º 9

0

Exibir arquivo

Arquivo: policies_summary.py Projeto: jerryhan88/workspace_SMU

def q_learning():
    """Count, per state, how often the learned Q-values favour each action.

    For every 'ALPHA-*.pkl' file under
    '<for_learning_dir>/ALPHA-0.10-GAMMA-0.50' and every state
    (s1 from DAY_OF_WEEK, s2 from TIME_SLOTS, s3 in [IN_AP, OUT_AP]) one vote
    is recorded: for IN_AP when its Q-value is at least OUT_AP's, otherwise
    for OUT_AP.  The counts are converted to shares formatted with '%.2f' and
    pickled to 'q_learning_policy.pkl'.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    state = (s1, s2, s3)
                    if state not in policies:
                        policies[state] = [0, 0]
                    prefers_in = (Qsa_value[(s1, s2, s3, IN_AP)] >=
                                  Qsa_value[(s1, s2, s3, OUT_AP)])
                    i = index_IN_OUT_AP[IN_AP] if prefers_in else index_IN_OUT_AP[OUT_AP]
                    policies[state][i] += 1
    op_policies = {}
    for state, votes in policies.items():
        # The votes are ints; without float() Python 2's classic '/' floors
        # each share to 0 or 1 (unless ``from __future__ import division`` is
        # active in this module).
        total = float(votes[0] + votes[1])
        op_policies[state] = ('%.2f' % (votes[0] / total),
                              '%.2f' % (votes[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: policies_summary.py Projeto: jerryhan88/workspace_SMU

def drivers():
    """Tabulate the start-location shares observed in the driver trip logs.

    Reads every 'diff-pin-eco-extreme-drivers-trip-*.csv' file in
    ``for_full_driver_dir``.  A row's state is (weekday abbreviation, hour,
    'prev-trip-end-location'), with weekday and hour taken from the
    'prev-trip-end-time' timestamp; the row is counted in the slot
    ``index_IN_OUT_AP[row['start-location']]``.

    Per state the two counts become shares formatted with '%.2f'; the result
    is pickled to 'extreme_drivers_policy.pkl'.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir, 'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        # Kept as a shape check on the file name (exactly seven '-'-separated
        # parts); the extracted yymm is not used afterwards.
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = next(reader)
            id_ptet, id_ptel = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(int(row[id_ptet]))
                s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.items():
        # The counts are ints; without float() Python 2's classic '/' floors
        # each share to 0 or 1 (unless ``from __future__ import division`` is
        # active in this module).
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: driver_monthly_fare_general_trips.py Projeto: jerryhan88/workspace_SMU

def run():
    """Collect the per-driver monthly fare totals for both years and save them.

    Months 1-12 of years 9 and 10 are visited, except the tags '0912' and
    '1010'.  Each month's 'whole-trip-<yymm>.csv' from ``trips_dir`` is
    filtered to rows whose 'did' is not -1, fares are summed per 'did' and
    divided by ``CENT``.  Year 9 results and all other results are gathered in
    two separate lists, which are pickled together to
    ``monthly_fare_summary``.
    """
    check_dir_create(summary_dir)
    #
    Y09_driver_total_monthly_fare = []
    Y10_driver_total_monthly_fare = []
    for y in range(9, 11):
        # Year 9 fills the first list; any other year fills the second.
        if y == 9:
            year_fares = Y09_driver_total_monthly_fare
        else:
            year_fares = Y10_driver_total_monthly_fare
        for m in range(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            trip_df = pd.read_csv('%s/whole-trip-%s.csv' % (trips_dir, yymm))

            trip_df = trip_df[(trip_df['did'] != -1)]
            per_driver = trip_df.groupby(['did']).sum()['fare']
            for fare_sum in list(per_driver):
                year_fares.append(fare_sum / CENT)
    save_pickle_file(
        monthly_fare_summary,
        [Y09_driver_total_monthly_fare, Y10_driver_total_monthly_fare])

Exemplo n.º 12

0

Exibir arquivo

Arquivo: productive_times.py Projeto: jerryhan88/workspace_SMU

def process_file(fn):
    """Write per-row productive durations and pickle the vehicle->drivers map.

    ``fn`` must split into exactly four '-'-separated parts once its '.csv'
    suffix is removed; the last part is used as the ``yymm`` tag.

    For each row of the shift CSV in ``shifts_dir`` the integer values of the
    columns dur0 and dur3..dur10 are summed and written, together with the
    two-digit year, month, day, hour, vehicle id and driver id, to
    'shift-pro-dur-<yymm>.csv' in ``shift_pro_dur_dir``.  A dict mapping each
    vehicle id to the set of driver ids seen with it is pickled as
    'driver-vehicle-<yymm>.pkl' in ``shifts_dir``.
    """
    _part0, _part1, _part2, yymm = fn[:-len('.csv')].split('-')
    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    #
    vehicle_drivers = {}
    productive_columns = ['dur%d' % state for state in [0, 3, 4, 5, 6, 7, 8, 9, 10]]
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        column_of = dict((name, pos) for pos, name in enumerate(next(reader)))
        with open('%s/shift-pro-dur-%s.csv' % (shift_pro_dur_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(['yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur'])
            for row in reader:
                vid = row[column_of['vehicle-id']]
                did = row[column_of['driver-id']]
                productive_duration = 0
                for column in productive_columns:
                    productive_duration += int(row[column_of[column]])
                # Only the last two characters of the year are kept.
                writer.writerow([row[column_of['year']][-2:],
                                 row[column_of['month']],
                                 row[column_of['day']],
                                 row[column_of['hour']],
                                 vid, did, productive_duration])
                if vid not in vehicle_drivers:
                    vehicle_drivers[vid] = set()
                vehicle_drivers[vid].add(did)
    save_pickle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm), vehicle_drivers)
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)

Exemplo n.º 13

0

Exibir arquivo

Arquivo: handle_q_learning_decisions.py Projeto: jerryhan88/workspace_SMU

def process_files(yymm, q_lerning_ended_dir):
    """Score a month's trips against every pending Q-learning result.

    Parameters:
        yymm: month tag used in the pickle and CSV file names.
        q_lerning_ended_dir: iterable of directory names under
            ``for_learning_dir``; a directory that already contains
            'results-<yymm>.pkl' is skipped.

    For each remaining directory the Q-values are loaded and the preferred
    action of every state is derived (IN_AP wins ties).  Each row of
    'whole-trip-<yymm>.csv' then contributes its economic profit to the
    overall totals and to the totals of every policy whose preferred action
    equals the row's start location.  Results are pickled per directory as
    [whole_rev, whole_count, sub_rev, sub_count], both every ``MOD_STAN`` rows
    and once at the end.

    Returns None.  When no directory is pending the function returns without
    reading the trip CSV.
    """
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm))
    if not candi_pkl_files:
        # Nothing to evaluate and nothing to save.  Without this guard
        # state_action_fare_dur would stay None and the first CSV row would
        # fail with a TypeError.
        return
    result_pkls = [os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm for pkl_path in candi_pkl_files]
    #
    list_argmax_as = []
    state_action_fare_dur = None
    for pkl_file_path in candi_pkl_files:
        # The statistics of the last loaded pickle are the ones used below.
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev = [0] * len(candi_pkl_files)
    list_sub_count = [0] * len(candi_pkl_files)

    def save_results(rev, cnt):
        # Write the current totals of every policy to its results pickle.
        for i, result_fn in enumerate(result_pkls):
            save_pickle_file(result_fn, [rev, cnt, list_sub_rev[i], list_sub_count[i]])

    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            # NOTE(review): eval() runs whatever expression the CSV cell
            # contains; only safe while these files are trusted.
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            # presumably [fare total, duration total] of the alternative
            # action -- TODO confirm against the code that builds the pickle
            alter_stats = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alter_stats[1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * alter_stats[0] / alter_stats[1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1

            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print('%s, %d' % (yymm, count))
                logging_msg('%s, %d' % (yymm, count))
                # Periodic checkpoint of the running totals.
                save_results(whole_rev, whole_count)
    save_results(whole_rev, whole_count)

Exemplo n.º 14

0

Exibir arquivo

def run():
    """Aggregate hourly totals and write the hourly productivity table.

    An 8-slot accumulator (general duration/fare, AP duration/fare/queue,
    NS duration/fare/queue) is created for every hour generated by the time
    loop, except hours in 2009-12 and 2010-10.  The general, airport and
    Night Safari CSV files are then summed into those accumulators, rows
    whose (yy, mm, dd, hh) key is unknown being ignored.

    One row per hour is written to ``hourly_productivities`` with the raw
    totals and five productivities (fare divided by duration).  A productivity
    whose denominator is zero is written as -1; for the general, airport and
    Night Safari productivities the slot is also recorded in the list that is
    pickled to ``zero_duration_time_slots``.
    """
    check_dir_create(summary_dir)
    #
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    slot_fields = [GEN_DUR, GEN_FARE,
                   AP_DUR, AP_FARE, AP_QUEUE,
                   NS_DUR, NS_FARE, NS_QUEUE]
    hp_summary, time_period_order = {}, []
    # NOTE(review): the timestamp is advanced before it is used, so the first
    # slot is 2009-01-01 00h and the last one is last_timestamp itself
    # (2011-01-01 00h) -- confirm that the final slot is intended.
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if (yyyy, mm) in [(2009, 12), (2010, 10)]:
            continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0] * len(slot_fields)
        time_period_order.append(k)

    def accumulate(data_dir, prefix, slot_columns):
        # Add the named columns of every matching CSV file into hp_summary;
        # slot_columns is a list of (accumulator index, column name) pairs.
        for fn in get_all_files(data_dir, prefix, '.csv'):
            print(fn)
            with open('%s/%s' % (data_dir, fn), 'rb') as r_csvfile:
                reader = csv.reader(r_csvfile)
                headers = next(reader)
                hid = {h: i for i, h in enumerate(headers)}
                for row in reader:
                    k = (row[hid['yy']], row[hid['mm']],
                         row[hid['dd']], row[hid['hh']])
                    if k not in hp_summary:
                        continue
                    for slot, column in slot_columns:
                        # NOTE(review): eval() runs whatever expression the
                        # CSV cell contains; only safe for trusted files.
                        hp_summary[k][slot] += eval(row[hid[column]])

    # General
    accumulate(general_dur_fare_dir, general_dur_fare_prefix,
               [(GEN_DUR, 'gen-duration'), (GEN_FARE, 'gen-fare')])
    # Airport
    accumulate(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix,
               [(AP_DUR, 'ap-duration'), (AP_FARE, 'ap-fare'),
                (AP_QUEUE, 'ap-queue-time')])
    # Night Safari
    accumulate(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix,
               [(NS_DUR, 'ns-duration'), (NS_FARE, 'ns-fare'),
                (NS_QUEUE, 'ns-queue-time')])
    # Summary
    print('summary')
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = [
            'yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration',
            'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare',
            'ns-queue-time', 'gen-productivity', 'ap-productivity',
            'ap-out-productivity', 'ns-productivity', 'ns-out-productivity'
        ]
        writer.writerow(header)
        for k in time_period_order:
            # The unpacking relies on the accumulator being laid out in the
            # order GEN_DUR .. NS_QUEUE.
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # The "out" productivities used to be unguarded, so a slot with
            # all-zero totals aborted the run halfway through the CSV.  They
            # now get the same -1 marker; nothing is added to zero_dur, so the
            # pickled list keeps its existing contents.
            try:
                ap_out_prod = (gen_fare - ap_fare) / (gen_dur -
                                                      (ap_dur + ap_queue))
            except ZeroDivisionError:
                ap_out_prod = -1
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            try:
                ns_out_prod = (gen_fare - ns_fare) / (gen_dur -
                                                      (ns_dur + ns_queue))
            except ZeroDivisionError:
                ns_out_prod = -1
            #
            writer.writerow([
                yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue,
                ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod,
                ns_prod, ns_out_prod
            ])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)

Exemplo n.º 15

0

Exibir arquivo

Arquivo: handle_q_learning_decisions.py Projeto: jerryhan88/workspace_SMU

def process_files(yymm, q_lerning_ended_dir):
    """Evaluate a month's trips under each not-yet-evaluated learned policy.

    Parameters:
        yymm: month tag used in the pickle and CSV file names.
        q_lerning_ended_dir: iterable of directory names under
            ``for_learning_dir``; directories that already hold
            'results-<yymm>.pkl' are skipped.

    The preferred action of every state is derived from each pending
    directory's Q-values (IN_AP wins ties).  Every row of
    'whole-trip-<yymm>.csv' adds its economic profit to the overall totals
    and to the totals of each policy whose preferred action matches the row's
    start location.  [whole_rev, whole_count, sub_rev, sub_count] is pickled
    per directory every ``MOD_STAN`` rows and again after the last row.

    Returns None.  When no directory is pending the function returns before
    opening the trip CSV.
    """
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        already_done = os.path.exists('%s/%s/results-%s.pkl' %
                                      (for_learning_dir, dn, yymm))
        if not already_done:
            candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' %
                                   (for_learning_dir, dn, dn, yymm))
    if not candi_pkl_files:
        # With no pending policy state_action_fare_dur would remain None and
        # the first trip row would raise a TypeError; there is also nothing
        # to save, so stop here.
        return
    result_pkls = [
        os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm
        for pkl_path in candi_pkl_files
    ]
    #
    list_argmax_as = []
    state_action_fare_dur = None
    for pkl_file_path in candi_pkl_files:
        # Each load overwrites state_action_fare_dur; the last one is used.
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    q_in = Qsa_value[(s1, s2, s3, IN_AP)]
                    q_out = Qsa_value[(s1, s2, s3, OUT_AP)]
                    argmax_as[(s1, s2, s3)] = IN_AP if q_in >= q_out else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev = [0] * len(candi_pkl_files)
    list_sub_count = [0] * len(candi_pkl_files)

    def save_results(rev, cnt):
        # Write the current totals of every policy to its results pickle.
        for i, result_fn in enumerate(result_pkls):
            save_pickle_file(
                result_fn,
                [rev, cnt, list_sub_rev[i], list_sub_count[i]])

    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        id_prev_tetime = headers.index('prev-trip-end-time')
        id_prev_teloc = headers.index('prev-trip-end-location')
        id_stime = headers.index('start-time')
        id_sloc = headers.index('start-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            # NOTE(review): eval() runs whatever expression the CSV cell
            # contains; only safe while these files are trusted.
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime(
                "%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            # presumably [fare total, duration total] of the alternative
            # action -- TODO confirm against the code that builds the pickle
            alter_stats = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alter_stats[1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * alter_stats[0] / alter_stats[1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1

            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print('%s, %d' % (yymm, count))
                logging_msg('%s, %d' % (yymm, count))
                # Periodic checkpoint of the running totals.
                save_results(whole_rev, whole_count)
    save_results(whole_rev, whole_count)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: hourly_productivities.py Projeto: jerryhan88/workspace_SMU

def run():
    """Build the hourly productivity CSV from the three hourly total sources.

    For every hour produced by the time loop (hours in 2009-12 and 2010-10
    excluded) an 8-slot accumulator is kept: general duration/fare,
    AP duration/fare/queue and NS duration/fare/queue.  The general, airport
    and Night Safari CSV files are summed into these accumulators; rows whose
    (yy, mm, dd, hh) key is not an expected hour are ignored.

    ``hourly_productivities`` receives one row per hour with the totals and
    five productivities (fare divided by duration).  A zero denominator yields
    -1; for the general, airport and Night Safari productivities the slot is
    additionally appended to the list pickled to
    ``zero_duration_time_slots``.
    """
    check_dir_create(summary_dir)
    #
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    slot_fields = [GEN_DUR, GEN_FARE,
                   AP_DUR, AP_FARE, AP_QUEUE,
                   NS_DUR, NS_FARE, NS_QUEUE]
    hp_summary, time_period_order = {}, []
    # NOTE(review): the timestamp is advanced before it is used, so the first
    # slot is 2009-01-01 00h and the last one is last_timestamp itself
    # (2011-01-01 00h) -- confirm that the final slot is intended.
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if yyyy == 2009 and mm == 12:
            continue
        if yyyy == 2010 and mm == 10:
            continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0] * len(slot_fields)
        time_period_order.append(k)

    def accumulate(data_dir, prefix, slot_columns):
        # Sum the given columns of each matching CSV file into hp_summary;
        # slot_columns is a list of (accumulator index, column name) pairs.
        for fn in get_all_files(data_dir, prefix, '.csv'):
            print(fn)
            with open('%s/%s' % (data_dir, fn), 'rb') as r_csvfile:
                reader = csv.reader(r_csvfile)
                headers = next(reader)
                hid = {h: i for i, h in enumerate(headers)}
                for row in reader:
                    k = (row[hid['yy']], row[hid['mm']], row[hid['dd']], row[hid['hh']])
                    if k not in hp_summary:
                        continue
                    for slot, column in slot_columns:
                        # NOTE(review): eval() runs whatever expression the
                        # CSV cell contains; only safe for trusted files.
                        hp_summary[k][slot] += eval(row[hid[column]])

    # General
    accumulate(general_dur_fare_dir, general_dur_fare_prefix,
               [(GEN_DUR, 'gen-duration'), (GEN_FARE, 'gen-fare')])
    # Airport
    accumulate(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix,
               [(AP_DUR, 'ap-duration'), (AP_FARE, 'ap-fare'), (AP_QUEUE, 'ap-queue-time')])
    # Night Safari
    accumulate(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix,
               [(NS_DUR, 'ns-duration'), (NS_FARE, 'ns-fare'), (NS_QUEUE, 'ns-queue-time')])
    # Summary
    print('summary')
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh',
                    'gen-duration', 'gen-fare',
                    'ap-duration', 'ap-fare', 'ap-queue-time',
                    'ns-duration', 'ns-fare', 'ns-queue-time',
                    'gen-productivity',
                    'ap-productivity', 'ap-out-productivity',
                    'ns-productivity', 'ns-out-productivity']
        writer.writerow(header)
        for k in time_period_order:
            # The unpacking relies on the accumulator being laid out in the
            # order GEN_DUR .. NS_QUEUE.
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # The "out" productivities were previously unguarded, so an hour
            # with all-zero totals raised ZeroDivisionError and left the CSV
            # half written.  They now use the same -1 marker; zero_dur is not
            # extended, so the pickled list keeps its existing contents.
            try:
                ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue))
            except ZeroDivisionError:
                ap_out_prod = -1
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            try:
                ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue))
            except ZeroDivisionError:
                ns_out_prod = -1
            #
            writer.writerow([yy, mm, dd, hh,
                            gen_dur, gen_fare,
                            ap_dur, ap_fare, ap_queue,
                            ns_dur, ns_fare, ns_queue,
                            gen_prod,
                            ap_prod, ap_out_prod,
                            ns_prod, ns_out_prod])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: q_learning.py Projeto: jerryhan88/workspace_SMU

def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    """Update the Q-value table with every trip in one month's whole-trip CSV.

    The tables from the preceding month are loaded from a pickle (or built
    from zeros when there is no preceding month), each usable CSV row is
    folded into them, and the result is pickled under ``ALPHA_GAMMA_dir``.
    The pickle is also rewritten every ``MOD_STAN`` processed rows.

    Args:
        ALPHA: weight given to the new sample in the Q-value update.
        GAMMA: factor applied to the best Q-value of the follow-up state.
        ALPHA_GAMMA_dir: directory holding the per-month pickle files.
        yymm: four-character year/month tag, e.g. ``'0902'``.
    """
    def _pkl_path(tag):
        # Formatted lazily so the string is built exactly where it is used.
        return '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, tag)

    start_msg = 'handle the file; %s' % yymm
    print(start_msg)
    logging_msg(start_msg)
    #
    print(yymm)
    # Months whose predecessor is not simply "same year, month - 1".
    # NOTE(review): presumably these reflect gaps in the data set -- confirm.
    irregular_prev = {'0901': None, '1001': '0911', '1011': '1009'}
    if yymm in irregular_prev:
        prev_yymm = irregular_prev[yymm]
    else:
        prev_yymm = '%02d%02d' % (int(yymm[:2]), int(yymm[2:]) - 1)
    #
    if not prev_yymm:
        # No earlier month: start every (day, slot, location, action) at zero.
        ap_choices = [IN_AP, OUT_AP]
        all_keys = [(dow, slot, loc, act)
                    for dow in DAY_OF_WEEK
                    for slot in TIME_SLOTS
                    for loc in ap_choices
                    for act in ap_choices]
        Qsa_value = dict((key, 0) for key in all_keys)
        # Each entry is a fresh [accumulated fare, accumulated duration] pair.
        state_action_fare_dur = dict((key, [0, 0]) for key in all_keys)
    else:
        Qsa_value, state_action_fare_dur = load_picle_file(_pkl_path(prev_yymm))
    #
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        col = headers.index
        id_prev_tetime, id_prev_teloc = col('prev-trip-end-time'), col('prev-trip-end-location')
        id_stime, id_sloc = col('start-time'), col('start-location')
        id_etime, id_eloc = col('end-time'), col('end-location')
        id_dur, id_fare = col('duration'), col('fare')
        #
        count = 0
        for row in reader:
            # NOTE(review): eval() runs whatever text the CSV cell holds; this
            # is only acceptable while the CSV is produced by trusted code.
            prev_tetime = eval(row[id_prev_tetime])
            stime = eval(row[id_stime])
            etime = eval(row[id_etime])
            setup_time = stime - prev_tetime
            # Skip rows whose gap since the previous trip is negative or
            # longer than two HOUR units.
            if setup_time < 0 or setup_time > HOUR * 2:
                continue
            # Current state: weekday / hour / location at the previous trip's end.
            prev_end = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2, s3 = prev_end.strftime("%a"), prev_end.hour, row[id_prev_teloc]
            # Follow-up state: weekday / hour / location at this trip's end.
            this_end = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2, new_s3 = this_end.strftime("%a"), this_end.hour, row[id_eloc]
            # The action is the location where this trip started.
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            #
            taken = (s1, s2, s3, a)
            spent = setup_time + dur
            state_action_fare_dur[taken][0] += fare
            state_action_fare_dur[taken][1] += spent
            # Best Q-value reachable from the follow-up state.
            q_in = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            q_out = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)]
            future_max_q_value = q_in if q_in > q_out else q_out
            # Opportunity cost: time spent here priced at the fare-per-time
            # accumulated so far for the other action in the same state.
            alter_a = OUT_AP if a == IN_AP else IN_AP
            alt_fare, alt_dur = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if alt_dur == 0:
                op_cost = 0
            else:
                op_cost = spent * alt_fare / alt_dur
            qrs = fare - op_cost + GAMMA * future_max_q_value
            Qsa_value[taken] = (1 - ALPHA) * Qsa_value[taken] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                # Periodic progress report plus checkpoint of both tables.
                progress = '%s, %d' % (yymm, count)
                print(progress)
                logging_msg(progress)
                save_pickle_file(_pkl_path(yymm), [Qsa_value, state_action_fare_dur])
        save_pickle_file(_pkl_path(yymm), [Qsa_value, state_action_fare_dur])
    end_msg = 'end the file; %s' % yymm
    print(end_msg)
    logging_msg(end_msg)

Exemplo n.º 18

0

Exibir arquivo

def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    """Fold one month of trips into the Q-value and fare/duration tables.

    Starts from the previous month's pickled tables when one exists,
    otherwise from all-zero tables; reads ``whole-trip-<yymm>.csv`` row by
    row, applies the update, and pickles both tables (also every
    ``MOD_STAN`` processed rows as a checkpoint).

    Args:
        ALPHA: weight of the new sample when blending into the old Q-value.
        GAMMA: multiplier on the best Q-value of the state after the trip.
        ALPHA_GAMMA_dir: directory where the per-month pickles live.
        yymm: year/month tag such as ``'0902'``.
    """
    def _tables_path(month_tag):
        # Built at call time, once for loading and again for each save.
        return ('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
                (ALPHA_GAMMA_dir, ALPHA, GAMMA, month_tag))

    def _announce(text):
        # Every status line goes both to stdout and to the project logger.
        print(text)
        logging_msg(text)

    _announce('handle the file; %s' % yymm)
    #
    print(yymm)
    # Hard-wired predecessors; every other month uses "month - 1".
    # NOTE(review): presumably these cover missing months -- confirm.
    fixed_predecessor = {'0901': None, '1001': '0911', '1011': '1009'}
    if yymm in fixed_predecessor:
        prev_yymm = fixed_predecessor[yymm]
    else:
        prev_yymm = '%02d%02d' % (int(yymm[:2]), int(yymm[2:]) - 1)
    #
    if prev_yymm:
        Qsa_value, state_action_fare_dur = load_picle_file(
            _tables_path(prev_yymm))
    else:
        # First month: zero-initialise every state/action combination.
        in_or_out = [IN_AP, OUT_AP]
        combos = [(day, hour_slot, where, action)
                  for day in DAY_OF_WEEK
                  for hour_slot in TIME_SLOTS
                  for where in in_or_out
                  for action in in_or_out]
        Qsa_value = dict((combo, 0) for combo in combos)
        # Value is a mutable [fare total, duration total] pair per key.
        state_action_fare_dur = dict((combo, [0, 0]) for combo in combos)
    #
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        position = dict(
            (name, headers.index(name))
            for name in ('prev-trip-end-time', 'prev-trip-end-location',
                         'start-time', 'start-location',
                         'end-time', 'end-location',
                         'duration', 'fare'))
        #
        count = 0
        for row in reader:
            # NOTE(review): eval() executes the cell text; safe only for
            # CSV files generated by trusted code.
            prev_tetime = eval(row[position['prev-trip-end-time']])
            stime = eval(row[position['start-time']])
            etime = eval(row[position['end-time']])
            setup_time = stime - prev_tetime
            # Ignore rows with a negative gap or a gap above two HOUR units.
            if setup_time < 0 or setup_time > HOUR * 2:
                continue
            # State before the trip.
            before = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = before.strftime("%a"), before.hour
            s3 = row[position['prev-trip-end-location']]
            # State after the trip.
            after = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = after.strftime("%a"), after.hour
            new_s3 = row[position['end-location']]
            # Action = where the trip was picked up.
            a = row[position['start-location']]
            dur = eval(row[position['duration']])
            fare = eval(row[position['fare']])
            #
            chosen = (s1, s2, s3, a)
            elapsed = setup_time + dur
            totals = state_action_fare_dur[chosen]
            totals[0] += fare
            totals[1] += elapsed
            # Larger of the two Q-values available in the next state.
            next_in = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            next_out = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)]
            if next_in > next_out:
                future_max_q_value = next_in
            else:
                future_max_q_value = next_out
            # Opportunity cost from the other action's accumulated
            # fare-per-time in the same state (zero when it has no time yet).
            alter_a = OUT_AP if a == IN_AP else IN_AP
            other_totals = state_action_fare_dur[(s1, s2, s3, alter_a)]
            if other_totals[1] == 0:
                op_cost = 0
            else:
                op_cost = elapsed * other_totals[0] / other_totals[1]
            qrs = fare - op_cost + GAMMA * future_max_q_value
            Qsa_value[chosen] = \
                (1 - ALPHA) * Qsa_value[chosen] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                # Progress line and intermediate checkpoint.
                _announce('%s, %d' % (yymm, count))
                save_pickle_file(_tables_path(yymm),
                                 [Qsa_value, state_action_fare_dur])
        save_pickle_file(_tables_path(yymm),
                         [Qsa_value, state_action_fare_dur])
    _announce('end the file; %s' % yymm)

Exemplo n.º 19

0

Exibir arquivo

def run():
    """Summarise per-driver productivity for the Y09 and Y10 CSVs and pickle it.

    Reads the six ``individual-*`` CSV files under ``individual_detail_dir``,
    filters the ``prev-in-ap`` / ``prev-out-ap`` rows, keeps only drivers
    (``did``) present in all six tables, averages the selected columns per
    driver and writes the driver list plus ten ``{did: value}`` dictionaries
    to ``productivities_ext.pkl``.
    """
    def _filtered(frame):
        # Keep rows with ap-prod below PROD_LIMIT and a positive op-cost.
        frame = frame[frame['ap-prod'] < PROD_LIMIT]
        return frame[frame['op-cost'] > 0]

    def _only(frame, drivers):
        # Rows belonging to the retained drivers.
        return frame[frame['did'].isin(drivers)]

    def _driver_table(means, column, label):
        # Two-column (did, average) table for one averaged column.
        return means[column].to_frame(label).reset_index()

    def _times_hour(table):
        # Average multiplied by HOUR and divided by CENT, keyed by driver.
        return dict((did, avg * HOUR / CENT) for did, avg in table.values)

    def _over_cent(table):
        # Average divided by CENT, keyed by driver.
        return dict((did, avg / CENT) for did, avg in table.values)

    gen09 = pd.read_csv('%s/Y09-individual-general.csv' % (individual_detail_dir))
    gen10 = pd.read_csv('%s/Y10-individual-general.csv' % (individual_detail_dir))
    pin09 = pd.read_csv('%s/Y09-individual-prev-in-ap.csv' % (individual_detail_dir))
    pin10 = pd.read_csv('%s/Y10-individual-prev-in-ap.csv' % (individual_detail_dir))
    pout09 = pd.read_csv('%s/Y09-individual-prev-out-ap.csv' % (individual_detail_dir))
    pout10 = pd.read_csv('%s/Y10-individual-prev-out-ap.csv' % (individual_detail_dir))
    #
    pin09, pin10 = _filtered(pin09), _filtered(pin10)
    pout09, pout10 = _filtered(pout09), _filtered(pout10)
    # Drivers present in both years of every table. The pairwise grouping is
    # kept as-is because the resulting list order is part of the pickle.
    gen_both = set(gen09['did']) & set(gen10['did'])
    pin_both = set(pin09['did']) & set(pin10['did'])
    pout_both = set(pout09['did']) & set(pout10['did'])
    subset_drivers = gen_both & pin_both
    subset_drivers = list(subset_drivers & pout_both)
    #
    gen09, gen10 = _only(gen09, subset_drivers), _only(gen10, subset_drivers)
    pin09, pin10 = _only(pin09, subset_drivers), _only(pin10, subset_drivers)
    pout09, pout10 = _only(pout09, subset_drivers), _only(pout10, subset_drivers)
    # Per-driver column means, computed once per table.
    gen09_avg, gen10_avg = gen09.groupby(['did']).mean(), gen10.groupby(['did']).mean()
    pin09_avg, pin10_avg = pin09.groupby(['did']).mean(), pin10.groupby(['did']).mean()
    pout09_avg, pout10_avg = pout09.groupby(['did']).mean(), pout10.groupby(['did']).mean()
    #
    gen09_prod = _times_hour(_driver_table(gen09_avg, 'total-prod', 'avg_total_prod'))
    gen10_prod = _times_hour(_driver_table(gen10_avg, 'total-prod', 'avg_total_prod'))
    #
    pin09_prod = _times_hour(_driver_table(pin09_avg, 'ap-prod', 'avg_ap_prod'))
    pin10_prod = _times_hour(_driver_table(pin10_avg, 'ap-prod', 'avg_ap_prod'))
    pin09_profit = _over_cent(_driver_table(pin09_avg, 'ap-eco-profit', 'avg_eco_pro'))
    pin10_profit = _over_cent(_driver_table(pin10_avg, 'ap-eco-profit', 'avg_eco_pro'))
    #
    pout09_prod = _times_hour(_driver_table(pout09_avg, 'ap-prod', 'avg_ap_prod'))
    pout10_prod = _times_hour(_driver_table(pout10_avg, 'ap-prod', 'avg_ap_prod'))
    pout09_profit = _over_cent(_driver_table(pout09_avg, 'ap-eco-profit', 'avg_eco_pro'))
    pout10_profit = _over_cent(_driver_table(pout10_avg, 'ap-eco-profit', 'avg_eco_pro'))
    # Payload order is fixed: readers unpack this list positionally.
    payload = [subset_drivers,
               gen09_prod, gen10_prod,
               pin09_prod, pin10_prod,
               pout09_prod, pout10_prod,
               pin09_profit, pin10_profit,
               pout09_profit, pout10_profit]
    save_pickle_file('%s/productivities_ext.pkl' % (individual_detail_dir), payload)