def get_regression_features(zone_choice,
                            arrival_choice=-1,
                            cut_choice=-1,
                            cap_mode_choice=2):
    """
    Builds the regression inputs from a zone's unstacked summary file.

    Args:
        zone_choice: Zone identifier passed to get_zone_output_path and
            unstack_summary_df.
        arrival_choice (int): Keep only rows with this 'arrival' value;
            -1 disables the filter.
        cut_choice (int): 0 or 1 keeps rows whose 'cut' equals that value,
            2 keeps rows whose 'cut' differs from 2, anything else keeps
            every row.
        cap_mode_choice (int): 0 -> eco + capacity, 1 -> additionally
            'capacity_avg_day', otherwise additionally 'capacity_avg_global'.

    Returns:
        (pd.DataFrame, pd.Series, pd.DataFrame): feature columns x, the
        'discount' target y and the filtered dataframe they were taken from.
    """
    zone_path, zone_file_prefix = get_zone_output_path(zone_choice, root)
    summary_csv = os.path.join(zone_path, zone_file_prefix + 'Summary.csv')
    df = unstack_summary_df(pd.read_csv(summary_csv), zone=zone_choice)
    df = df.dropna(subset=['capacity']).fillna(0)
    df['cut'] = to_categories(df['cut'])

    # Cut filter: 2 means "everything except category 2".
    if cut_choice == 2:
        df = df[df['cut'] != cut_choice]
    elif cut_choice in [0, 1]:
        df = df[df['cut'] == cut_choice]

    if arrival_choice != -1:
        df = df[df['arrival'] == arrival_choice]

    y = df["discount"]

    # The capacity mode only decides how long a prefix of this list is used.
    feature_columns = [
        'eco', 'capacity', 'capacity_avg_day', 'capacity_avg_global'
    ]
    if cap_mode_choice == 0:
        n_columns = 2
    elif cap_mode_choice == 1:
        n_columns = 3
    else:
        n_columns = 4
    x = df[feature_columns[:n_columns]]
    return x, y, df
示例#2
0
def create_summary_by_zone(zones):
    """
    Builds the summary for every given zone and plots its arrivals.

    Args:
        zones (iterable): Zone identifiers; each one is passed to
            get_zone_output_path and load_data_file_for_zone.
    """
    for zone_id in zones:
        out_dir, out_prefix = get_zone_output_path(zone_id, data_path)
        avail_df, order_df, steer_df, cap_df = load_data_file_for_zone(
            zone_id, data_path)

        # Only the first element of the observed-slots pair is needed below.
        observed, _ = get_slots_observed(out_dir,
                                         out_prefix,
                                         avail_df,
                                         order_df.columns,
                                         check_existing=False)
        _, _, offered, coffered = get_slots_active(out_dir, out_prefix,
                                                   order_df, observed)

        zone_summary = summarize(out_dir, out_prefix, avail_df, order_df,
                                 steer_df, cap_df, offered,
                                 coffered).reset_index()
        zone_summary['EVENT_DTM'] = pd.to_datetime(zone_summary['EVENT_DTM'])
        plot_arrivals(out_dir, out_prefix, zone_summary, '60min', 'mean',
                      'ALL')
示例#3
0
def create_eco_and_discount_from_steering(zone):
    """
    Reads steering file and creates eco and discount files from it.

    Args:
        zone (str): Zone number written as a float string (presumably
            like '500.0' -- TODO confirm against callers).

    Returns:
        (pd.DataFrame): eco_df, discount_df
    """
    # Resolve the output folder and file prefix for this zone.
    zone_path, zone_file_prefix = get_zone_output_path(zone, data_path)
    # Load the four per-zone inputs: availability, orders, steering, capacity.
    df_avail, df_order, df_steer, df_cap = load_data_file_for_zone(
        zone, data_path)
    # NOTE(review): the visible body stops here; the code that builds and
    # returns eco_df / discount_df promised by the docstring is not shown.
示例#4
0
def split_summary_by_day_and_cut():
    """
    Splits each zone summary by arrival day and by cut-off category.

    For the zones '700.0' and '500.0' the zone summary CSV is read, the
    arrivals are plotted per arrival day (0-6) and, for every combination of
    day and cut-off category, the matching rows are written to a
    'Summary.csv' in the folder returned by get_cut_path. Non-empty subsets
    are plotted as well.

    Returns:
        None
    """
    zlist = ['700.0', '500.0']
    for zone in zlist:
        zone_path, zone_file_prefix = get_zone_output_path(zone, data_path)
        # zone[:-2] drops the trailing '.0' to form the summary file name.
        summary_zone = pd.read_csv(
            os.path.join(zone_path, 'Zone_' + zone[:-2] + '_Summary.csv'))
        summary_zone['EVENT_DTM'] = pd.to_datetime(summary_zone['EVENT_DTM'])
        slots_zone_offered, cslots_zone_offered = read_offered_slots(zone_path)
        for day in ['0', '1', '2', '3', '4', '5', '6']:
            plot_arrivals(
                os.path.join(zone_path, day),
                os.path.join(zone_file_prefix, '_Arrival_Day_' + day),
                summary_zone, '60min', 'mean', day)

            for cutcat in ['BEFORE_CUT1', 'BEFORE_CUT2', 'MISSED_BOTH_CUTS']:
                print(zone, day, cutcat)
                cut_path, cut_prefix = get_cut_path(day, cutcat, zone_path,
                                                    zone_file_prefix)
                summary = summary_zone.loc[
                    (summary_zone['ARRIVAL_CAT'] == cutcat)
                    & (summary_zone['ARRIVAL_DAY'] == int(day))]
                # Only the offered slots of this subset are needed here.
                _, _, slots_offered, cslots_offered = get_slots_active(
                    cut_path, cut_prefix,
                    summary[slots_zone_offered + ['SLOT_CHOICE']],
                    slots_zone_offered)
                # First 16 columns, then purchase indicators, censored
                # indicators and per-slot eco / discount / capacity columns.
                cols = summary.columns[0:16].tolist() + ['SLOTS_AVAILABLE', 'NO_PURCHASE'] + slots_offered + \
                       ['CNO_PURCHASE'] + cslots_offered + [col + '_Eco' for col in slots_offered] + \
                       [col + '_Discount' for col in slots_offered] + [col + '_Capacity' for col in slots_offered]
                summary[cols].to_csv(os.path.join(cut_path,
                                                  cut_prefix + 'Summary.csv'),
                                     index=False)
                # pandas has no top-level ``size`` function; use the
                # DataFrame attribute to skip plotting for empty subsets.
                if summary.size:
                    plot_arrivals(cut_path, cut_prefix, summary[cols], '60min',
                                  'mean', day)
                    plt.close()
示例#5
0
def get_regression_features_ranked(zone_choice,
                                   arrival_choice=-1,
                                   cut_choice=-1,
                                   cap_mode_choice=2):
    """
    Builds the regression inputs from a zone's ranked per-day summaries.

    The seven 'Arrival_Day_<d>_Summary.csv' files in the zone's 'RankData'
    folder are concatenated, unstacked with unstack_summary_df_ranked and
    filtered. The 'slot' column ('<day>_<slot>') is split into a day part
    and a slot part, and both are one-hot encoded.

    Args:
        zone_choice: Zone identifier passed to get_zone_output_path and
            unstack_summary_df_ranked.
        arrival_choice (int): Keep only rows with this 'arrival' value;
            -1 disables the filter.
        cut_choice (int): 0 or 1 keeps rows whose 'cut' equals that value,
            2 keeps rows whose 'cut' differs from 2, anything else keeps
            every row.
        cap_mode_choice (int): 0 -> 'capacity' only, 1 -> additionally
            'capacity_avg_day', otherwise additionally 'capacity_avg_global'.

    Returns:
        (pd.DataFrame, pd.Series, pd.DataFrame): encoded features x, the
        'discount' target y and the filtered dataframe they were taken from.
    """
    zone_path, zone_file_prefix = get_zone_output_path(zone_choice, root)
    path = os.path.join(zone_path, 'RankData',
                        zone_file_prefix + 'Arrival_Day_{}' + '_Summary.csv')
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0
    # and quadratic in the number of frames).
    df = pd.concat([pd.read_csv(path.format(day)) for day in range(7)])
    df = unstack_summary_df_ranked(
        df, zone=zone_choice).dropna(subset=['capacity'])
    df = df.fillna(0)
    df['cut'] = to_categories(df['cut'])

    if cut_choice in [0, 1]:
        df = df[df['cut'] == cut_choice]
    elif cut_choice == 2:
        df = df[df['cut'] != cut_choice]

    if arrival_choice != -1:
        df = df[df['arrival'] == arrival_choice]
    features = ['eco', 'arrival', 'cut', 'slot']

    y = df["discount"]
    if cap_mode_choice == 0:
        columns = features + ['capacity']
    elif cap_mode_choice == 1:
        columns = features + ['capacity', 'capacity_avg_day']
    else:
        columns = features + [
            'capacity', 'capacity_avg_day', 'capacity_avg_global'
        ]
    # Work on an explicit copy so the column assignments below do not write
    # into a slice of df (chained assignment).
    x = df[columns].copy()

    # 'slot' looks like '<day>_<slot>'; split on the first underscore only.
    slot = x['slot'].str.split("_", n=1, expand=True)
    x['day'] = slot[0]
    x['slot'] = slot[1]
    x = col_to_one_hot(x, 'slot', prefix='slot', drop_first=True)
    x = col_to_one_hot(x, 'day', prefix='day', drop_first=True)
    return x, y, df
示例#6
0
                               len(gr_cols))
        print('Iteration=', i, 'loglikelihood =', log_likeli, 'beta_disc',
              beta_coef[-3], 'beta_eco', beta_coef[-2], 'beta_gr',
              beta_coef[-1])
        if np.linalg.norm(beta_coef[:-1] - beta[:-1]) < 10**-6 or i > 500:
            predict_prob_df = pd.DataFrame(Q,
                                           columns=['NO_PURCHASE'] +
                                           slots_offered)
            beta_df = pd.DataFrame([np.array(beta_coef)],
                                   columns=['NO_PURCHASE'] + slots_offered +
                                   ['Discount', 'Eco', 'Gr'])
            predict_prob_df.to_csv(Location + filename +
                                   'predprobfeatures.csv')
            beta_df.to_csv(Location + filename + 'betafeatures.csv')
            del summary, predict_prob_df, design_df, features_df, assortment_df, choice_df, design, features, assortment, choice
            break
    return beta_df.iloc[0]


if __name__ == "__main__":
    # Script entry point: fit the bootstrap feature model on one stored,
    # stacked summary file and the zone's offered-slot titles.
    zone_path, zone_file_prefix = get_zone_output_path(zone, root)

    # Load the summary, drop the no-purchase eco/discount columns and every
    # row that still contains a missing value.
    summary = pd.read_csv(
        '/Users/anupamtripathi/PycharmProjects/RA_/results/gr/500.0/gr_unranked_stacked_arrival_0_cut_-1_cap_2.csv'
    ).drop(columns=['NO_PURCHASE_Eco', 'NO_PURCHASE_Discount']).dropna()

    slots_offered = pd.read_csv(
        os.path.join(zone_path, zone_file_prefix + 'SlotsOfferedTitle.csv'))
    offered_titles = slots_offered['slotsOffered'].tolist()
    MMfeaturesBoot(zone_path, zone_file_prefix, summary, offered_titles)
示例#7
0
def unstack_summary_df_ranked(summary_df,
                              zone,
                              root='data',
                              check_saved=False,
                              save=False):  # do for summary
    """
    Unstacks the ranked summary dataframe into one row per event and slot.

    For every observed slot and for 'NO_PURCHASE', a frame with the
    per-event features (capacity, discount, eco, average capacity of the
    other slots globally and on the same day, order and availability
    indicators) is built; the frames are stacked and the 'slot' column is
    one-hot encoded.

    Note:
        Until the NaN fill, columns ('primary_key', 'capacity_sum_global',
        'capacity_sum_day_<d>') are added to the passed ``summary_df`` in
        place.

    Args:
        summary_df (dataframe): summary dataframe
        zone: Zone identifier passed to get_zone_output_path.
        root (str): Root passed to get_zone_output_path and the slot helpers.
        check_saved (bool): If True and a saved 'unstacked.csv' exists in the
            zone's 'RankData' folder, return it instead of recomputing.
        save (bool): If True, write the result to that 'unstacked.csv'.

    Returns: unstacked dataframe
    """
    zone_path, zone_file_prefix = get_zone_output_path(zone, root)
    unstacked_path = os.path.join(zone_path, 'RankData',
                                  zone_file_prefix + 'unstacked.csv')
    if check_saved and os.path.exists(unstacked_path):
        return pd.read_csv(unstacked_path, index_col=0)

    summary_df['primary_key'] = summary_df['EVENT_DTM'].astype(
        str) + '-' + summary_df['CUSTOMER_ID'].astype(str)
    slots_per_day = u.get_slots_per_day_for_zone_ranked(zone, root)
    slots, cslots = u.get_slots_observed_ranked(zone_path, zone_file_prefix,
                                                summary_df)

    # Capacity sums: over all slots, then per ranked day (1..7). The
    # capacity columns are taken to be contiguous and ordered by day.
    first_capacity = slots[0] + '_Capacity'
    last_capacity = slots[-1] + '_Capacity'
    summary_df['capacity_sum_global'] = summary_df.loc[
        :, first_capacity:last_capacity].fillna(0).sum(axis=1)
    slot_end = list(summary_df.columns).index(first_capacity)
    for day in range(1, 8):
        slot_start, slot_end = slot_end, slot_end + slots_per_day[day]
        summary_df['capacity_sum_day_' + str(day)] = summary_df.iloc[
            :, slot_start:slot_end].fillna(0).sum(axis=1)
    summary_df = summary_df.fillna(0)

    # Collect the per-slot frames and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    frames = []
    for n in tqdm(slots + ['NO_PURCHASE']):
        is_no_purchase = n == 'NO_PURCHASE'
        features = pd.DataFrame()
        features['primary_key'] = summary_df['primary_key']
        features['arrival'] = summary_df['ARRIVAL_DAY']
        features['cut'] = summary_df['ARRIVAL_CAT']
        features['slot'] = n
        # Slot names start with their ranked day digit; NO_PURCHASE has none.
        features['day'] = n if is_no_purchase else n[0]
        if not is_no_purchase:
            own_capacity = summary_df[n + '_Capacity']
            features['capacity'] = own_capacity
            features['discount'] = summary_df[n + '_Discount']
            features['eco'] = summary_df[n + '_Eco']
            # Averages over the *other* slots, hence "- own" and "- 1".
            features['capacity_avg_global'] = (
                summary_df['capacity_sum_global'] -
                own_capacity) / (len(slots) - 1)
            features['capacity_avg_day'] = (
                summary_df['capacity_sum_day_' + str(n[0])] -
                own_capacity) / (slots_per_day[int(n[0])] - 1)
        else:
            features['capacity'] = 1
            features['discount'] = 0
            features['eco'] = 0
            features['capacity_avg_global'] = 1
            features['capacity_avg_day'] = 1
        features['order'] = summary_df[n]
        features['avail'] = summary_df['C' + n]
        frames.append(features)
    df_slots = pd.concat(frames)
    df_slots = u.col_to_one_hot(df_slots, 'slot', prefix='slot', delete=False)
    if save:
        df_slots.to_csv(unstacked_path)
    return df_slots
示例#8
0
def _rank_by_weekday(arrival_day):
    """Maps each weekday digit '0'-'6' to its rank '1'-'7' after arrival_day.

    The day following the arrival day gets rank '1' and the arrival day
    itself gets rank '7'.
    """
    return {
        str(weekday): str((weekday - arrival_day - 1) % 7 + 1)
        for weekday in range(7)
    }


def _to_ranked_name(name, daylist, hourlist, offset=0):
    """Rewrites a slot-based name into its '<rank>_<hour>' form.

    ``offset`` is the number of leading characters kept verbatim (1 for the
    'C'-prefixed columns). The weekday digit sits at ``offset`` and the
    13-character key looked up in ``hourlist`` starts two characters later.
    """
    return (name[:offset] + daylist[name[offset]] + '_' +
            hourlist[name[offset + 2:offset + 15]] + name[offset + 15:])


def create_rank_data_by_zone(zone, computername):
    """
    Converts a zone summary from weekday-based to rank-based slot names.

    For every arrival day (0-6) the slot columns and the 'SLOT_CHOICE'
    values are renamed from '<weekday>...' to '<rank>_<hour>...', where the
    rank counts days after the arrival day, and the result is written to
    'RankData/<prefix>Arrival_Day_<d>_Summary.csv'. The seven frames are
    then stacked, reduced to a fixed column order, extended with a 'CUT2'
    flag and one '<d>_ARRIVAL' dummy per arrival day, and written to
    'RankData/<prefix>SummaryNew.csv'.

    Args:
        zone: Zone identifier passed to get_zone_output_path and
            get_hour_list.
        computername: Second argument of get_zone_output_path.

    Returns:
        None
    """
    location, filename = get_zone_output_path(zone, computername)
    hourlist = get_hour_list(zone)
    summary = pd.read_csv(os.path.join(location, filename + 'Summary.csv'))
    slots_offered, _ = u.get_slots_active_ranked(location, filename, summary)
    rank_folder = os.path.join(location, 'RankData')

    # Every rank (1..7) combined with every hour label.
    slots_offered_rank = [
        str(rank) + '_' + hour for rank in range(1, 8)
        for hour in hourlist.values()
    ]

    # Build the membership sets once instead of per renamed column.
    slot_names = set(slots_offered)
    slot_columns = set(slot_names)
    for suffix in ('_Eco', '_Discount', '_Capacity'):
        slot_columns.update(col + suffix for col in slots_offered)
    cslot_columns = {'C' + col for col in slots_offered}

    summaries = []
    for day in range(7):
        daylist = _rank_by_weekday(day)
        summary_day = summary[summary['ARRIVAL_DAY'] == day]
        summary_day = summary_day.rename(
            columns=lambda x: _to_ranked_name(x, daylist, hourlist)
            if x in slot_columns else x)
        summary_day = summary_day.rename(
            columns=lambda x: _to_ranked_name(x, daylist, hourlist, offset=1)
            if x in cslot_columns else x)
        slot_choice = summary_day['SLOT_CHOICE'].fillna('NO_PURCHASE')
        summary_day['SLOT_CHOICE'] = slot_choice.apply(
            lambda row: _to_ranked_name(row, daylist, hourlist)
            if row in slot_names else row)
        summary_day.to_csv(os.path.join(
            rank_folder, filename + 'Arrival_Day_{}_Summary.csv'.format(day)),
                           index=False)
        summaries.append(summary_day)

    summary = pd.concat(summaries, axis=0, sort=False)
    print(slots_offered, list(summary.columns))
    ordered_columns = [
        'EVENT_DTM', 'CUSTOMER_ID', 'ORDER_ID', 'ARRIVAL_CAT', 'ARRIVAL_DAY',
        'DAY_OF_ORDER', 'CUT_OFF_1', 'CUT_OFF_2', 'SLOT_CHOICE', 'SLOT_STAMP',
        'WINDOWS_STEERING', 'ZONE', 'ESTIMATED_SUBTOTAL', 'TOTAL_TIMESLOTS',
        'CENSORED', 'TOTAL_ORDER'
    ]
    ordered_columns += ['NO_PURCHASE'] + slots_offered_rank
    ordered_columns += ['CNO_PURCHASE'
                        ] + ['C' + col for col in slots_offered_rank]
    for suffix in ('_Eco', '_Discount', '_Capacity'):
        ordered_columns += [col + suffix for col in slots_offered_rank]
    # Copy so the indicator columns below are added to an independent frame.
    summary = summary[ordered_columns].copy()

    summary['CUT2'] = (summary['ARRIVAL_CAT'] == 'BEFORE_CUT2').astype(int)
    for day in range(0, 7):
        summary[str(day) +
                '_ARRIVAL'] = (summary['ARRIVAL_DAY'] == day).astype(int)
    summary.to_csv(os.path.join(rank_folder, filename + 'SummaryNew.csv'),
                   index=False)