Example #1
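# Shared imports assumed by the excerpts below (a sketch inferred from
# usage, not shown in the original source).  "f" and "cv" are project
# function/converter modules, "cf" is a project config module, and "mcl"
# is a project color-list helper; their import paths are project-specific.
import os
import pickle
from sys import argv
from collections import defaultdict, OrderedDict as od
import numpy as np
import pandas as pd
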
# insert stovepipe job result into new column of proposal (month_form)
# this indexes the jobs with empkeys (orig_jobs is an ndarray only)

df_proposal['orig_job'] = orig_jobs

# ASSIGN JOBS - flush and no flush option*

# cmonths - career length in months for each employee.
#   length is equal to number of employees
cmonths = f.career_months_df_in(df_proposal)

# nonret_each_month: count of non-retired employees remaining
# in each month until no more remain -
# length is equal to longest career length
nonret_each_month = f.count_per_month(cmonths)
all_months = np.sum(nonret_each_month)
cumulative = nonret_each_month.cumsum()
np_low_limits = f.make_lower_slice_limits(cumulative)
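# illustrative sketch with hypothetical numbers: if cmonths were
# [2, 4, 3] (assuming count_per_month counts employees whose career
# length exceeds the month index), nonret_each_month would be
# [3, 3, 2, 1], cumulative would be [3, 6, 8, 9], and np_low_limits
# [0, 3, 6, 8] - month m of the long-form data then occupies rows
# np_low_limits[m]:cumulative[m]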

job_level_counts = np.array(jcnts_arr[1])

if cf.delayed_implementation:

    imp_month = cf.imp_month
    imp_low = np_low_limits[imp_month]
    imp_high = cumulative[imp_month]

    dstand = pd.read_pickle(stand_path_string)
    ds_option = dstand[['job_count', 'lspcnt',
                        'spcnt', 'rank_in_job', 'jobp']]
Example #2

def main():

    script, case = argv

    os.makedirs('dill/', exist_ok=True)

    try:
        # check to see if file exists and get value if it does
        case_dill_value = pd.read_pickle('dill/case_dill.pkl').case.value
    except OSError:
        case_dill_value = 'empty_placeholder'

    if case_dill_value == case:
        # if stored value is same as case study name, remove the files
        # which will be replaced.  Removal of old files then writing the
        # new files to disk is faster than overwriting the old files.
        if os.path.isdir('dill/'):

            clear_files = [
                'squeeze_vals.pkl', 'last_month.pkl', 'dict_color.pkl',
                'dict_settings.pkl', 'dict_attr.pkl', 'master.pkl',
                'pay_table_enhanced.pkl', 'pay_table_basic.pkl'
            ]

            filelist = \
                [pkl for pkl in os.listdir('dill/') if pkl in clear_files]
            for pkl in filelist:
                os.remove('dill/' + pkl)
    else:
        # if the case name is different, delete all dill files (stored
        # calculated files).
        # create new case_dill.pkl file
        f.clear_dill_files()
        case_dill = pd.DataFrame({'case': case}, index=['value'])
        case_dill.to_pickle('dill/case_dill.pkl')

    # START THE SETTINGS DICTIONARY - POPULATE WITH THE SCALARS ONLY
    # some of these values will be used for pay data calculation
    # Then some of the calculated pay data is used to further populate the
    # settings dictionary

    xl = pd.read_excel('excel/' + case + '/settings.xlsx', sheet_name=None)
    settings = defaultdict(int)
    # ## scalars
    settings.update(f.make_dict_from_columns(xl['scalars'], 'option', 'value'))

    # PAY TABLES $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

    xl_pay_path = 'excel/' + case + '/pay_tables.xlsx'

    # read pay table data from excel file
    pay_rates = pd.read_excel(xl_pay_path, sheet_name='rates')
    # read monthly pay hours per job level and job description from excel file
    pay_hours = pd.read_excel(xl_pay_path, sheet_name='hours')

    # inputs which determine the global sorting master year and
    # longevity (function parameters).
    # A second set of parameters for the enhanced model is included and
    # set to None; check for not None to alter the enhanced sort...
    year = settings['pay_table_year_sort']
    longevity = settings['pay_table_longevity_sort']

    # initialize the worksheet dict values to None
    basic_compen = None
    full_mth_compen = None
    part_mth_compen = None
    job_key_enhan = None
    job_key_basic = None
    basic = None
    enhanced = None
    job_dict_df = None

    # numpy unique returns a SORTED array of unique elements
    contract_years = np.unique(pay_rates.year)
    settings['contract_end'] = max(contract_years)
    settings['contract_years'] = contract_years

    # extract integer column names (represents years of pay longevity)
    longevity_cols = []
    for col in pay_rates.columns.values.tolist():
        try:
            int(col)
            longevity_cols.append(col)
        except ValueError:
            pass

    table_cols = ['year', 'jnum']
    table_cols.extend(longevity_cols)

    basic = pd.merge(pay_rates, pay_hours)

    # For enhanced_jobs:
    enhanced_full = basic.copy()
    enhanced_part = basic.copy()

    # SELECTED COLUMNS MULTIPLIED BY A DESIGNATED COLUMN ROW VALUE

    basic[longevity_cols] = basic[longevity_cols]\
        .multiply(basic['basic_hours'], axis="index")

    # sort by year and job level and only keep columns: 'year', 'jnum',
    # and all year longevity (integer) columns

    basic_compen = basic.sort_values(['year', 'jnum'])[table_cols]\
        .set_index('year', drop=True)

    # create small dataframes for furloughed pay data (no pay)
    fur_rows = pd.DataFrame(0.,
                            index=np.arange(len(contract_years)),
                            columns=basic.columns)

    basic_fur_rows = fur_rows.copy()
    basic_fur_rows.jnum = basic.jnum.max() + 1
    basic_fur_rows.year = contract_years
    basic_fur_rows.jobstr = 'FUR'

    # CONCATENATE the furlough pay data to the basic pay data
    basic = pd.concat([basic, basic_fur_rows])

    # select a SECTION OF THE PAY DATA TO USE AS A MASTER ORDER
    # for entire pay dataframe(s).
    # In other words, the job level order of the entire pay
    # dataframe will match the selected year and pay longevity
    # order, even if certain year and pay level compensation
    # amounts are not in descending order.
    # The order must be consistent for the data model.
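    # for example (hypothetical settings): with year == 2014 and
    # longevity == 5, job levels are ranked below by descending 2014,
    # 5-year-longevity monthly compensation, and that single ranking
    # labels the job levels for every contract year in the table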
    order_basic = basic[basic.year == year][['jnum', longevity, 'jobstr']]\
        .sort_values(longevity, ascending=False)

    order_basic['order'] = np.arange(len(order_basic)) + 1

    job_key_basic = order_basic[['order', 'jobstr', 'jnum']].copy()

    # make a dataframe to save the job level hierarchy

    job_key_basic.set_index('order', drop=True, inplace=True)
    job_key_basic.rename(columns={'jnum': 'orig_order'}, inplace=True)

    # this is the way to sort each job level hierarchy for each year.
    # this dataframe is merged with the 'basic' dataframe
    # then basic is sorted by year and order columns
    order_basic = order_basic.reset_index()[['jnum', 'order']]

    basic = pd.merge(basic, order_basic).sort_values(['year', 'order'])\
        .reset_index(drop=True)

    basic.jnum = basic.order

    basic_df = basic[table_cols].copy()

    # MELT AND INDEX - CREATING INDEXED MONTHLY PAY DATAFRAME(S)
    melt_basic = pd.melt(basic_df,
                         id_vars=['year', 'jnum'],
                         var_name='scale',
                         value_name='monthly')

    melt_basic['ptindex'] = (melt_basic.year * 100000 +
                             melt_basic.scale * 100 + melt_basic.jnum)
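    # worked example: year 2014, scale 7, jnum 3 encode to
    # 2014 * 100000 + 7 * 100 + 3 = 201400703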

    melt_basic.drop(['scale', 'year', 'jnum'], axis=1, inplace=True)
    melt_basic.sort_values('ptindex', inplace=True)
    melt_basic.set_index('ptindex', drop=True, inplace=True)
    melt_basic.to_pickle('dill/pay_table_basic.pkl')

    # Calculate for enhanced_jobs and write to workbook
    # ENHANCED JOBS

    # calculate monthly compensation for each job level and pay longevity
    enhanced_full[longevity_cols] = enhanced_full[longevity_cols]\
        .multiply(enhanced_full['full_hours'], axis="index")

    enhanced_part[longevity_cols] = enhanced_part[longevity_cols]\
        .multiply(enhanced_part['part_hours'], axis="index")

    # ENHANCED TABLE SUFFIXES, COLUMNS, JNUMS(ENHANCED_PART)

    # make enhanced_part (fewer hours per position per month)
    # jnums begin with maximum enhanced_full jnum + 1 and
    # increment upwards
    enhanced_part.jnum = enhanced_part.jnum + enhanced_part.jnum.max()

    # sort by year and job level and only keep columns: 'year', 'jnum',
    # and all year longevity (integer) columns

    full_mth_compen = enhanced_full.sort_values(['year',
                                                'jnum'])[table_cols]\
        .set_index('year', drop=True)
    part_mth_compen = enhanced_part.sort_values(['year',
                                                'jnum'])[table_cols]\
        .set_index('year', drop=True)

    # add appropriate suffixes to jobstr columns for full
    # and part enhanced tables
    full_suf = settings['enhanced_jobs_full_suffix']
    part_suf = settings['enhanced_jobs_part_suffix']
    enhanced_full.jobstr = enhanced_full.jobstr.astype(str) + full_suf
    enhanced_part.jobstr = enhanced_part.jobstr.astype(str) + part_suf

    # CONCATENATE the full and part(-time) enhanced jobs dataframes
    enhanced = pd.concat([enhanced_full, enhanced_part])

    enhan_fur_rows = fur_rows.copy()
    enhan_fur_rows.jnum = enhanced.jnum.max() + 1
    enhan_fur_rows.year = contract_years
    enhan_fur_rows.jobstr = 'FUR'

    # CONCATENATE the furlough pay data to the enhanced pay data
    enhanced = pd.concat([enhanced, enhan_fur_rows])

    # select a SECTION OF THE PAY DATA TO USE AS A MASTER ORDER
    # for entire pay dataframe(s).
    order_enhan = \
        enhanced[enhanced.year == year][['jnum', longevity, 'jobstr']]\
        .sort_values(longevity, ascending=False)

    order_enhan['order'] = np.arange(len(order_enhan)) + 1
    job_key_enhan = order_enhan[['order', 'jobstr', 'jnum']].copy()

    # make a dataframe to assist with job dictionary construction
    # (case_specific config file variable 'jd')

    s = job_key_enhan['jnum'].reset_index(drop=True)
    jobs = np.arange((s.max() - 1) / 2) + 1
    j_cnt = jobs.max()
    idx_list1 = []
    idx_list2 = []
    for job_level in jobs:
        idx_list1.append(s[s == job_level].index[0] + 1)
        idx_list2.append(s[s == job_level + j_cnt].index[0] + 1)

    dict_data = (('job', jobs.astype(int)),
                 ('full', idx_list1),
                 ('part', idx_list2),
                 ('jobstr', list(job_key_basic.jobstr[:int(j_cnt)])),
                 ('full_pcnt', list(pay_hours.full_pcnt)))
    # use of ordered dict preserves column order
    job_dict_df = pd.DataFrame(data=od(dict_data)).set_index('job', drop=True)
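    # the resulting frame maps each basic job level to the enhanced job
    # levels its full-time and part-time variants occupy after the
    # pay-based reordering, plus the job description and full-time
    # percentage (one row per basic job)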

    # make a dataframe to save the job level hierarchy

    job_key_enhan.set_index('order', drop=True, inplace=True)
    job_key_enhan.rename(columns={'jnum': 'concat_order'}, inplace=True)
    order_enhan = order_enhan.reset_index()[['jnum', 'order']]
    enhanced = pd.merge(enhanced,
                        order_enhan).sort_values(['year', 'order'])\
        .reset_index(drop=True)

    enhanced.jnum = enhanced.order
    enhanced_df = enhanced[table_cols].copy()

    # MELT AND INDEX - CREATING INDEXED MONTHLY PAY DATAFRAME(S)

    melt_enhan = pd.melt(enhanced_df,
                         id_vars=['year', 'jnum'],
                         var_name='scale',
                         value_name='monthly')

    melt_enhan['ptindex'] = (melt_enhan.year * 100000 +
                             melt_enhan.scale * 100 + melt_enhan.jnum)

    melt_enhan.drop(['scale', 'year', 'jnum'], axis=1, inplace=True)
    melt_enhan.sort_values('ptindex', inplace=True)
    melt_enhan.set_index('ptindex', drop=True, inplace=True)
    melt_enhan.to_pickle('dill/pay_table_enhanced.pkl')

    # WRITE PAY DATA TO EXCEL FILE - WITHIN CASE-NAMED FOLDER
    # WITHIN THE 'REPORTS' FOLDER

    path = 'reports/' + case + '/'
    os.makedirs(path, exist_ok=True)

    writer = pd.ExcelWriter(path + 'pay_table_data.xlsx')
    # string to dataframe items for ws_dict
    dict_items = (('basic (no sort)', basic_compen),
                  ('enhanced full (no sort)', full_mth_compen),
                  ('enhanced part (no sort)', part_mth_compen),
                  ('basic ordered', basic),
                  ('enhanced ordered', enhanced),
                  ('basic job order', job_key_basic),
                  ('enhanced job order', job_key_enhan),
                  ('job dict', job_dict_df))

    ws_dict = od(dict_items)
    # write pay data dataframes to workbook
    for key, value in ws_dict.items():
        try:
            value.to_excel(writer, key)
        except Exception:
            pass

    writer.save()

    # $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # dict items from calculated pay data - refactor to eliminate reading file.
    # just use variables from above...

    xl_pay = pd.read_excel(
        'reports/' + case + '/pay_table_data.xlsx',
        sheet_name=['basic job order', 'enhanced job order', 'job dict'])
    df_jd = xl_pay['job dict']
    df_jd['list_cols'] = f.make_lists_from_columns(
        xl_pay['job dict'], ['full', 'part', 'full_pcnt'])

    settings['jd'] = f.make_dict_from_columns(df_jd, 'job', 'list_cols')

    if settings['enhanced_jobs']:
        descr_df = xl_pay['enhanced job order']
    else:
        descr_df = xl_pay['basic job order']

    job_strings = list(descr_df.jobstr)
    settings['job_strs'] = job_strings
    settings['job_strs_dict'] = od(enumerate(job_strings, 1))

    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

    # ADD MORE ITEMS TO SETTINGS DICTIONARY //////////////////////////////////

    pay_ex = xl['pay_exceptions']
    settings['pay_exceptions'] = {i: [a, b] for i, a, b in
                                  zip(pay_ex.year_code,
                                      pay_ex.start_date,
                                      pay_ex.end_date)}
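    # resulting shape: {year_code: [start_date, end_date], ...}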

    settings['ret_incr'] = \
        f.make_tuples_from_columns(xl['ret_incr'],
                                   ['month_start', 'month_increase'],
                                   return_as_list=False,
                                   return_dates_as_strings=True,
                                   date_cols=['month_start'])

    # ## init_ret_age

    settings['init_ret_age'] = settings['init_ret_age_years'] + \
        (settings['init_ret_age_months'] / 12)

    # ## ret_incr_dict

    settings['ret_incr_dict'] = od(settings['ret_incr'])

    # ## ret_age

    init_ret_age = settings['init_ret_age']
    if settings['ret_age_increase']:
        ret_dict = settings['ret_incr_dict']
        ret_age = init_ret_age + sum(ret_dict.values()) * (1 / 12)
    else:
        ret_age = init_ret_age

    settings['ret_age'] = ret_age
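    # hypothetical example: an initial retirement age of 65.0 with
    # scheduled increases totaling 24 months yields a ret_age of 67.0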

    start_date = pd.to_datetime(settings['starting_date'])

    # ## imp_month

    imp_date = settings['implementation_date']

    settings['imp_month'] = ((imp_date.year - start_date.year) * 12) - \
        (start_date.month - imp_date.month)
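    # worked example (hypothetical dates): starting_date 2014-01 and
    # implementation_date 2016-03 give
    # ((2016 - 2014) * 12) - (1 - 3) = 24 + 2 = month number 26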

    # ## num_of_job_levels

    if settings['enhanced_jobs']:
        settings['num_of_job_levels'] = settings['job_levels_enhanced']
    else:
        settings['num_of_job_levels'] = settings['job_levels_basic']

    # ## eg_counts

    df = xl['job_counts']
    filter_cols = [
        col for col in df.columns.values.tolist() if str(col).startswith('eg')
    ]
    # if user fails to use "eg" prefix with eg numbers and uses integer
    # headers instead:
    if not filter_cols:
        try:
            # a non-numeric header raises a ValueError, which aborts the
            # entire list comprehension
            filter_cols = [
                col for col in df.columns.values.tolist()
                if isinstance(int(col), int)
            ]
        except ValueError:
            print('error: eg_counts.  Check that the job_counts worksheet ' +
                  'headers start with "eg".')
    df_filt = df[filter_cols].copy()
    # sort the columns to ensure proper reference order for standalone counts
    # (in case user input columns are not sorted)
    df_filt.sort_index(axis=1, inplace=True)
    eg_counts = []
    for col in df_filt:
        eg_counts.append(list(df_filt[col]))
    settings['eg_counts'] = eg_counts

    # ## j_changes

    df = xl['job_changes']

    start = list(df.month_start)
    end = list(df.month_end)
    jc_set = set()
    for i in np.arange(len(start)):
        jc_set = jc_set.union(set(range(start[i], end[i] + 1)))
    settings['jc_months'] = jc_set

    df['lister1'] = f.make_lists_from_columns(df, ['month_start', 'month_end'])
    filter_cols = \
        [col for col in df.columns.values.tolist() if col.startswith('eg')]
    df['lister2'] = f.make_lists_from_columns(df, filter_cols)
    settings['j_changes'] = f.make_lists_from_columns(
        df, ['job', 'lister1', 'total_change', 'lister2'])

    # ## recalls

    df = xl['recall']
    filter_cols = \
        [col for col in df.columns.values.tolist() if col.startswith('eg')]
    df['lister'] = f.make_lists_from_columns(df, filter_cols)
    settings['recalls'] = f.make_lists_from_columns(
        df, ['total_monthly', 'lister', 'month_start', 'month_end'])

    # ## sg_rights

    df = xl['prex']

    # make count ratio condition month range
    month_start = df.month_start.min()
    month_end = df.month_end.max()
    settings['prex_month_range'] = set(range(month_start, month_end + 1))

    sg_col_list = ['eg', 'job', 'count', 'month_start', 'month_end']
    filter_cols = \
        [col for col in df.columns.values.tolist() if col in sg_col_list]
    settings['sg_rights'] = f.make_lists_from_columns(df, filter_cols)

    # ## ratio_cond

    df = xl['ratio_cond']

    # make count ratio condition month range
    month_start = df.month_start.min()
    month_end = df.month_end.max()
    settings['ratio_month_range'] = set(range(month_start, month_end + 1))

    # make snap_ratio_on_off_dict
    settings['snap_ratio_on_off_dict'] = \
        f.make_dict_from_columns(df, 'basic_job', 'snapshot')

    df_cols = df.columns.values.tolist()
    group_cols = [col for col in df_cols if col.startswith('group')]
    weight_cols = [col for col in df_cols if col.startswith('weight')]
    for col in group_cols:
        df[col] = f.make_group_lists(df, col)

    df['grp_tup'] = f.make_lists_from_columns(df,
                                              group_cols,
                                              remove_zero_values=False,
                                              as_tuples=True)
    df['wgt_tup'] = f.make_lists_from_columns(df,
                                              weight_cols,
                                              remove_zero_values=False,
                                              as_tuples=False)
    df = df[['basic_job', 'grp_tup', 'wgt_tup', 'month_start',
             'month_end']].copy()
    cols = [col for col in df if col != 'basic_job']
    comb = f.make_lists_from_columns(df, cols)
    df = pd.DataFrame({'job': df.basic_job, 'data': comb})
    settings['ratio_dict'] = f.make_dict_from_columns(df, 'job', 'data')

    # ## count_ratio_dict

    df = xl['ratio_count_capped_cond']

    # make count ratio condition month range
    month_start = df.month_start.min()
    month_end = df.month_end.max()
    settings['count_ratio_month_range'] = set(range(month_start,
                                                    month_end + 1))

    # make snap_count_on_off_dict
    settings['snap_count_on_off_dict'] = \
        f.make_dict_from_columns(df, 'basic_job', 'snapshot')

    df_cols = df.columns.values.tolist()
    group_cols = [col for col in df_cols if col.startswith('group')]
    weight_cols = [col for col in df_cols if col.startswith('weight')]
    for col in group_cols:
        df[col] = f.make_group_lists(df, col)
    df['grp_tup'] = f.make_lists_from_columns(df,
                                              group_cols,
                                              remove_zero_values=False,
                                              as_tuples=True)
    df['wgt_tup'] = f.make_lists_from_columns(df,
                                              weight_cols,
                                              remove_zero_values=False,
                                              as_tuples=False)
    df = df[[
        'basic_job', 'grp_tup', 'wgt_tup', 'cap', 'month_start', 'month_end'
    ]].copy()
    cols = [col for col in df if col != 'basic_job']
    comb = f.make_lists_from_columns(df, cols)
    df = pd.DataFrame({'job': df.basic_job, 'data': comb})
    settings['count_ratio_dict'] = f.make_dict_from_columns(df, 'job', 'data')

    # ## p_dict, p_dict_verbose

    df = xl['proposal_dictionary']
    df.short_descr = df.short_descr.astype(str)
    settings['p_dict'] = f.make_dict_from_columns(df, 'proposal',
                                                  'short_descr')
    settings['p_dict_verbose'] = f.make_dict_from_columns(
        df, 'proposal', 'long_descr')

    # retrieve these two dictionaries unconditionally - they are needed
    # below (zero-group removal and snapshot dicts) even when
    # enhanced_jobs is False
    count_dict = settings['count_ratio_dict']
    ratio_dict = settings['ratio_dict']

    if settings['enhanced_jobs']:
        jd = settings['jd']
        sg_rights = settings['sg_rights']
        # ratio_cond = settings['ratio_cond']

        ratio_onoff = settings['snap_ratio_on_off_dict']
        count_onoff = settings['snap_count_on_off_dict']

        dist_sg = settings['dist_sg']
        dist_ratio = settings['dist_ratio']
        dist_count = settings['dist_count']

        sg_rights, count_dict, ratio_dict, ratio_onoff, count_onoff = \
            cv.convert(job_dict=jd,
                       sg_list=sg_rights,
                       count_ratio_dict=count_dict,
                       ratio_dict=ratio_dict,
                       ratio_onoff_dict=ratio_onoff,
                       count_onoff_dict=count_onoff,
                       dist_sg=dist_sg,
                       dist_ratio=dist_ratio,
                       dist_count_ratio=dist_count)

        settings['sg_rights'] = sg_rights
        settings['snap_ratio_on_off_dict'] = ratio_onoff
        settings['snap_count_on_off_dict'] = count_onoff

    # remove any ratio groups marked with a zero (may only occur with
    # three or more merging groups)
    settings['ratio_dict'] = f.remove_zero_groups(ratio_dict)
    settings['count_ratio_dict'] = f.remove_zero_groups(count_dict)

    snap_ratio_dict = {}
    snap_count_dict = {}
    for job in ratio_dict.keys():
        snap_ratio_dict[job] = ratio_dict[job][2]
    for job in count_dict.keys():
        snap_count_dict[job] = count_dict[job][3]

    settings['snap_ratio_dict'] = snap_ratio_dict
    settings['snap_count_dict'] = snap_count_dict

    # ///////////////////////////////////////////////////////////////////

    # COLOR DICTIONARY~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    color_dict = mcl(num_of_colors=settings['num_of_job_levels'] + 1,
                     return_dict=True)

    if settings['enhanced_jobs']:
        df = xl['enhanced_job_colors']
    else:
        df = xl['basic_job_colors']

    job_colors = f.make_lists_from_columns(df,
                                           ['red', 'green', 'blue', 'alpha'])

    color_dict['job_colors'] = job_colors

    # ## eg_colors, lin_reg_colors, lin_reg_colors2, mean_colors

    short_colors = xl['eg_colors']
    settings['egs'] = set(short_colors.eg.values)
    color_dict['eg_color_dict'] = dict(
        zip(short_colors.eg, short_colors.eg_colors))
    short_cols = [col for col in list(short_colors) if col != 'eg']
    short_colors = xl['eg_colors'][short_cols]
    for col in list(short_colors):
        color_dict[col] = list(short_colors[col])
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ATTRIBUTE DICTIONARY >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    df = xl['attribute_dict']
    attribute_dict = dict(zip(df.col_name, df.col_description))
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

    # OLD CODE STARTS HERE:  (making pickle files...) *********************

    # MASTER FILE:
    master = pd.read_excel('excel/' + case + '/master.xlsx')

    master.set_index('empkey', drop=False, inplace=True)

    master['retdate'] = master['dob'] + \
        pd.DateOffset(years=settings['init_ret_age_years']) + \
        pd.DateOffset(months=settings['init_ret_age_months'])
    # calculate future retirement age increase(s)
    if settings['ret_age_increase']:
        ret_incr_dict = settings['ret_incr_dict']
        for date, add_months in ret_incr_dict.items():
            master.loc[master.retdate > pd.to_datetime(date) +
                       pd.offsets.MonthEnd(-1), 'retdate'] = \
                master.retdate + pd.DateOffset(months=add_months)

    # only include employees who retire during or after the starting_month
    # (remove employees who retire prior to analysis period)
    master = master[master.retdate >= start_date - pd.DateOffset(months=1) +
                    pd.DateOffset(days=1)]

    master.to_pickle('dill/master.pkl')

    # ACTIVE EACH MONTH (no consideration for job changes or recall, only
    # calculated on retirements of active employees as of start date)
    emps_to_calc = master[master.line == 1].copy()
    cmonths = f.career_months(emps_to_calc, settings['starting_date'])

    # LIST ORDER PROPOSALS
    # Read the list ordering proposals from an Excel workbook, add an index
    # column ('idx'), and store each proposal as a dataframe in a pickled file.
    # The proposals are contained on separate worksheets.
    # The routine below will loop through the worksheets.
    # The worksheet tab names are important for the function.
    # The pickle files will be named like the workbook sheet names.

    xl = pd.ExcelFile('excel/' + case + '/proposals.xlsx')

    sheets = xl.sheet_names
    # make dataframe containing proposal names and store it
    # (will be utilized by load_datasets function)
    sheets_df = pd.DataFrame(sheets, columns=['proposals'])
    sheets_df.to_pickle('dill/proposal_names.pkl')

    for ws in sheets:
        try:
            df = xl.parse(ws)[['empkey']]
            df.set_index('empkey', inplace=True)
            df['idx'] = np.arange(len(df)).astype(int) + 1
            df.to_pickle('dill/p_' + ws + '.pkl')
        except Exception:
            print('proposal worksheet', ws, 'skipped during processing')
            continue

    # LAST MONTH
    # percent of month for all employee retirement dates.
    # Used for retirement month pay.

    df_dates = master[['retdate']].copy()
    df_dates['day_of_month'] = df_dates.retdate.dt.day
    df_dates['days_in_month'] = (df_dates.retdate +
                                 pd.offsets.MonthEnd(0)).dt.day
    df_dates['last_pay'] = df_dates.day_of_month.values / \
        df_dates.days_in_month.values
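    # worked example: retiring on the 14th of a 30-day month yields
    # last_pay = 14 / 30 = .4667 (portion of the final month worked)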

    df_dates.set_index('retdate', inplace=True)
    df_dates = df_dates[['last_pay']]
    df_dates.sort_index(inplace=True)
    df_dates = df_dates[~df_dates.index.duplicated()]
    df_dates.to_pickle('dill/last_month.pkl')
    # ********************************************************************

    # JOB TABLES AND RELATED DICTIONARY___________________________________
    # create job tables (standalone and integrated), store as dictionary
    # (also job changes and job counts input arrays)

    # JOB_ASSIGN_FILTER_TABLE 1
    master_copy = master[['retdate', 'line', 'fur']].copy()
    # only active employees...
    df_actives = master_copy[master_copy.line == 1]
    # only furloughees...
    df_fur = master_copy[master_copy.fur == 1]
    cmonths = f.career_months(df_actives, settings['starting_date'])
    cmonths_fur = f.career_months(df_fur, settings['starting_date'])
    active_each_month = f.count_per_month(cmonths)
    fur_left_each_month = f.count_per_month(cmonths_fur)
    num_of_months = active_each_month.size

    num_of_job_levels = settings['num_of_job_levels']

    if settings['enhanced_jobs']:
        # use job dictionary(jd) from settings dictionary for conversion
        eg_counts, j_changes = f.convert_to_enhanced(settings['eg_counts'],
                                                     settings['j_changes'],
                                                     settings['jd'])
    else:
        eg_counts = settings['eg_counts']
        j_changes = settings['j_changes']

    # compute job counts array
    jcnts_arr = f.make_jcnts(eg_counts)

    s_table = f.job_gain_loss_table(num_of_months,
                                    num_of_job_levels,
                                    jcnts_arr,
                                    j_changes,
                                    standalone=True)

    table = f.job_gain_loss_table(num_of_months,
                                  num_of_job_levels,
                                  jcnts_arr,
                                  j_changes,
                                  standalone=False)

    # JOB_ASSIGN_FILTER_TABLE 2
    # this array will contain the number of originally furloughed employees
    # who remain under the retirement age
    fur_arr = np.zeros(num_of_months)
    np.put(fur_arr, np.arange(fur_left_each_month.size), fur_left_each_month)

    # this array will hold the cumulative furlough recall counts
    recall_arr = np.zeros(num_of_months)
    # loop through each recall schedule and make an array of cumulative
    # recall counts
    for recall in settings['recalls']:
        recall_add = np.zeros(num_of_months)
        np.put(recall_add, np.arange(recall[2], recall[3]), recall[0])
        np.cumsum(recall_add, out=recall_add)
        # add this recall cumsum to main recall_arr (for each recall schedule)
        recall_arr = recall_arr + recall_add

    # limit each month's cumulative recall count if the monthly count of
    # remaining furloughed employees is less
    additive_arr = np.minimum(fur_arr, recall_arr)
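    # sketch with hypothetical numbers: a schedule recalling 2 employees
    # per month with month_start 10 and month_end 20 places 2 in months
    # 10-19; the cumulative sum then climbs 2, 4, ... 20 from month 10
    # onward, and np.minimum above caps the total by the count of
    # furloughees actually remaining each month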

    # add 2 zero columns in front of job count table
    zero_table = f.add_zero_col(f.add_zero_col(table[0]))

    # create a cumulative table of job counts, left to right, for comparison
    accum_table = np.add.accumulate(zero_table, axis=1)

    # create employee count limit array to compare with cumulative job counts
    if settings['recall']:
        limit_arr = (active_each_month + additive_arr).astype(int)
    else:
        limit_arr = active_each_month.astype(int)

    limit_arr = limit_arr[:, None]

    # perform a truth test on accum_table, False results will cause job
    # loop(s) for a month to be skipped with the assign_standalone_job_changes
    # function
    loop_check = np.less_equal(accum_table, limit_arr)
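    # sketch with hypothetical numbers: if 3000 employees are active in
    # a month and the cumulative job counts through each level are
    # [0, 0, 1500, 2800, 4200, ...], the columns through 2800 test True
    # (assignable) while the 4200 column tests False, so that job
    # level's assignment loop is skipped for the month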

    table_dict = {
        's_table': s_table,
        'table': table,
        'j_changes': j_changes,
        'jcnts_arr': jcnts_arr,
        'loop_check': loop_check
    }

    # ___________________________________________________________________

    # SQUEEZE_VALS ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    # initial values for editor tool widgets.
    # The values stored within this file will be replaced and
    # updated by the editor tool when it is utilized.

    # initial max range for edit slider
    edit_max = len(df_actives) + len(df_fur)
    x_low = int(.35 * edit_max)
    x_high = int(.65 * edit_max)

    editor_dict = {
        'base_ds_name': '',
        'box_line_color': 'black',
        'box_line_alpha': '.8',
        'box_fill_color': 'black',
        'box_fill_alpha': '.05',
        'box_line_width': '1.0',
        'case': case,
        'chk_color_apply': [0],
        'chk_display': [0],
        'chk_filter': [1],
        'chk_hover_sel': [],
        'chk_hover_on': [],
        'chk_minor_grid': [],
        'chk_scatter': True,
        'chk_poly_fit': False,
        'chk_trails': [],
        'chk_mean': False,
        'chk_sagov': False,
        'cht_xsize': 1200,
        'cht_ysize': 580,
        'cht_xflipped': False,
        'cht_yflipped': False,
        'cht_title': 'spcnt',
        'cht_xformat': '0',
        'cht_yformat': '0.0%',
        'edit_max': edit_max,
        'ez_end': edit_max,
        'ez_step': 5,
        'minor_grid_alpha': 0.0,
        'num_of_months': num_of_months,
        'p2_marker_alpha': .8,
        'p2_marker_size': 2.2,
        'sel_base': 'standalone',
        'sel_bgc': 'White',
        'sel_bgc_alpha': '.10',
        'sel_cond': 'none',
        'sel_emp_grp': '1',
        'sel_filt1': '',
        'sel_filt2': '',
        'sel_filt3': '',
        'sel_gridc': 'Gray',
        'sel_gridc_alpha': '.20',
        'sel_measure': 'spcnt',
        'sel_proposal': 'edit',
        'sel_mth_oper': '>=',
        'sel_mth_num': '0',
        'sel_oper1': '==',
        'sel_oper2': '==',
        'sel_oper3': '==',
        'sel_sqz_dir': '<<  d',
        'sel_sqz_type': 'log',
        'sel_xtype': 'prop_s',
        'sel_ytype': 'diff',
        'slider_squeeze': 100,
        'total_count': edit_max,
        'txt_input1': '',
        'txt_input2': '',
        'txt_input3': '',
        'x_high': x_high,
        'x_low': x_low
    }

    with open('dill/editor_dict.pkl', 'wb') as handle:
        pickle.dump(editor_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    # WRITE DICTIONARIES TO DISC ==========================================

    with open('dill/dict_settings.pkl', 'wb') as handle:
        pickle.dump(settings, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('dill/dict_color.pkl', 'wb') as handle:
        pickle.dump(color_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('dill/dict_attr.pkl', 'wb') as handle:
        pickle.dump(attribute_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('dill/dict_job_tables.pkl', 'wb') as handle:
        pickle.dump(table_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #3
# insert stovepipe job result into new column of proposal (month_form)
# this indexes the jobs with empkeys (orig_jobs is an ndarray only)

df_proposal['orig_job'] = orig_jobs

# ASSIGN JOBS - flush and no flush option*

# cmonths - career length in months for each employee.
#   length is equal to number of employees
cmonths = f.career_months_df_in(df_proposal)

# nonret_each_month: count of non-retired employees remaining
# in each month until no more remain -
# length is equal to longest career length
nonret_each_month = f.count_per_month(cmonths)
all_months = np.sum(nonret_each_month)
cumulative = nonret_each_month.cumsum()
np_low_limits = f.make_lower_slice_limits(cumulative)

job_level_counts = np.array(jcnts_arr[1])

if cf.delayed_implementation:

    imp_month = cf.imp_month
    imp_low = np_low_limits[imp_month]
    imp_high = cumulative[imp_month]

    dstand = pd.read_pickle(stand_path_string)
    ds_option = dstand[['job_count', 'lspcnt', 'spcnt', 'rank_in_job', 'jobp']]
    dstand = dstand[['mnum', 'jnum', 'empkey', 'fur']][:imp_high]
Example #4
def main():

    script, proposal_name, *conditions = argv

    pre, suf = 'dill/', '.pkl'

    skeleton_path_string = (pre + 'skeleton' + suf)

    proposal_order_string = (pre + 'p_' + proposal_name + suf)

    stand_path_string = (pre + 'standalone' + suf)

    output_name = 'ds_' + proposal_name

    try:
        df_master = pd.read_pickle(pre + 'master' + suf)
    except OSError:
        print('Master list not found.  Run build_program_files script?')
        print('\n  >>> exiting routine.\n')
        exit()

    try:
        ds = pd.read_pickle(skeleton_path_string)
    except OSError:
        print('\nSkeleton file not found. ' +
              'Run build_program_files script?\n\n' +
              'Standalone build failed.\n\n' +
              '  >>> exiting routine.\n')
        exit()

    try:
        df_order = pd.read_pickle(proposal_order_string)
    except OSError:
        prop_names = \
            pd.read_pickle('dill/proposal_names.pkl').proposals.tolist()
        stored_case = pd.read_pickle('dill/case_dill.pkl').case.value
        print('\nerror : proposal name "' +
              str(proposal_name) + '" not found...\n')
        print('available proposal names are ', prop_names,
              'for case study:',
              stored_case)
        print('\n  >>> exiting routine.\n')
        exit()

    sdict = pd.read_pickle('dill/dict_settings.pkl')
    tdict = pd.read_pickle('dill/dict_job_tables.pkl')

    # do not include inactive employees (other than furlough) in data model
    df_master = df_master[
        (df_master.line == 1) | (df_master.fur == 1)].copy()

    num_of_job_levels = sdict['num_of_job_levels']
    lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']

    # ORDER the skeleton df according to INTEGRATED list order.
    # The skeleton (ds) can initially be in any integrated order; each
    # employee group must simply be in proper order relative to itself.
    # Use the short-form 'idx' (order) column from either the proposed
    # list or the new_order column from an edited list to create a new
    # column, 'new_order', within the long-form skeleton.  The new order
    # column is created by data alignment using the common empkey indexes.
    # The skeleton may then be sorted by month and new_order.
    # (note: duplicate empkeys within the skeleton index (from different
    # months) are assigned the same order value)

    if 'edit' in conditions:
        df_new_order = pd.read_pickle(proposal_order_string)
        ds['new_order'] = df_new_order['new_order']
        dataset_path_string = (pre + 'ds_edit' + suf)
    else:
        try:
            order_key = df_order.idx
        except AttributeError:
            # an edited list stores its order in a 'new_order' column
            order_key = df_order.new_order
        ds['new_order'] = order_key
        dataset_path_string = (pre + output_name + suf)

    if os.path.isdir('dill/'):
        try:
            os.remove(dataset_path_string)
        except OSError:
            pass

    # sort the skeleton by month and proposed list order
    ds.sort_values(['mnum', 'new_order'], inplace=True)

    # ORIG_JOB*

    eg_sequence = df_master.eg.values
    fur_sequence = df_master.fur.values

    # create list of employee group codes from the master data
    egs = sorted(pd.unique(eg_sequence))
    # retrieve job counts array
    jcnts_arr = tdict['jcnts_arr']

    if 'prex' in conditions:

        sg_rights = sdict['sg_rights']
        sg_eg_list = []
        sg_dict = od()
        stove_dict = od()

        # Find the employee groups which have pre-existing job rights...
        # grab the eg code from each sg (special group) job right description
        # and add to sg_eg_list
        for line_item in sg_rights:
            sg_eg_list.append(line_item[0])
        # place unique eg codes into sorted list
        sg_eg_list = sorted(pd.unique(sg_eg_list))

        # Make a dictionary containing the special group data for each
        # group with special rights
        for eg in sg_eg_list:
            sg_data = []
            for line_item in sg_rights:
                if line_item[0] == eg:
                    sg_data.append(line_item)
            sg_dict[eg] = sg_data

        for eg in egs:

            if eg in sg_eg_list:
                # (run prex stovepipe routine with eg dict key and value)
                sg = df_master[df_master.eg == eg]['sg'].values
                fur = df_master[df_master.eg == eg]['fur']
                ojob_array = f.make_stovepipe_prex_shortform(
                    jcnts_arr[0][eg - 1], sg, sg_dict[eg], fur)
                prex_stove = np.take(ojob_array, np.where(fur == 0)[0])
                stove_dict[eg] = prex_stove
            else:
                # (run make_stovepipe routine with eg dict key and value)
                stove_dict[eg] = f.make_stovepipe_jobs_from_jobs_arr(
                    jcnts_arr[0][eg - 1])

        # use dict values as inputs to sp_arr (the ordered dict
        # maintains proper sequence)...
        sp_arr = list(stove_dict.values())
        # total of jobs per eg
        eg_job_counts = np.add.reduce(jcnts_arr[0], axis=1)

        orig_jobs = f.make_intgrtd_from_sep_stove_lists(sp_arr,
                                                        eg_sequence,
                                                        fur_sequence,
                                                        eg_job_counts,
                                                        num_of_job_levels)

    else:

        orig_jobs = f.make_original_jobs_from_counts(
            jcnts_arr[0], eg_sequence,
            fur_sequence, num_of_job_levels).astype(int)

    # insert stovepipe job result into new column of proposal (month_form)
    # this indexes the jobs with empkeys (orig_jobs is an ndarray only)

    df_master['orig_job'] = orig_jobs

    # ASSIGN JOBS - flush and no flush option*

    # cmonths - career length in months for each employee.
    #   length is equal to number of employees
    cmonths = f.career_months(df_master, sdict['starting_date'])

    # nonret_each_month: count of non-retired employees remaining
    # in each month until no more remain -
    # length is equal to longest career length
    nonret_each_month = f.count_per_month(cmonths)
    all_months = np.sum(nonret_each_month)
    high_limits = nonret_each_month.cumsum()
    low_limits = f.make_lower_slice_limits(high_limits)

    if sdict['delayed_implementation']:

        imp_month = sdict['imp_month']
        imp_low = low_limits[imp_month]
        imp_high = high_limits[imp_month]

        # read the standalone dataset (info is not in integrated order)
        ds_stand = pd.read_pickle(stand_path_string)

        # get standalone data and order it the same as the integrated dataset.
        # create a unique key column in the standalone data df and a temporary
        # df which is ordered according to the integrated dataset
        imp_cols, arr_dict, col_array = \
            f.make_preimp_array(ds_stand, ds,
                                imp_high, sdict['compute_job_category_order'],
                                sdict['compute_pay_measures'])

        # select columns to use as pre-implementation data for the
        # integrated dataset.  Data is limited to the pre-implementation
        # months.

        # aligned_jnums and aligned_fur arrays are the same as standalone data
        # up to the end of the implementation month, then the standalone value
        # for the implementation month is passed down unchanged for the
        # remainder of months in the model.  These arrays carry over
        # standalone data for each employee group to be honored until and when
        # the integrated list is implemented.
        # These values from the standalone datasets (furlough status and
        # standalone job held at the implementation date) are needed for
        # subsequent integrated dataset job assignment calculations.  Other
        # standalone values are simply copied and inserted into the
        # pre-implementation months of the integrated dataset.

        delayed_jnums = col_array[arr_dict['jnum']]
        delayed_fur = col_array[arr_dict['fur']]

        aligned_jnums = f.align_fill_down(imp_low,
                                          imp_high,
                                          ds[[]],  # indexed with empkeys
                                          delayed_jnums)

        aligned_fur = f.align_fill_down(imp_low,
                                        imp_high,
                                        ds[[]],
                                        delayed_fur)

        # now assign "filled-down" job numbers to numpy array
        delayed_jnums[imp_low:] = aligned_jnums[imp_low:]
        delayed_fur[imp_low:] = aligned_fur[imp_low:]
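        # illustrative sketch: if an employee holds standalone jnum 4 in
        # the implementation month, align_fill_down (aligning on the
        # empkey index) repeats that 4 through every later month, so the
        # standalone job is honored until integrated assignment begins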

        # ORIG_JOB and FUR (delayed implementation)
        # then assign numpy array values to orig_job column of integrated
        # dataset as starting point for integrated job assignments
        ds['orig_job'] = delayed_jnums
        ds['fur'] = delayed_fur

        if sdict['integrated_counts_preimp']:
            # assign combined job counts prior to the implementation date.
            # (otherwise, separate employee group counts will be used when
            # data is transferred from col_array at end of script)
            # NOTE:  this data is the actual number of jobs held within each
            # category; could be less than the number of jobs available as
            # attrition occurs
            standalone_preimp_job_counts = \
                f.make_delayed_job_counts(imp_month,
                                          delayed_jnums,
                                          low_limits,
                                          high_limits)
            col_array[arr_dict['job_count']][:imp_high] = \
                standalone_preimp_job_counts

    else:
        # set implementation month at zero for job assignment routine
        imp_month = 0

        # ORIG_JOB and FUR (no delayed implementation)
        # transfer proposal stovepipe jobs (month_form) to long_form via index
        # (empkey) alignment...
        ds['orig_job'] = df_master['orig_job']
        # developer note:  test to verify this is not instantiated elsewhere...
        ds['fur'] = df_master['fur']

    table = tdict['table']
    j_changes = tdict['j_changes']

    reduction_months = f.get_job_reduction_months(j_changes)
    # copy selected columns from ds for job assignment function input below.
    # note:  if delayed implementation, the 'fur' and 'orig_job' columns
    # contain standalone data through the implementation month.
    df_align = ds[['eg', 'sg', 'fur', 'orig_job']].copy()

    # JNUM, FUR, JOB_COUNT
    if sdict['no_bump']:

        # No bump, no flush option (includes conditions, furlough/recall,
        # job changes schedules)
        # this is the main job assignment function.  It loops through all of
        # the months in the model and assigns jobs
        nbnf, job_count, fur = \
            f.assign_jobs_nbnf_job_changes(df_align,
                                           low_limits,
                                           high_limits,
                                           all_months,
                                           reduction_months,
                                           imp_month,
                                           conditions,
                                           sdict,
                                           tdict,
                                           fur_return=sdict['recall'])

        ds['jnum'] = nbnf
        ds['job_count'] = job_count
        ds['fur'] = fur
        # for create_snum_and_spcnt_arrays function input...
        jnum_jobs = nbnf

    else:

        # Full flush and bump option (no conditions or
        # furlough/recall schedule considered, job changes are included)
        # No bump, no flush applied up to implementation date
        fbff, job_count, fur = f.assign_jobs_full_flush_job_changes(
            nonret_each_month, table[0], num_of_job_levels)

        ds['jnum'] = fbff
        ds['job_count'] = job_count
        ds['fur'] = fur
        # for create_snum_and_spcnt_arrays function input...
        jnum_jobs = fbff

    # SNUM, SPCNT, LNUM, LSPCNT

    monthly_job_counts = table[1]

    ds['snum'], ds['spcnt'], ds['lnum'], ds['lspcnt'] = \
        f.create_snum_and_spcnt_arrays(jnum_jobs, num_of_job_levels,
                                       nonret_each_month,
                                       monthly_job_counts,
                                       lspcnt_calc)

    # RANK in JOB

    ds['rank_in_job'] = ds.groupby(['mnum', 'jnum'],
                                   sort=False).cumcount() + 1

    # JOBP

    jpcnt = (ds.rank_in_job / ds.job_count).values
    np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)

    ds['jobp'] = ds['jnum'] + jpcnt
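    # worked example: jnum 3 with rank_in_job 25 of job_count 100 gives
    # jobp 3.25; rank 100 of 100 uses the .99999 cap, giving 3.99999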

    # PAY - merge with pay table - provides monthly pay
    if sdict['compute_pay_measures']:

        # account for furlough time (only count active months)
        if sdict['discount_longev_for_fur']:
            # skel(ds) provides pre-calculated non-discounted scale data
            # flip ones and zeros...
            ds['non_fur'] = 1 - ds.fur.values

            non_fur = ds.groupby([pd.Grouper('empkey')])['non_fur'] \
                .cumsum().values
            ds.pop('non_fur')
            starting_mlong = ds.s_lmonths.values
            cum_active_months = non_fur + starting_mlong
            ds['mlong'] = cum_active_months
            ds['ylong'] = ds['mlong'].values / 12
            ds['scale'] = np.clip((cum_active_months / 12) + 1, 1,
                                  sdict['top_of_scale']).astype(int)

        # make a new long_form dataframe and assign a combination of
        # pay-related ds columns from large dataset as its index...
        # the dataframe is empty - we are only making an index-alignment
        # vehicle to use with indexed pay table....
        # the dataframe index contains specific scale, job, and contract year
        # for each line in long_form ds
        df_pt_index = pd.DataFrame(index=((ds['scale'].values * 100) +
                                          ds['jnum'].values +
                                          (ds['year'].values * 100000)))
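        # this index mirrors the ptindex encoding built into the stored
        # pay tables: (year * 100000) + (scale * 100) + jnum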

        if sdict['enhanced_jobs']:
            df_pt = pd.read_pickle('dill/pay_table_enhanced.pkl')
        else:
            df_pt = pd.read_pickle('dill/pay_table_basic.pkl')

        # 'data-align' small indexed pay_table to long_form df:
        df_pt_index['monthly'] = df_pt['monthly']

        ds['monthly'] = df_pt_index.monthly.values

        # MPAY
        # adjust monthly pay for any raise and last month pay percent if
        # applicable
        ds['mpay'] = ((ds['pay_raise'].values *
                       ds['mth_pcnt'].values *
                       ds['monthly'].values)) / 1000

        ds.pop('monthly')

        # CPAY

        ds['cpay'] = ds.groupby('new_order')['mpay'].cumsum()

    if sdict['delayed_implementation']:
        ds_cols = ds.columns
        # grab each imp_col (column to insert standalone or pre-implementation
        # date data) and replace integrated data up through implementation
        # date
        for col in imp_cols:
            if col in ds_cols:
                arr = ds[col].values
                arr[:imp_high] = col_array[arr_dict[col]][:imp_high]
                ds[col] = arr

    # CAT_ORDER
    # global job ranking
    if sdict['compute_job_category_order']:
        ds['cat_order'] = f.make_cat_order(ds, table[0])

    # save to file
    if sdict['save_to_pickle']:
        ds.to_pickle(dataset_path_string)
Example #5

ds_list = [ds1, ds2, ds3]
short_ds_list = [short_ds1, short_ds2, short_ds3]


for i in range(len(ds_list)):

    df_long = ds_list[i]
    df_short = short_ds_list[i]
    jcnts = jcnts_arr[0][i]
    # jcnts = np.take(jcnts, np.where(jcnts != 0)[0])
    short_len = len(short_ds_list[i])

    # ORIG_JOB*

    cmonths_this_ds = f.career_months_df_in(df_short)
    this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds)
    uppers = this_ds_nonret_each_month.cumsum()
    lowers = f.make_lower_slice_limits(uppers)
    all_months = np.sum(this_ds_nonret_each_month)

    this_table = table[0][i]
    this_month_counts = table[1][i]
    df_align = df_long[['twa', 'fur']]
    fur_codes = np.array(df_align.fur)

    # if i == 0 and cf.apply_supc:  # i == 0 >> eg1 from skeleton

    #     twa_rights = np.array(cf.twa_rights)
    #     twa_jobs = np.transpose(twa_rights)[1]
    #     sup_c_counts = np.transpose(twa_rights)[2]
    #     twa_dict = dict(zip(twa_jobs, sup_c_counts))
Example #6
def main():

    script, *conditions = argv

    input_skel = 'skeleton'

    pre, suf = 'dill/', '.pkl'

    skeleton_path_string = (pre + input_skel + suf)

    try:
        ds = pd.read_pickle(skeleton_path_string)
    except OSError:
        print('\nSkeleton file not found. ' +
              'Run build_program_files script?\n\n' +
              'Standalone build failed.\n\n' +
              '  >>> exiting routine.\n')
        exit()

    if os.path.isdir('dill/'):
        try:
            os.remove('dill/standalone.pkl')
        except OSError:
            pass

    sdict = pd.read_pickle('dill/dict_settings.pkl')
    tdict = pd.read_pickle('dill/dict_job_tables.pkl')

    num_of_job_levels = sdict['num_of_job_levels']
    egs = np.unique(ds.eg)
    start_month = 0

    # make prex True or False
    # (for input to assign_standalone_job_changes function)
    prex = 'prex' in conditions

    table = tdict['s_table']
    jcnts_arr = tdict['jcnts_arr']
    j_changes = tdict['j_changes']

    job_change_months = f.get_job_change_months(j_changes)
    job_reduction_months = f.get_job_reduction_months(j_changes)

    # sort the skeleton by employee group, month, and index
    # (preserves each group's list order)
    ds.sort_values(['eg', 'mnum', 'idx'], inplace=True)

    ds_dict = {}
    short_ds_dict = {}

    for grp in egs:
        ds_dict[grp] = ds[ds.eg == grp].copy()

    for grp in egs:
        short_ds_dict[grp] = ds_dict[grp][ds_dict[grp].mnum == 0].copy()

    ds = pd.DataFrame()

    for eg in egs:

        df_long = ds_dict[eg]
        df_short = short_ds_dict[eg]
        jcnts = jcnts_arr[0][eg - 1]
        short_len = len(df_short)

        # ORIG_JOB*
        cmonths_this_ds = \
            f.career_months(df_short, sdict['starting_date'])
        this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds)
        high_limits = this_ds_nonret_each_month.cumsum()
        low_limits = f.make_lower_slice_limits(high_limits)
        all_months = np.sum(this_ds_nonret_each_month)

        this_eg_table = f.add_zero_col(table[0][eg - 1])
        this_eg_month_counts = table[1][eg - 1]

        df_align_cols = ['fur']
        if 'sg' in df_long:
            df_align_cols.append('sg')

        df_align = df_long[df_align_cols]

        # pre-existing employee group special job assignment is included within
        # the job assignment function below...
        results = f.assign_standalone_job_changes(eg,
                                                  df_align,
                                                  low_limits,
                                                  high_limits,
                                                  all_months,
                                                  this_eg_table,
                                                  this_eg_month_counts,
                                                  this_ds_nonret_each_month,
                                                  job_change_months,
                                                  job_reduction_months,
                                                  start_month,
                                                  sdict,
                                                  tdict,
                                                  apply_sg_cond=prex)

        jnums = results[0]
        count_col = results[1]
        held = results[2]
        fur = results[3]
        orig_jobs = results[4]
        # HELD JOB
        # job from previous month
        df_long['held'] = held
        # JOB_COUNT
        df_long['job_count'] = count_col

        # ORIG_JOB
        df_short['orig_job'] = orig_jobs
        df_long['orig_job'] = df_short['orig_job']

        # ASSIGN JOBS - (stovepipe method only since only
        # assigning within each employee group separately)

        # JNUM

        df_long['jnum'] = jnums

        # SNUM, SPCNT, LNUM, LSPCNT

        monthly_job_counts = table[1][eg - 1]
        lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']

        df_long['snum'], df_long['spcnt'], \
            df_long['lnum'], df_long['lspcnt'] = \
            f.create_snum_and_spcnt_arrays(jnums, num_of_job_levels,
                                           this_ds_nonret_each_month,
                                           monthly_job_counts,
                                           lspcnt_calc)

        # RANK in JOB

        df_long['rank_in_job'] = \
            df_long.groupby(['mnum', 'jnum']).cumcount() + 1

        # JOBP
        # make last percentage position in each job category .99999 vs 1.0
        # so that jobp calculations are correct
        jpcnt = (df_long.rank_in_job / df_long.job_count).values
        np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)

        df_long['jobp'] = df_long['jnum'] + jpcnt

        # PAY - merge with pay table - provides monthly pay
        if sdict['compute_pay_measures']:

            if sdict['discount_longev_for_fur']:
                # skel provides non-discounted scale data
                # flip ones and zeros...
                df_long['non_fur'] = 1 - fur
                df_long['fur'] = fur

                non_fur = \
                    (df_long.groupby([pd.Grouper('empkey')])
                     ['non_fur'].cumsum().values)
                df_long.pop('non_fur')
                starting_mlong = df_long.s_lmonths.values
                cum_active_months = non_fur + starting_mlong
                df_long['mlong'] = cum_active_months
                df_long['ylong'] = df_long['mlong'] / 12
                df_long['scale'] = \
                    np.clip((cum_active_months / 12) + 1, 1,
                            sdict['top_of_scale']).astype(int)

            # SCALE
            df_pt_index = pd.DataFrame(
                index=(df_long['scale'] * 100) + df_long['jnum'] +
                (df_long['year'] * 100000))
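
            # the composite key encodes (year, scale, jnum) in a single
            # integer, e.g. year 2015, scale 12, jnum 4 ->
            # (2015 * 100000) + (12 * 100) + 4 = 201501204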

            if sdict['enhanced_jobs']:
                df_pt = pd.read_pickle(
                    'dill/pay_table_enhanced.pkl')
            else:
                df_pt = pd.read_pickle(
                    'dill/pay_table_basic.pkl')

            # index alignment against the identically-keyed pay table
            # pulls the matching monthly pay rate for each row
            df_pt_index['monthly'] = df_pt['monthly']

            df_long['monthly'] = df_pt_index.monthly.values

            # MPAY
            # adjust monthly pay for any raise and for the last-month pay
            # percentage, if applicable; divided by 1000 to express pay
            # in thousands

            df_long['mpay'] = (df_long['pay_raise'] *
                               df_long['mth_pcnt'] *
                               df_long['monthly']) / 1000

            df_long.pop('monthly')

            # CPAY

            df_long['cpay'] = df_long.groupby('idx')['mpay'].cumsum()

        ds = pd.concat([ds, df_long], ignore_index=True)

    ds.sort_values(by=['mnum', 'idx'], inplace=True)
    ds.set_index('empkey', drop=False, verify_integrity=False, inplace=True)

    # CAT_ORDER
    # global job ranking

    if sdict['compute_job_category_order']:
        table = tdict['table']
        ds['cat_order'] = f.make_cat_order(ds, table[0])

    # save to file
    if sdict['save_to_pickle']:
        ds.to_pickle('dill/standalone.pkl')
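
# The PAY section above relies on pandas index alignment for the pay
# table lookup.  A minimal, self-contained sketch with hypothetical
# values (not part of the routines in this example):

import pandas as pd

pay_table = pd.DataFrame({'monthly': [10000.0, 12000.0]},
                         index=[201500102, 201500203])
emp = pd.DataFrame({'scale': [1, 2], 'jnum': [2, 3], 'year': [2015, 2015]})
key = (emp['scale'] * 100) + emp['jnum'] + (emp['year'] * 100000)
lookup = pd.DataFrame(index=key)
lookup['monthly'] = pay_table['monthly']  # aligns on the composite index
emp['monthly'] = lookup.monthly.values    # -> [10000.0, 12000.0]
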
def main():

    # read prepared list dataframe - proper column headers, column formats...
    # this is master.pkl, order-independent, concatenated list data
    pre, suf = 'dill/', '.pkl'
    master_list = 'master'
    master_path = (pre + master_list + suf)

    try:
        df_mlist = pd.read_pickle(master_path)
    except OSError:
        print('\nMaster list not found.  Run build_program_files script?\n\n' +
              'Skeleton build failed.\n\n' +
              '  >>> exiting routine.\n')
        import sys
        sys.exit()

    output_name = 'skeleton'
    skel_path_string = (pre + output_name + suf)

    sdict = pd.read_pickle('dill/dict_settings.pkl')

    # only include pilots not retired prior to one month before the
    # starting date
    start_date = sdict['starting_date']

    df_mlist = df_mlist[
        df_mlist.retdate >= start_date - pd.DateOffset(months=1)]

    # include furloughees by default
    df = df_mlist[(df_mlist.line == 1) | (df_mlist.fur == 1)].copy()

    df_mlist = []  # release the master list data (no longer needed)

    # MNUM*
    # calculate the number of career months for each employee (short_form)
    # cmonths is used for mnum, idx, and mth_pcnt calculations

    cmonths = f.career_months(df, start_date)
    # cmonths is the input array for the count_per_month function.
    # The count_per_month function output array is input for
    # other functions (month_form)

    nonret_each_month = f.count_per_month(cmonths)

    # first long form data generation.
    # month numbers, same month number repeated for each
    # month length (long_form)

    long_form_skeleton = f.gen_month_skeleton(nonret_each_month)
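
    # illustration (hypothetical values): cmonths = [3, 2, 2, 1] gives
    # nonret_each_month = [4, 3, 1] (employees still active in months
    # 0, 1 and 2) and long_form_skeleton = [0, 0, 0, 0, 1, 1, 1, 2]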

    # this is making a dataframe out of the
    # long_form_skeleton (months) created above.
    # this is the basis for the long_form dataframe...

    # MNUM
    # (month number)

    skel = pd.DataFrame(long_form_skeleton.astype(int), columns=['mnum'])

    # IDX*
    # grab emp index for each remaining
    # employee for each month - used for merging dfs later

    empkey_arr = df.empkey.values

    long_index, long_emp = f.gen_skel_emp_idx(nonret_each_month,
                                              cmonths, empkey_arr)

    # IDX
    skel['idx'] = long_index.astype(int)

    # EMPKEY
    skel['empkey'] = long_emp.astype(int)

    # grab dates of birth from df column (short_form)
    # used for the starting age calculation below
    dobs = list(df['dob'])

    df_last = pd.read_pickle('dill/last_month.pkl')

    # align the last-month pay percentage data on the retdate index
    df.set_index('retdate', inplace=True)
    df['lmonth_pcnt'] = df_last.last_pay
    df.reset_index(inplace=True)
    df.set_index('empkey', inplace=True, verify_integrity=False, drop=False)

    lmonth_pcnt = df.lmonth_pcnt.values

    df_dict = {'mth_pcnt': lmonth_pcnt, 'final_month': cmonths}

    df_last_month = pd.DataFrame(df_dict)

    df_last_month['idx'] = df_last_month.index

    df_last_month.set_index(['idx', 'final_month'], inplace=True)
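
    # merging long-form (idx, mnum) against this (idx, final_month) index
    # places each employee's last-month pay percentage on his or her final
    # month only; all other months are filled with 1 below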

    skel = pd.merge(skel, df_last_month, right_index=True,
                    left_on=['idx', 'mnum'], how='outer')

    # MTH_PCNT
    skel['mth_pcnt'] = skel.mth_pcnt.fillna(1)

    # DATE, YEAR, PAY RAISE*

    # set up date_range - end of month dates

    df_dates = pd.DataFrame(pd.date_range(start_date,
                                          periods=len(nonret_each_month),
                                          freq='M'), columns=['date'])

    # this function adds two columns to the dataframe.
    # The first is the year value of each date.
    # The second is either 1.0 or
    # a calculated percentage pay raise after the last contract year.

    if sdict['compute_pay_measures']:
        df_dates = f.contract_year_and_raise(df_dates, sdict)

    # the merge below brings in 3 columns - date, year, and pay_raise
    # - from month_form to long_form

    # DATE, YEAR, PAY RAISE
    skel = pd.merge(skel, df_dates, right_index=True, left_on=['mnum'])

    # AGE, SCALE*
    # calculate and assign starting age and
    # starting longevity.
    # Assign to columns in df and then data align merge into skeleton df.
    # These columns are used later for age and scale calculations.
    # Merged here so that they could be done together
    # after setting indexes to match.

    s_age = f.starting_age(dobs, start_date)
    df['s_age'] = s_age

    # data alignment magic...set index to empkey
    skel.set_index('empkey', inplace=True, verify_integrity=False, drop=False)
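
    # with both frames indexed by empkey, each short-form value assigned
    # below is broadcast to every monthly (long-form) row of that employee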

    # AGE, RETDATE, EG, DOH, LDATE, LNAME,
    # FUR, RET_MONTH to long_form skeleton
    skel['s_age'] = df.s_age
    skel['fur'] = df.fur

    if sdict['add_eg_col']:
        skel['eg'] = df.eg
    if sdict['add_retdate_col']:
        skel['retdate'] = df.retdate
    if sdict['add_doh_col']:
        skel['doh'] = df.doh
    if sdict['add_ldate_col']:
        skel['ldate'] = df.ldate
    if sdict['add_lname_col']:
        skel['lname'] = df.lname
    if sdict['add_line_col']:
        skel['line'] = df.line
    if sdict['add_sg_col']:
        skel['sg'] = df.sg

    # RET_MARK
    # add last month number to df
    df['ret_month'] = cmonths
    # data align to long-form skel
    skel['ret_mark'] = df.ret_month
    mnums = skel.mnum.values
    lmonth_arr = np.zeros(mnums.size).astype(int)
    ret_month = skel.ret_mark.values
    # mark array where retirement month is equal to month number
    np.put(lmonth_arr, np.where(ret_month == mnums)[0], 1)
    skel['ret_mark'] = lmonth_arr
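
    # illustration: an employee whose final career month is 38 receives a
    # ret_mark of 1 in month 38 and 0 in every other month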

    # SCALE*

    if sdict['compute_pay_measures']:

        df['s_lyears'] = f.longevity_at_startdate(list(df['ldate']),
                                                  start_date)
        skel['s_lyears'] = df.s_lyears

        month_inc = (1 / 12)

        # scale is payrate longevity level
        # compute scale for each employee for each month
        # begin with s_lyears (starting longevity years)
        # add a monthly increment based on the month number (mnum)
        # convert to an integer which rounds toward zero
        # clip to min of 1 and max of top_of_scale (max pay longevity scale)
        skel['scale'] = np.clip(((skel['mnum'] * month_inc) +
                                skel['s_lyears']).astype(int),
                                1,
                                sdict['top_of_scale'])
        skel.pop('s_lyears')
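
        # illustration (hypothetical values): mnum = 18, s_lyears = 3 ->
        # int((18 / 12) + 3) = int(4.5) = 4, clipped to the
        # [1, top_of_scale] range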

        # this column is only used for calculating furloughed employee pay
        # longevity in compute_measures routine.
        # ...could be an option if recalls are not part of model
        df['s_lmonths'] = f.longevity_at_startdate(list(df['ldate']),
                                                   sdict['starting_date'],
                                                   return_as_months=True)
        skel['s_lmonths'] = df.s_lmonths

    # AGE

    # calculate monthly age using starting age and month number

    age_list = skel.s_age.values

    corr_ages = f.age_correction(long_form_skeleton,
                                 age_list,
                                 sdict['ret_age'])

    if sdict['ret_age_increase']:
        skel['age'] = f.clip_ret_ages(sdict['ret_incr_dict'],
                                      sdict['init_ret_age'],
                                      skel.date.values, corr_ages)
    else:
        skel['age'] = corr_ages

    skel.pop('s_age')

    # empkey index (keep empkey column)
    # this is for easy data alignment with different list order keys

    # save results to pickle
    if sdict['save_to_pickle']:
        skel.to_pickle(skel_path_string)
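
# The "data alignment magic" used throughout the skeleton build - a
# minimal, self-contained sketch with hypothetical values (not part of
# the routine above).  Assigning a short-form Series to a long-form
# frame broadcasts each value to every row sharing the same index key:

import pandas as pd

long_df = pd.DataFrame({'mnum': [0, 1, 2, 0, 1]},
                       index=[101, 101, 101, 205, 205])
short_df = pd.DataFrame({'s_age': [52.5, 47.0]}, index=[101, 205])
long_df['s_age'] = short_df.s_age
# long_df.s_age -> [52.5, 52.5, 52.5, 47.0, 47.0]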
Example #9
for grp in egs:
    short_ds_dict[grp] = ds_dict[grp][ds_dict[grp].mnum == 0].copy()

ds = pd.DataFrame()

for i in egs - 1:  # loop over zero-based group indexes (egs is an ndarray)

    df_long = ds_dict[i + 1]
    df_short = short_ds_dict[i + 1]
    jcnts = jcnts_arr[0][i]
    short_len = len(df_short)

    # ORIG_JOB*
    cmonths_this_ds = f.career_months_df_in(df_short)
    this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds)
    uppers = this_ds_nonret_each_month.cumsum()
    lowers = f.make_lower_slice_limits(uppers)
    all_months = np.sum(this_ds_nonret_each_month)
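
    # illustration (hypothetical values): with this_ds_nonret_each_month =
    # [4, 3, 1], uppers = [4, 7, 8]; lowers would then hold the month
    # start offsets, [0, 4, 7]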

    this_table = table[0][i]
    this_month_counts = table[1][i]

    df_align_cols = ['fur']
    if 'sg' in df_long:
        df_align_cols.append('sg')

    df_align = df_long[df_align_cols]
    fur_codes = np.array(df_align.fur)

    # pre-existing employee group special job assignment is included within
    # the job assignment function below...