Exemplo n.º 1
0
def prerun_partition(debug=False):
    '''
    Prerun function for step 1, partition.

    Reads the partition parameters, the global parameters, the data and
    the partition groups, runs the corresponding check helpers on them
    and returns everything the partition step needs.

    The only argument is the optional debug flag; the original note on
    this function says the RunPython VBA code in xlwings cannot pass
    arguments in, so callers are expected to rely on the default.

    Arguments:
        debug -- when True, print a banner marking the start of the call.

    Returns a 3-tuple:
        dict_input_partition -- partition parameters merged with the
                                global parameters, plus a 'PLS_target'
                                entry (None unless method is "PLS")
        dict_groups          -- the partition groups
        df_partition         -- the data after format_data_partition
    '''

    if debug:
        banner = '=' * 30
        print(banner)
        print('start of prerun_partition')
        print(banner)

    # Parameter names handed to the checker together with the values
    keys = [
        'freq', 'sdate', 'edate', 'method', 'pcutoff', 'method_growth',
        'retropolate', 'sheet_partitions', 'sheet_loadings'
    ]

    # Partition-specific parameters: read, then validate
    params = read_parameters_partition()
    check_parameters_partition(params, keys)

    # Global parameters (per the original note, reading them also checks
    # whether values have changed since being initially set)
    global_params = read_parameters_global()

    # Fold the global parameters into the partition parameters. A shared
    # key is reported through show_message before the value is stored.
    for global_key, global_val in global_params.items():
        if global_key in params:
            show_message(
                'dict_input_partition should not have key ' + global_key +
                ' that is common with dict_global_params',
                halt=True)
        params[global_key] = global_val

    # Raw data
    data = read_data_partition()

    # Partition groups; the PLS method additionally carries its targets
    if params['method'] != "PLS":
        groups = read_partition_groups()
        params['PLS_target'] = None
    else:
        groups, pls_targets = read_partition_groupsPLS()
        params['PLS_target'] = pls_targets

    # Validate the data against the parameters
    check_data_partition(data, params)

    # Apply the start/end dates and fill missing values
    data = format_data_partition(data, params['sdate'], params['edate'])

    # For PLS every target variable must be non-null on every row
    if params['method'] == "PLS":
        PLSvar = list({
            target
            for target_group in pls_targets.values()
            for target in target_group
        })
        if data.loc[:, PLSvar].isnull().sum().sum() > 0:
            print(data.loc[:, PLSvar].head())
            show_message('PLS target should cover all dates', halt=True)

    # Validate the groups against the formatted data
    check_partition_groups(groups, data)

    return params, groups, data
Exemplo n.º 2
0
def read_parameters_tsfit():
    '''
    Collect the t-skew fit (tsfit) inputs from the 'Input_parameters' sheet.

    Every cell address is hardcoded. No value or range checking happens
    here; per the original note that is left to check_parameters_tsfit.

    Returns a dict with the keys:
        quantlist        -- values read downwards from F31
        regressors       -- dict keyed
                            '<regressor>_trans_<index>_<transform>', each
                            entry holding 'transform' (column B) and
                            'option' (column D) of the regressor's row;
                            regressor names are read downwards from A31
        latest_date      -- B61
        fit_params       -- 'fittype' (B59), 'mode' (B64/D64), 'qsmooth'
                            (B67/D67), 'plot_mode'/'plot_median'/
                            'plot_mean' from the first three checkboxes
                            of the sheet, and 'dof', 'var_low',
                            'var_high', 'skew_low', 'skew_high'
                            (rows 74-78, columns B and D)
        sheet_tsfit      -- B69
        sheet_quantreg   -- B51
        sheet_cond_quant -- B52
        sheet_partition  -- B24
        partition_groups -- result of read_partition_groups()
    '''

    sheet = wb.sheets['Input_parameters']

    def cell(address):
        # Value of a single cell
        return sheet.range(address).value

    def column_below(address):
        # Values from address downwards to the end of the filled range
        return sheet.range(address).expand('down').value

    tsfit = {}

    # Quantile list
    tsfit['quantlist'] = column_below('F31')

    # Regressors: names in column A, transformation in B, option in D
    regressor_info = {}
    tsfit['regressors'] = regressor_info
    first_row = 31
    names = column_below('A{}'.format(first_row))
    if not isinstance(names, (list, tuple)):
        # A single value does not come back as a list
        names = [names]
    for index, name in enumerate(names):
        row = first_row + index
        transform = cell('B{}'.format(row))
        option = cell('D{}'.format(row))
        label = name + '_trans_' + str(index) + '_' + transform
        regressor_info[label] = {'transform': transform, 'option': option}

    # Latest date
    tsfit['latest_date'] = cell('B61')

    # Basic t-skew fit parameters
    fit = {}
    tsfit['fit_params'] = fit
    fit['fittype'] = cell('B59')
    fit['mode'] = {'constraint': cell('B64'), 'value': cell('D64')}
    fit['qsmooth'] = {'option': cell('B67'), 'period': cell('D67')}

    # Plot switches: the first three checkboxes, in the order returned
    plot_keys = ('plot_mode', 'plot_median', 'plot_mean')
    for index, box in enumerate(sheet.api.CheckBoxes()):
        state = box.value
        print(state)
        if index < len(plot_keys):
            fit[plot_keys[index]] = (state > 0)

    # Output sheet
    tsfit['sheet_tsfit'] = cell('B69')

    # Advanced t-skew fit parameters, one row each starting at row 74
    advanced = ('dof', 'var_low', 'var_high', 'skew_low', 'skew_high')
    for row, name in enumerate(advanced, start=74):
        fit[name] = {
            'constraint': cell('B{}'.format(row)),
            'value': cell('D{}'.format(row)),
        }

    # Input sheet names. The original note points out that these are the
    # cells also read for do_quantfit(); since the outputs cannot be
    # linked, the user is assumed not to have changed them in between.
    # Their values are not checked here.
    for row, key in enumerate(('sheet_quantreg', 'sheet_cond_quant'),
                              start=51):
        tsfit[key] = cell('B{}'.format(row))

    # Partitions sheet and groups
    tsfit['sheet_partition'] = cell('B24')
    tsfit['partition_groups'] = read_partition_groups()

    return tsfit
Exemplo n.º 3
0
def read_parameters_scenario():
    '''
    Collect the scenario inputs from the 'Input_parameters' sheet.

    Every cell address is hardcoded. No value or range checking happens
    here (the original note delegates it to a separate check function).

    Returns a dict with the keys:
        quantlist          -- values read downwards from F31
        regressors         -- dict keyed
                              '<regressor>_trans_<index>_<transform>',
                              each entry holding 'transform' (column B)
                              and 'option' (column D) of the regressor's
                              row; names are read downwards from A31
        latest_date        -- B61
        fit_params         -- 'fittype' (B59), 'mode' (B64/D64),
                              'qsmooth' (B67/D67) and 'dof', 'var_low',
                              'var_high', 'skew_low', 'skew_high'
                              (rows 74-78, columns B and D)
        fit_params_shocked -- deep copy of fit_params whose 'mode' is
                              replaced by the values in B110/D110
        shockvars          -- dict keyed by the names read downwards from
                              A99, each entry holding 'shocktype'
                              (column B) and 'shockvalue' (column F when
                              the type is 'By +/- STD', else column D)
        sheet_scenario     -- B112
        sheet_quantreg     -- B51
        sheet_cond_quant   -- B52
        sheet_partition    -- B24
        partition_groups   -- result of read_partition_groups()
    '''

    sheet = wb.sheets['Input_parameters']

    def cell(address):
        # Value of a single cell
        return sheet.range(address).value

    def column_below(address):
        # Values from address downwards to the end of the filled range
        return sheet.range(address).expand('down').value

    scenario = {}

    # Quantile list
    scenario['quantlist'] = column_below('F31')

    # Regressors: names in column A, transformation in B, option in D
    regressor_info = {}
    scenario['regressors'] = regressor_info
    first_row = 31
    names = column_below('A{}'.format(first_row))
    if not isinstance(names, (list, tuple)):
        # A single value does not come back as a list
        names = [names]
    for index, name in enumerate(names):
        row = first_row + index
        transform = cell('B{}'.format(row))
        option = cell('D{}'.format(row))
        label = name + '_trans_' + str(index) + '_' + transform
        regressor_info[label] = {'transform': transform, 'option': option}

    # Latest date
    scenario['latest_date'] = cell('B61')

    # Basic t-skew fit parameters
    fit = {}
    scenario['fit_params'] = fit
    fit['fittype'] = cell('B59')
    fit['mode'] = {'constraint': cell('B64'), 'value': cell('D64')}
    fit['qsmooth'] = {'option': cell('B67'), 'period': cell('D67')}

    # Advanced t-skew fit parameters, one row each starting at row 74
    advanced = ('dof', 'var_low', 'var_high', 'skew_low', 'skew_high')
    for row, name in enumerate(advanced, start=74):
        fit[name] = {
            'constraint': cell('B{}'.format(row)),
            'value': cell('D{}'.format(row)),
        }

    # Shocked fit parameters: same as above except for the mode
    shocked = copy.deepcopy(fit)
    scenario['fit_params_shocked'] = shocked
    shocked['mode']['constraint'] = cell('B110')
    shocked['mode']['value'] = cell('D110')

    # Shocked variables: names in column A, type in B, value in F or D
    shock_info = {}
    scenario['shockvars'] = shock_info
    shock_start = 99
    shock_names = column_below('A{}'.format(shock_start))
    if not isinstance(shock_names, (list, tuple)):
        shock_names = [shock_names]
    for offset, shock_name in enumerate(shock_names):
        row = shock_start + offset
        shocktype = cell('B{}'.format(row))
        value_column = 'F' if shocktype == 'By +/- STD' else 'D'
        shockvalue = cell(value_column + str(row))
        shock_info[shock_name] = {
            'shocktype': shocktype,
            'shockvalue': shockvalue,
        }

    # Output sheet
    scenario['sheet_scenario'] = cell('B112')

    # Input sheet names (values are not checked here)
    for row, key in enumerate(('sheet_quantreg', 'sheet_cond_quant'),
                              start=51):
        scenario[key] = cell('B{}'.format(row))

    # Partitions sheet and groups
    scenario['sheet_partition'] = cell('B24')
    scenario['partition_groups'] = read_partition_groups()
    return scenario
Exemplo n.º 4
0
def read_parameters_segment():
    '''
    Read in parameters for the segment step from the 'Input_parameters'
    sheet.

    The cell positions for inputs are hardcoded in. No parameter value or
    range checking is done here.

    Returns a dict with the keys:
        freq, sdate, edate, method, pcutoff, method_growth, retropolate
                           -- B17 to B23, in that order
        quantlist          -- values read downwards from F31
        regressors         -- dict keyed
                              '<regressor>_trans_<index>_<transform>',
                              each entry holding 'transform' (column B)
                              and 'option' (column D) of the regressor's
                              row; names are read downwards from A31
        fit_params         -- 'fittype' (B59), 'qsmooth' (B67/D67) and
                              'dof', 'var_low', 'var_high', 'skew_low',
                              'skew_high' (rows 74-78, columns B and D)
        horizonlist        -- list of values read downwards from A120
        fitdatelist        -- list of values read downwards from B120
        fitconstrainlist   -- column D, one entry per fit date
        fitconstrainvalues -- column F, one entry per fit date
        sheet_segment      -- B130
        sheet_term         -- B132
        partition_groups   -- groups from read_partition_groupsPLS() when
                              method is "PLS", else from
                              read_partition_groups()
        PLS_target         -- PLS targets when method is "PLS", else None
    '''

    sheet = wb.sheets['Input_parameters']

    def cell(address):
        # Value of a single cell
        return sheet.range(address).value

    def column_below(address):
        # Values from address downwards to the end of the filled range
        return sheet.range(address).expand('down').value

    def as_list(values):
        # A one-row column does not come back as a list; wrap it so that
        # callers can always iterate and take len()
        if isinstance(values, (list, tuple)):
            return values
        return [values]

    # Create dict for parameters
    dict_parameters_segment = dict()

    # ---------------------------#
    # Read in necessary values
    # ---------------------------#

    # Partition parameters, one row each starting at row 17
    partition_params = ('freq', 'sdate', 'edate', 'method', 'pcutoff',
                        'method_growth', 'retropolate')
    for row, param in enumerate(partition_params, start=17):
        dict_parameters_segment[param] = cell('B{}'.format(row))

    # Read in quantlist
    dict_parameters_segment['quantlist'] = column_below('F31')

    # Read in info on regressors: names in column A, transformation in
    # column B, optional parameter in column D
    regressor_info = dict()
    dict_parameters_segment['regressors'] = regressor_info
    startrow = 31
    regressors = as_list(column_below('A{}'.format(startrow)))
    for iregressor, regressor in enumerate(regressors):
        row = startrow + iregressor
        transform = cell('B{}'.format(row))
        option = cell('D{}'.format(row))
        label = regressor + '_trans_' + str(iregressor) + '_' + transform
        regressor_info[label] = {'transform': transform, 'option': option}

    # Read in t-skew fit parameters
    fit_params = dict()
    dict_parameters_segment['fit_params'] = fit_params
    fit_params['fittype'] = cell('B59')
    fit_params['qsmooth'] = {'option': cell('B67'), 'period': cell('D67')}

    # Advanced t-skew fit parameters, one row each starting at row 74
    advanced = ('dof', 'var_low', 'var_high', 'skew_low', 'skew_high')
    for row, param in enumerate(advanced, start=74):
        fit_params[param] = {
            'constraint': cell('B{}'.format(row)),
            'value': cell('D{}'.format(row)),
        }

    # Read in segment parameters

    # Horizons (column A) and fit dates (column B) start on the same row.
    # Both are normalised to lists: the fit dates are counted with len()
    # below, which fails on the bare value returned for a one-row column.
    startrow = 120
    dict_parameters_segment['horizonlist'] = as_list(
        column_below('A{}'.format(startrow)))
    dict_parameters_segment['fitdatelist'] = as_list(
        column_below('B{}'.format(startrow)))

    # One mode constraint (column D) and constraint value (column F) per
    # fit date, read row by row
    n_hzs = len(dict_parameters_segment['fitdatelist'])
    dict_parameters_segment['fitconstrainlist'] = []
    dict_parameters_segment['fitconstrainvalues'] = []
    for i in range(n_hzs):
        row = startrow + i
        dict_parameters_segment['fitconstrainlist'].append(
            cell('D{}'.format(row)))
        dict_parameters_segment['fitconstrainvalues'].append(
            cell('F{}'.format(row)))

    # Read in output sheets
    dict_parameters_segment['sheet_segment'] = cell('B130')
    dict_parameters_segment['sheet_term'] = cell('B132')

    # Partition groups are read once, with the reader matching the method
    # (the same pattern prerun_partition uses); an earlier unconditional
    # read_partition_groups() call whose result was always overwritten
    # has been dropped.
    if dict_parameters_segment['method'] == "PLS":
        dict_groups, dict_PLS = read_partition_groupsPLS()
    else:
        dict_groups = read_partition_groups()
        dict_PLS = None
    dict_parameters_segment['partition_groups'] = dict_groups
    dict_parameters_segment['PLS_target'] = dict_PLS
    return dict_parameters_segment
Exemplo n.º 5
0
def run_quantfit(dict_input_quantfit, df_quantfit, debug=False):
    '''
    Main run function for step 2, quantfit.

    Runs the conditional quantile fits through condquant() and builds
    the coefficient plots through coeff_plot().

    Arguments:
        dict_input_quantfit -- input parameters. The keys read here are
                               'horizon', 'target' and 'quantlist'; every
                               entry whose key contains 'sheet_' is
                               copied to the output unchanged.
        df_quantfit         -- data; must have a 'date' column, which is
                               used as the index (and kept as a column).
        debug               -- when True, print a banner marking the
                               start of the call.

    Returns a dict holding the copied 'sheet_' entries plus:
        'qcoef'      -- first value returned by condquant()
        'cond_quant' -- second value returned by condquant()
        'localprj'   -- third value returned by condquant()
        'figs'       -- value returned by coeff_plot()

    NOTE(review): the original intent is for this function to be
    executable independently of Excel, but the regressors are obtained
    from read_partition_groups(), which according to the comment below
    reads the Partition_groups sheet -- confirm before running it
    outside Excel.
    '''

    # NOTE: this installs a process-wide filter, so warnings stay
    # silenced after the function returns.
    warnings.filterwarnings("ignore")

    if debug:
        print('=' * 30)
        print('start of run_quantfit')
        print('=' * 30)

    # ------------------------
    # Create DataFrame for log
    # ------------------------
    # NOTE(review): the log is filled below but is not part of the
    # returned dict; expose it if callers need it.
    log_frame = pd.DataFrame(columns=['Time', 'Action'])

    # ------------------------
    # Create output dict, carrying over
    # the output sheet names
    # ------------------------
    dict_output_quantfit = {
        key: value
        for key, value in dict_input_quantfit.items()
        if 'sheet_' in key
    }

    # ------------------------
    # Get parameters from
    # dict_input_quantfit
    # ------------------------
    horizon = dict_input_quantfit['horizon']
    depvar = dict_input_quantfit['target'] + '_cpd_' + str(horizon)

    # Get the list of regressors from the sheet Partition_groups
    dict_groups = read_partition_groups()
    regressors = list(dict_groups.keys())

    # TODO: Don't do this here, drop the column so it is not redundant
    df_quantfit = df_quantfit.set_index(df_quantfit['date'], drop=False)

    # ------------------------
    # Run the quantfit
    # ------------------------
    qcoeff_all, dcond_quantiles_all, loco_all, exitcode = condquant(
        df_quantfit, depvar, regressors, horizon,
        dict_input_quantfit['quantlist'])

    # An exit code below 1 is treated as failure
    if exitcode < 1:
        action = 'Failed to do quantile regression, exit code: ' + str(
            exitcode)
    else:
        action = 'Quantile regression finished succesfully.'
    # `date` is expected to provide now(), i.e. to be an alias of
    # datetime.datetime -- TODO confirm against the file's imports.
    tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
    # Append the row in place. The previous DataFrame.append() call
    # returned a new frame that was discarded (so nothing was logged)
    # and the method no longer exists in pandas 2.x.
    log_frame.loc[len(log_frame)] = [tn, action]

    # Add return values
    dict_output_quantfit['qcoef'] = qcoeff_all
    dict_output_quantfit['cond_quant'] = dcond_quantiles_all
    dict_output_quantfit['localprj'] = loco_all
    dict_output_quantfit['figs'] = coeff_plot(
        qcoeff_all, dict_groups, dict_input_quantfit['quantlist'])

    return dict_output_quantfit