Example #1
def test_causality():  # test Granger- and instantaneous causality
    if debug_mode:
        if "causality" not in to_test:
            return
        else:
            print("\n\nCAUSALITY", end="")
    for ds in datasets:
        for dt in dt_s_list:
            if debug_mode:
                print("\n" + dt_s_tup_to_string(dt) + ": ", end="")

            err_msg_g_p = build_err_msg(ds, dt, "GRANGER CAUS. - p-VALUE")
            err_msg_g_t = build_err_msg(ds, dt, "GRANGER CAUS. - TEST STAT.")
            err_msg_i_p = build_err_msg(ds, dt, "INSTANT. CAUS. - p-VALUE")
            err_msg_i_t = build_err_msg(ds, dt, "INSTANT. CAUS. - TEST STAT.")
            v_ind = range(len(ds.variable_names))
            for causing_ind in sublists(v_ind, 1, len(v_ind)-1):
                causing_names = ["y" + str(i+1) for i in causing_ind]
                causing_key = tuple(ds.variable_names[i] for i in causing_ind)

                caused_ind = [i for i in v_ind if i not in causing_ind]
                caused_names = ["y" + str(i+1) for i in caused_ind]
                caused_key = tuple(ds.variable_names[i] for i in caused_ind)

                # test Granger-causality ######################################
                granger_sm_ind = results_sm[ds][
                    dt].test_causality(caused_ind, causing_ind)
                granger_sm_str = results_sm[ds][
                    dt].test_causality(caused_names, causing_names)

                # test test-statistic for Granger non-causality:
                g_t_obt = granger_sm_ind.test_statistic
                g_t_des = results_ref[ds][dt]["granger_caus"][
                    "test_stat"][(causing_key, caused_key)]
                yield assert_allclose, g_t_obt, g_t_des, rtol, atol, \
                    False, err_msg_g_t
                # check whether string sequences as args work in the same way:
                g_t_obt_str = granger_sm_str.test_statistic
                yield assert_allclose, g_t_obt_str, g_t_obt, 1e-07, 0, False, \
                    err_msg_g_t + " - sequences of integers and ".upper() + \
                    "strings as arguments don't yield the same result!".upper()
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1 or len(caused_ind) == 1:
                    ci = causing_ind[0] if len(causing_ind) == 1 else causing_ind
                    ce = caused_ind[0] if len(caused_ind) == 1 else caused_ind
                    granger_sm_single_ind = results_sm[ds][
                        dt].test_causality(ce, ci)
                    g_t_obt_single = granger_sm_single_ind.test_statistic
                    yield assert_allclose, g_t_obt_single, g_t_obt, 1e-07, 0, \
                        False, \
                        err_msg_g_t + " - list of int and int as ".upper() + \
                        "argument don't yield the same result!".upper()

                # test p-value for Granger non-causality:
                g_p_obt = granger_sm_ind.pvalue
                g_p_des = results_ref[ds][dt]["granger_caus"]["p"][(
                    causing_key, caused_key)]
                yield assert_allclose, g_p_obt, g_p_des, rtol, atol, \
                    False, err_msg_g_p
                # check whether string sequences as args work in the same way:
                g_p_obt_str = granger_sm_str.pvalue
                yield assert_allclose, g_p_obt_str, g_p_obt, 1e-07, 0, False, \
                    err_msg_g_p + " - sequences of integers and ".upper() + \
                    "strings as arguments don't yield the same result!".upper()
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    g_p_obt_single = granger_sm_single_ind.pvalue
                    yield assert_allclose, g_p_obt_single, g_p_obt, 1e-07, 0, \
                        False, \
                        err_msg_g_p + " - list of int and int as ".upper() + \
                        "argument don't yield the same result!".upper()

                # test instantaneous causality ################################
                inst_sm_ind = results_sm[ds][dt].test_inst_causality(
                    causing_ind)
                inst_sm_str = results_sm[ds][dt].test_inst_causality(
                    causing_names)
                # test test-statistic for instantaneous non-causality
                t_obt = inst_sm_ind.test_statistic
                t_des = results_ref[ds][dt]["inst_caus"][
                    "test_stat"][(causing_key, caused_key)]
                yield assert_allclose, t_obt, t_des, rtol, atol, False, \
                    err_msg_i_t
                # check whether string sequences as args work in the same way:
                t_obt_str = inst_sm_str.test_statistic
                yield assert_allclose, t_obt_str, t_obt, 1e-07, 0, False, \
                    err_msg_i_t + " - sequences of integers and ".upper() + \
                    "strings as arguments don't yield the same result!".upper()
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    inst_sm_single_ind = results_sm[ds][
                        dt].test_inst_causality(causing_ind[0])
                    t_obt_single = inst_sm_single_ind.test_statistic
                    yield assert_allclose, t_obt_single, t_obt, 1e-07, 0, \
                        False, \
                        err_msg_i_t + " - list of int and int as ".upper() + \
                        "argument don't yield the same result!".upper()

                # test p-value for instantaneous non-causality
                p_obt = results_sm[ds][dt].test_inst_causality(
                    causing_ind).pvalue
                p_des = results_ref[ds][dt]["inst_caus"]["p"][(
                    causing_key, caused_key)]
                yield assert_allclose, p_obt, p_des, rtol, atol, False, \
                    err_msg_i_p
                # check whether string sequences as args work in the same way:
                p_obt_str = inst_sm_str.pvalue
                yield assert_allclose, p_obt_str, p_obt, 1e-07, 0, False, \
                    err_msg_i_p + " - sequences of integers and ".upper() + \
                    "strings as arguments don't yield the same result!".upper()
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    inst_sm_single_ind = results_sm[ds][
                        dt].test_inst_causality(causing_ind[0])
                    p_obt_single = inst_sm_single_ind.pvalue
                    yield assert_allclose, p_obt_single, p_obt, 1e-07, 0, \
                        False, \
                        err_msg_i_p + " - list of int and int as ".upper() + \
                        "argument don't yield the same result!".upper()
Example #2
def test_causality():  # test Granger- and instantaneous causality
    if debug_mode:
        if "causality" not in to_test:
            return
        else:
            print("\n\nCAUSALITY", end="")
    for ds in datasets:
        for dt in dt_s_list:
            if debug_mode:
                print("\n" + dt_s_tup_to_string(dt) + ": ", end="")

            err_msg_g_p = build_err_msg(ds, dt, "GRANGER CAUS. - p-VALUE")
            err_msg_g_t = build_err_msg(ds, dt, "GRANGER CAUS. - TEST STAT.")
            err_msg_i_p = build_err_msg(ds, dt, "INSTANT. CAUS. - p-VALUE")
            err_msg_i_t = build_err_msg(ds, dt, "INSTANT. CAUS. - TEST STAT.")
            v_ind = range(len(ds.variable_names))
            for causing_ind in sublists(v_ind, 1, len(v_ind)-1):
                causing_names = ["y" + str(i+1) for i in causing_ind]
                causing_key = tuple(ds.variable_names[i] for i in causing_ind)

                caused_ind = [i for i in v_ind if i not in causing_ind]
                caused_names = ["y" + str(i+1) for i in caused_ind]
                caused_key = tuple(ds.variable_names[i] for i in caused_ind)

                # test Granger-causality ######################################
                granger_sm_ind = results_sm[ds][
                    dt].test_causality(caused_ind, causing_ind)
                granger_sm_str = results_sm[ds][
                    dt].test_causality(caused_names, causing_names)

                # test test-statistic for Granger non-causality:
                g_t_obt = granger_sm_ind.test_statistic
                g_t_des = results_ref[ds][dt]["granger_caus"][
                    "test_stat"][(causing_key, caused_key)]
                assert_allclose(g_t_obt, g_t_des, rtol, atol, False, err_msg_g_t)
                # check whether string sequences as args work in the same way:
                g_t_obt_str = granger_sm_str.test_statistic
                assert_allclose(g_t_obt_str, g_t_obt, 1e-07, 0, False,
                                err_msg_g_t + " - sequences of integers and ".upper() +
                                "strings as arguments don't yield the same result!".upper())
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1 or len(caused_ind) == 1:
                    ci = causing_ind[0] if len(causing_ind) == 1 else causing_ind
                    ce = caused_ind[0] if len(caused_ind) == 1 else caused_ind
                    granger_sm_single_ind = results_sm[ds][
                        dt].test_causality(ce, ci)
                    g_t_obt_single = granger_sm_single_ind.test_statistic
                    assert_allclose(g_t_obt_single, g_t_obt, 1e-07, 0, False,
                                    err_msg_g_t + " - list of int and int as ".upper() +
                                    "argument don't yield the same result!".upper())

                # test p-value for Granger non-causality:
                g_p_obt = granger_sm_ind.pvalue
                g_p_des = results_ref[ds][dt]["granger_caus"]["p"][(
                    causing_key, caused_key)]
                assert_allclose(g_p_obt, g_p_des, rtol, atol, False, err_msg_g_p)
                # check whether string sequences as args work in the same way:
                g_p_obt_str = granger_sm_str.pvalue
                assert_allclose(g_p_obt_str, g_p_obt, 1e-07, 0, False,
                                err_msg_g_p + " - sequences of integers and ".upper() +
                                "strings as arguments don't yield the same result!".upper())
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    g_p_obt_single = granger_sm_single_ind.pvalue
                    assert_allclose(g_p_obt_single, g_p_obt, 1e-07, 0, False,
                                    err_msg_g_p + " - list of int and int as ".upper() +
                                    "argument don't yield the same result!".upper())

                # test instantaneous causality ################################
                inst_sm_ind = results_sm[ds][dt].test_inst_causality(
                    causing_ind)
                inst_sm_str = results_sm[ds][dt].test_inst_causality(
                    causing_names)
                # test test-statistic for instantaneous non-causality
                t_obt = inst_sm_ind.test_statistic
                t_des = results_ref[ds][dt]["inst_caus"][
                    "test_stat"][(causing_key, caused_key)]
                assert_allclose(t_obt, t_des, rtol, atol, False, err_msg_i_t)
                # check whether string sequences as args work in the same way:
                t_obt_str = inst_sm_str.test_statistic
                assert_allclose(t_obt_str, t_obt, 1e-07, 0, False,
                                err_msg_i_t + " - sequences of integers and ".upper() +
                                "strings as arguments don't yield the same result!".upper())
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    inst_sm_single_ind = results_sm[ds][
                        dt].test_inst_causality(causing_ind[0])
                    t_obt_single = inst_sm_single_ind.test_statistic
                    assert_allclose(t_obt_single, t_obt, 1e-07, 0, False,
                                    err_msg_i_t + " - list of int and int as ".upper() +
                                    "argument don't yield the same result!".upper())

                # test p-value for instantaneous non-causality
                p_obt = results_sm[ds][dt].test_inst_causality(
                    causing_ind).pvalue
                p_des = results_ref[ds][dt]["inst_caus"]["p"][(
                    causing_key, caused_key)]
                assert_allclose(p_obt, p_des, rtol, atol, False, err_msg_i_p)
                # check whether string sequences as args work in the same way:
                p_obt_str = inst_sm_str.pvalue
                assert_allclose(p_obt_str, p_obt, 1e-07, 0, False,
                                err_msg_i_p + " - sequences of integers and ".upper() +
                                "strings as arguments don't yield the same result!".upper())
                # check if int (e.g. 0) as index and list of int ([0]) yield
                # the same result:
                if len(causing_ind) == 1:
                    inst_sm_single_ind = results_sm[ds][
                        dt].test_inst_causality(causing_ind[0])
                    p_obt_single = inst_sm_single_ind.pvalue
                    assert_allclose(p_obt_single, p_obt, 1e-07, 0, False,
                                    err_msg_i_p + " - list of int and int as ".upper() +
                                    "argument don't yield the same result!".upper())
Example #3
def load_results_jmulti(dataset, dt_s_list):
    """

    Parameters
    ----------
    dataset : module
        A data module in the statsmodels/datasets directory that defines a
        __str__() method returning the dataset's name.
    dt_s_list : list
        A list of strings where each string represents a combination of
        deterministic terms.

    Returns
    -------
    result : dict
        A dict (keys: tuples of deterministic terms and seasonal terms)
        of dicts (keys: strings "est" (for estimators),
                              "se" (for standard errors),
                              "t" (for t-values),
                              "p" (for p-values))
        of dicts (keys: strings "alpha", "beta", "Gamma" and other results)
    """
    source = "jmulti"

    results_dict_per_det_terms = dict.fromkeys(dt_s_list)

    for dt_s in dt_s_list:
        dt_string = dt_s_tup_to_string(dt_s)
        params_file = (dataset.__str__() + "_" + source + "_" + dt_string
                       + ".txt")
        params_file = os.path.join(here, params_file)
        # sections in jmulti output:
        section_headers = [
            "Lagged endogenous term",  # parameter matrices
            "Deterministic term"
        ]  # c, s, ct
        if dt_string == "nc":
            del section_headers[-1]

        results = dict()
        results["est"] = dict.fromkeys(section_headers)
        results["se"] = dict.fromkeys(section_headers)
        results["t"] = dict.fromkeys(section_headers)
        results["p"] = dict.fromkeys(section_headers)
        result = []
        result_se = []
        result_t = []
        result_p = []

        rows = 0
        started_reading_section = False
        start_end_mark = "-----"

        # ---------------------------------------------------------------------
        # parse information about \alpha, \beta, \Gamma, deterministic of VECM
        # and A_i and deterministic of corresponding VAR:
        section = -1
        params_file = open(params_file, encoding='latin_1')
        for line in params_file:
            if section == -1 and section_headers[section + 1] not in line:
                continue
            if section < len(section_headers)-1 \
                    and section_headers[section+1] in line:  # new section
                section += 1
                continue
            if not started_reading_section:
                if line.startswith(start_end_mark):
                    started_reading_section = True
                continue
            if started_reading_section:
                if line.startswith(start_end_mark):
                    if result == []:  # no values collected in section "Legend"
                        started_reading_section = False
                        continue
                    results["est"][section_headers[section]] = np.column_stack(
                        result)
                    result = []
                    results["se"][section_headers[section]] = np.column_stack(
                        result_se)
                    result_se = []
                    results["t"][section_headers[section]] = np.column_stack(
                        result_t)
                    result_t = []
                    results["p"][section_headers[section]] = np.column_stack(
                        result_p)
                    result_p = []
                    started_reading_section = False
                    continue
                str_number = r"-?\d+\.\d{3}"
                regex_est = re.compile(str_number + r"[^\)\]\}]")
                est_col = re.findall(regex_est, line)
                # standard errors in parentheses in JMulTi output:
                regex_se = re.compile(r"\(" + str_number + r"\)")
                se_col = re.findall(regex_se, line)
                # t-values in brackets in JMulTi output:
                regex_t_value = re.compile(r"\[" + str_number + r"\]")
                t_col = re.findall(regex_t_value, line)
                # p-values in braces in JMulTi output:
                regex_p_value = re.compile(r"\{" + str_number + r"\}")
                p_col = re.findall(regex_p_value, line)
                if result == [] and est_col != []:
                    rows = len(est_col)
                if est_col != []:
                    est_col = [float(el) for el in est_col]
                    result.append(est_col)
                elif se_col != []:
                    for i in range(rows):
                        se_col[i] = se_col[i].replace("(", "").replace(")", "")
                    se_col = [float(el) for el in se_col]
                    result_se.append(se_col)
                elif t_col != []:
                    for i in range(rows):
                        t_col[i] = t_col[i].replace("[", "").replace("]", "")
                    t_col = [float(el) for el in t_col]
                    result_t.append(t_col)
                elif p_col != []:
                    for i in range(rows):
                        p_col[i] = p_col[i].replace("{", "").replace("}", "")
                    p_col = [float(el) for el in p_col]
                    result_p.append(p_col)
        params_file.close()

        # ---------------------------------------------------------------------
        # parse information regarding \Sigma_u
        sigmau_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_Sigmau" + ".txt"
        sigmau_file = os.path.join(here, sigmau_file)
        rows_to_parse = 0
        # Sigma_u entries use e-notation (e.g. 2.283862e-05)
        regex_est = re.compile(r"\s+\S+e\S+")
        sigmau_section_reached = False
        sigmau_file = open(sigmau_file, encoding='latin_1')
        for line in sigmau_file:
            if line.startswith("Log Likelihood:"):
                line = line[len("Log Likelihood:"):]
                results["log_like"] = float(re.findall(regex_est, line)[0])
                continue
            if not sigmau_section_reached and "Covariance:" not in line:
                continue
            if "Covariance:" in line:
                sigmau_section_reached = True
                row = re.findall(regex_est, line)
                rows_to_parse = len(row)  # Sigma_u is square ==> #rows == #cols
                sigma_u = np.empty((rows_to_parse, rows_to_parse))
            row = re.findall(regex_est, line)
            rows_to_parse -= 1
            sigma_u[rows_to_parse] = row  # rows are added in reverse order...
            if rows_to_parse == 0:
                break
        sigmau_file.close()
        results["est"]["Sigma_u"] = sigma_u[::-1]  # ...and reversed again here

        # ---------------------------------------------------------------------
        # parse forecast related output:
        fc_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_fc5" + ".txt"
        fc_file = os.path.join(here, fc_file)
        fc, lower, upper, plu_min = [], [], [], []
        fc_file = open(fc_file, encoding='latin_1')
        for line in fc_file:
            str_number = r"(\s+-?\d+\.\d{3}\s*)"
            regex_number = re.compile(str_number)
            numbers = re.findall(regex_number, line)
            if numbers == []:
                continue
            fc.append(float(numbers[0]))
            lower.append(float(numbers[1]))
            upper.append(float(numbers[2]))
            plu_min.append(float(numbers[3]))
        fc_file.close()
        neqs = len(results["est"]["Sigma_u"])
        fc = np.hstack(np.vsplit(np.array(fc)[:, None], neqs))
        lower = np.hstack(np.vsplit(np.array(lower)[:, None], neqs))
        upper = np.hstack(np.vsplit(np.array(upper)[:, None], neqs))
        results["fc"] = dict.fromkeys(["fc", "lower", "upper"])
        results["fc"]["fc"] = fc
        results["fc"]["lower"] = lower
        results["fc"]["upper"] = upper

        # ---------------------------------------------------------------------
        # parse output related to Granger-caus. and instantaneous causality:
        results["granger_caus"] = dict.fromkeys(["p", "test_stat"])
        results["granger_caus"]["p"] = dict()
        results["granger_caus"]["test_stat"] = dict()
        results["inst_caus"] = dict.fromkeys(["p", "test_stat"])
        results["inst_caus"]["p"] = dict()
        results["inst_caus"]["test_stat"] = dict()
        vn = dataset.variable_names
        # all possible combinations of potentially causing variables
        # (at least 1 variable and not all variables together):
        var_combs = sublists(vn, 1, len(vn) - 1)
        if debug_mode:
            print("\n\n\n" + dt_string)
        for causing in var_combs:
            caused = tuple(name for name in vn if name not in causing)
            causality_file = dataset.__str__() + "_" + source + "_" \
                + dt_string + "_granger_causality_" \
                + stringify_var_names(causing, "_") + ".txt"
            causality_file = os.path.join(here, causality_file)
            causality_file = open(causality_file)
            causality_results = []
            for line in causality_file:
                str_number = r"\d+\.\d{4}"
                regex_number = re.compile(str_number)
                number = re.search(regex_number, line)
                if number is None:
                    continue
                number = float(number.group(0))
                causality_results.append(number)
            causality_file.close()
            results["granger_caus"]["test_stat"][(causing, caused)] = \
                causality_results[0]
            results["granger_caus"]["p"][(causing, caused)] =\
                causality_results[1]
            results["inst_caus"]["test_stat"][(causing, caused)] = \
                causality_results[2]
            results["inst_caus"]["p"][(causing, caused)] = \
                causality_results[3]

        # ---------------------------------------------------------------------
        # parse output related to impulse-response analysis:
        ir_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_ir" + ".txt"
        ir_file = os.path.join(here, ir_file)
        ir_file = open(ir_file, encoding='latin_1')
        causing = None
        caused = None
        data = None
        regex_vars = re.compile(r"\w+")
        regex_vals = re.compile(r"-?\d+\.\d{4}")
        line_start_causing = "time"
        data_line_indicator = "point estimate"
        data_rows_read = 0
        for line in ir_file:
            if causing is None and not line.startswith(line_start_causing):
                continue  # no relevant info in the header
            if line.startswith(line_start_causing):
                line = line[4:]  # strip the leading "time" label
                causing = re.findall(regex_vars, line)
                # 21 periods shown in JMulTi output
                data = np.empty((21, len(causing)))
                continue
            if caused is None:
                caused = re.findall(regex_vars, line)
                continue
            # now start collecting the values:
            if data_line_indicator not in line:
                continue
            start = line.find(data_line_indicator) + len(data_line_indicator)
            line = line[start:]
            data[data_rows_read] = re.findall(regex_vals, line)
            data_rows_read += 1
        ir_file.close()
        results["ir"] = data

        # ---------------------------------------------------------------------
        # parse output related to lag order selection:
        lagorder_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_lagorder" + ".txt"
        lagorder_file = os.path.join(here, lagorder_file)
        lagorder_file = open(lagorder_file, encoding='latin_1')
        results["lagorder"] = dict()
        aic_start = "Akaike Info Criterion:"
        fpe_start = "Final Prediction Error:"
        hqic_start = "Hannan-Quinn Criterion:"
        bic_start = "Schwarz Criterion:"
        for line in lagorder_file:
            if line.startswith(aic_start):
                results["lagorder"]["aic"] = int(line[len(aic_start):])
            elif line.startswith(fpe_start):
                results["lagorder"]["fpe"] = int(line[len(fpe_start):])
            elif line.startswith(hqic_start):
                results["lagorder"]["hqic"] = int(line[len(hqic_start):])
            elif line.startswith(bic_start):
                results["lagorder"]["bic"] = int(line[len(bic_start):])
        lagorder_file.close()

        # ---------------------------------------------------------------------
        # parse output related to non-normality-test:
        test_norm_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_diag" + ".txt"
        test_norm_file = os.path.join(here, test_norm_file)
        test_norm_file = open(test_norm_file, encoding='latin_1')
        results["test_norm"] = dict()
        section_start_marker = "TESTS FOR NONNORMALITY"
        section_reached = False
        subsection_start_marker = "Introduction to Multiple Time Series A"
        subsection_reached = False
        line_start_statistic = "joint test statistic:"
        line_start_pvalue = " p-value:"
        for line in test_norm_file:
            if not section_reached:
                if section_start_marker in line:
                    section_reached = True  # section w/ relevant results found
                continue
            if not subsection_reached:
                if subsection_start_marker in line:
                    subsection_reached = True
                continue
            if "joint_pvalue" in results["test_norm"].keys():
                break
            if line.startswith(line_start_statistic):
                line_end = line[len(line_start_statistic):]
                results["test_norm"]["joint_test_statistic"] = float(line_end)
            if line.startswith(line_start_pvalue):
                line_end = line[len(line_start_pvalue):]
                results["test_norm"]["joint_pvalue"] = float(line_end)
        test_norm_file.close()

        # ---------------------------------------------------------------------
        # parse output related to testing the whiteness of the residuals:
        whiteness_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_diag" + ".txt"
        whiteness_file = os.path.join(here, whiteness_file)
        whiteness_file = open(whiteness_file, encoding='latin_1')
        results["whiteness"] = dict()
        section_start_marker = "PORTMANTEAU TEST"
        order_start = "tested order:"
        statistic_start = "test statistic:"
        p_start = " p-value:"
        adj_statistic_start = "adjusted test statistic:"
        unadjusted_finished = False

        in_section = False
        for line in whiteness_file:
            if not in_section and section_start_marker not in line:
                continue
            if not in_section and section_start_marker in line:
                in_section = True
                continue
            if line.startswith(order_start):
                results["whiteness"]["tested order"] = int(
                    line[len(order_start):])
                continue
            if line.startswith(statistic_start):
                results["whiteness"]["test statistic"] = float(
                    line[len(statistic_start):])
                continue
            if line.startswith(adj_statistic_start):
                results["whiteness"]["test statistic adj."] = float(
                    line[len(adj_statistic_start):])
                continue
            if line.startswith(p_start):  # same for unadjusted and adjusted
                if not unadjusted_finished:
                    results["whiteness"]["p-value"] = \
                        float(line[len(p_start):])
                    unadjusted_finished = True
                else:
                    results["whiteness"]["p-value adjusted"] = \
                        float(line[len(p_start):])
                    break
        whiteness_file.close()

        # ---------------------------------------------------------------------
        if debug_mode:
            print_debug_output(results, dt_string)

        results_dict_per_det_terms[dt_s] = results

    return results_dict_per_det_terms
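This version resolves file paths against a module-level here (presumably here = os.path.dirname(os.path.realpath(__file__)), which the next example inlines at every call site). It also uses stringify_var_names to embed the causing variables in the causality file names; a plausible sketch, assuming it joins lower-cased names with the given delimiter:

def stringify_var_names(var_list, delimiter=""):
    # e.g. ["Dp", "R"] with delimiter "_" -> "dp_r"
    result = var_list[0].lower()
    for var_name in var_list[1:]:
        result += delimiter + var_name.lower()
    return result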
Example #4
def load_results_jmulti(dataset, dt_s_list):
    """

    Parameters
    ----------
    dataset : module
        A data module in the statsmodels/datasets directory that defines a
        __str__() method returning the dataset's name.
    dt_s_list : list
        A list of strings where each string represents a combination of
        deterministic terms.

    Returns
    -------
    result : dict
        A dict (keys: tuples of deterministic terms and seasonal terms)
        of dicts (keys: strings "est" (for estimators),
                              "se" (for standard errors),
                              "t" (for t-values),
                              "p" (for p-values))
        of dicts (keys: strings "alpha", "beta", "Gamma" and other results)
    """
    source = "jmulti"

    results_dict_per_det_terms = dict.fromkeys(dt_s_list)

    for dt_s in dt_s_list:
        dt_string = dt_s_tup_to_string(dt_s)
        params_file = (dataset.__str__() + "_" + source + "_" + dt_string
                       + ".txt")
        params_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   params_file)
        # sections in jmulti output:
        section_headers = ["Lagged endogenous term",  # parameter matrices
                           "Deterministic term"]      # c, s, ct
        if dt_string == "nc":
            del section_headers[-1]

        results = dict()
        results["est"] = dict.fromkeys(section_headers)
        results["se"] = dict.fromkeys(section_headers)
        results["t"] = dict.fromkeys(section_headers)
        results["p"] = dict.fromkeys(section_headers)
        result = []
        result_se = []
        result_t = []
        result_p = []

        rows = 0
        started_reading_section = False
        start_end_mark = "-----"

        # ---------------------------------------------------------------------
        # parse information about \alpha, \beta, \Gamma, deterministic of VECM
        # and A_i and deterministic of corresponding VAR:
        section = -1
        params_file = open(params_file, encoding='latin_1')
        for line in params_file:
            if section == -1 and section_headers[section+1] not in line:
                continue
            if section < len(section_headers)-1 \
                    and section_headers[section+1] in line:  # new section
                section += 1
                continue
            if not started_reading_section:
                if line.startswith(start_end_mark):
                    started_reading_section = True
                continue
            if started_reading_section:
                if line.startswith(start_end_mark):
                    if result == []:  # no values collected in section "Legend"
                        started_reading_section = False
                        continue
                    results["est"][section_headers[section]] = np.column_stack(
                            result)
                    result = []
                    results["se"][section_headers[section]] = np.column_stack(
                            result_se)
                    result_se = []
                    results["t"][section_headers[section]] = np.column_stack(
                            result_t)
                    result_t = []
                    results["p"][section_headers[section]] = np.column_stack(
                            result_p)
                    result_p = []
                    started_reading_section = False
                    continue
                str_number = r"-?\d+\.\d{3}"
                regex_est = re.compile(str_number + r"[^\)\]\}]")
                est_col = re.findall(regex_est, line)
                # standard errors in parentheses in JMulTi output:
                regex_se = re.compile(r"\(" + str_number + r"\)")
                se_col = re.findall(regex_se, line)
                # t-values in brackets in JMulTi output:
                regex_t_value = re.compile(r"\[" + str_number + r"\]")
                t_col = re.findall(regex_t_value, line)
                # p-values in braces in JMulTi output:
                regex_p_value = re.compile(r"\{" + str_number + r"\}")
                p_col = re.findall(regex_p_value, line)
                if result == [] and est_col != []:
                    rows = len(est_col)
                if est_col != []:
                    est_col = [float(el) for el in est_col]
                    result.append(est_col)
                elif se_col != []:
                    for i in range(rows):
                        se_col[i] = se_col[i].replace("(", "").replace(")", "")
                    se_col = [float(el) for el in se_col]
                    result_se.append(se_col)
                elif t_col != []:
                    for i in range(rows):
                        t_col[i] = t_col[i].replace("[", "").replace("]", "")
                    t_col = [float(el) for el in t_col]
                    result_t.append(t_col)
                elif p_col != []:
                    for i in range(rows):
                        p_col[i] = p_col[i].replace("{", "").replace("}", "")
                    p_col = [float(el) for el in p_col]
                    result_p.append(p_col)
        params_file.close()

        # ---------------------------------------------------------------------
        # parse information regarding \Sigma_u
        sigmau_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_Sigmau" + ".txt"
        sigmau_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   sigmau_file)
        rows_to_parse = 0
        # Sigma_u entries use e-notation (e.g. 2.283862e-05)
        regex_est = re.compile(r"\s+\S+e\S+")
        sigmau_section_reached = False
        sigmau_file = open(sigmau_file, encoding='latin_1')
        for line in sigmau_file:
            if line.startswith("Log Likelihood:"):
                line = line[len("Log Likelihood:"):]
                results["log_like"] = float(re.findall(regex_est, line)[0])
                continue
            if not sigmau_section_reached and "Covariance:" not in line:
                continue
            if "Covariance:" in line:
                sigmau_section_reached = True
                row = re.findall(regex_est, line)
                rows_to_parse = len(row)  # Sigma_u is square ==> #rows == #cols
                sigma_u = np.empty((rows_to_parse, rows_to_parse))
            row = re.findall(regex_est, line)
            rows_to_parse -= 1
            sigma_u[rows_to_parse] = row  # rows are added in reverse order...
            if rows_to_parse == 0:
                break
        sigmau_file.close()
        results["est"]["Sigma_u"] = sigma_u[::-1]  # ...and reversed again here

        # ---------------------------------------------------------------------
        # parse forecast related output:
        fc_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_fc5" + ".txt"
        fc_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               fc_file)
        fc, lower, upper, plu_min = [], [], [], []
        fc_file = open(fc_file, encoding='latin_1')
        for line in fc_file:
            str_number = r"(\s+-?\d+\.\d{3}\s*)"
            regex_number = re.compile(str_number)
            numbers = re.findall(regex_number, line)
            if numbers == []:
                continue
            fc.append(float(numbers[0]))
            lower.append(float(numbers[1]))
            upper.append(float(numbers[2]))
            plu_min.append(float(numbers[3]))
        fc_file.close()
        neqs = len(results["est"]["Sigma_u"])
        fc = np.hstack(np.vsplit(np.array(fc)[:, None], neqs))
        lower = np.hstack(np.vsplit(np.array(lower)[:, None], neqs))
        upper = np.hstack(np.vsplit(np.array(upper)[:, None], neqs))
        results["fc"] = dict.fromkeys(["fc", "lower", "upper"])
        results["fc"]["fc"] = fc
        results["fc"]["lower"] = lower
        results["fc"]["upper"] = upper

        # ---------------------------------------------------------------------
        # parse output related to Granger-caus. and instantaneous causality:
        results["granger_caus"] = dict.fromkeys(["p", "test_stat"])
        results["granger_caus"]["p"] = dict()
        results["granger_caus"]["test_stat"] = dict()
        results["inst_caus"] = dict.fromkeys(["p", "test_stat"])
        results["inst_caus"]["p"] = dict()
        results["inst_caus"]["test_stat"] = dict()
        vn = dataset.variable_names
        # all possible combinations of potentially causing variables
        # (at least 1 variable and not all variables together):
        var_combs = sublists(vn, 1, len(vn)-1)
        if debug_mode:
            print("\n\n\n" + dt_string)
        for causing in var_combs:
            caused = tuple(name for name in vn if name not in causing)
            causality_file = dataset.__str__() + "_" + source + "_" \
                + dt_string + "_granger_causality_" \
                + stringify_var_names(causing, "_") + ".txt"
            causality_file = os.path.join(os.path.dirname(
                    os.path.realpath(__file__)), causality_file)
            causality_file = open(causality_file)
            causality_results = []
            for line in causality_file:
                str_number = r"\d+\.\d{4}"
                regex_number = re.compile(str_number)
                number = re.search(regex_number, line)
                if number is None:
                    continue
                number = float(number.group(0))
                causality_results.append(number)
            causality_file.close()
            results["granger_caus"]["test_stat"][(causing, caused)] = \
                causality_results[0]
            results["granger_caus"]["p"][(causing, caused)] =\
                causality_results[1]
            results["inst_caus"]["test_stat"][(causing, caused)] = \
                causality_results[2]
            results["inst_caus"]["p"][(causing, caused)] = \
                causality_results[3]

        # ---------------------------------------------------------------------
        # parse output related to impulse-response analysis:
        ir_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_ir" + ".txt"
        ir_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               ir_file)
        ir_file = open(ir_file, encoding='latin_1')
        causing = None
        caused = None
        data = None
        regex_vars = re.compile(r"\w+")
        regex_vals = re.compile(r"-?\d+\.\d{4}")
        line_start_causing = "time"
        data_line_indicator = "point estimate"
        data_rows_read = 0
        for line in ir_file:
            if causing is None and not line.startswith(line_start_causing):
                continue  # no relevant info in the header
            if line.startswith(line_start_causing):
                line = line[4:]  # strip the leading "time" label
                causing = re.findall(regex_vars, line)
                # 21 periods shown in JMulTi output
                data = np.empty((21, len(causing)))
                continue
            if caused is None:
                caused = re.findall(regex_vars, line)
                continue
            # now start collecting the values:
            if data_line_indicator not in line:
                continue
            start = line.find(data_line_indicator) + len(data_line_indicator)
            line = line[start:]
            data[data_rows_read] = re.findall(regex_vals, line)
            data_rows_read += 1
        ir_file.close()
        results["ir"] = data

        # ---------------------------------------------------------------------
        # parse output related to lag order selection:
        lagorder_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_lagorder" + ".txt"
        lagorder_file = os.path.join(os.path.dirname(
            os.path.realpath(__file__)), lagorder_file)
        lagorder_file = open(lagorder_file, encoding='latin_1')
        results["lagorder"] = dict()
        aic_start = "Akaike Info Criterion:"
        fpe_start = "Final Prediction Error:"
        hqic_start = "Hannan-Quinn Criterion:"
        bic_start = "Schwarz Criterion:"
        for line in lagorder_file:
            if line.startswith(aic_start):
                results["lagorder"]["aic"] = int(line[len(aic_start):])
            elif line.startswith(fpe_start):
                results["lagorder"]["fpe"] = int(line[len(fpe_start):])
            elif line.startswith(hqic_start):
                results["lagorder"]["hqic"] = int(line[len(hqic_start):])
            elif line.startswith(bic_start):
                results["lagorder"]["bic"] = int(line[len(bic_start):])
        lagorder_file.close()

        # ---------------------------------------------------------------------
        # parse output related to non-normality-test:
        test_norm_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_diag" + ".txt"
        test_norm_file = os.path.join(os.path.dirname(
            os.path.realpath(__file__)), test_norm_file)
        test_norm_file = open(test_norm_file, encoding='latin_1')
        results["test_norm"] = dict()
        section_start_marker = "TESTS FOR NONNORMALITY"
        section_reached = False
        subsection_start_marker = "Introduction to Multiple Time Series A"
        subsection_reached = False
        line_start_statistic = "joint test statistic:"
        line_start_pvalue = " p-value:"
        for line in test_norm_file:
            if not section_reached:
                if section_start_marker in line:
                    section_reached = True  # section w/ relevant results found
                continue
            if not subsection_reached:
                if subsection_start_marker in line:
                    subsection_reached = True
                continue
            if "joint_pvalue" in results["test_norm"].keys():
                break
            if line.startswith(line_start_statistic):
                line_end = line[len(line_start_statistic):]
                results["test_norm"]["joint_test_statistic"] = float(line_end)
            if line.startswith(line_start_pvalue):
                line_end = line[len(line_start_pvalue):]
                results["test_norm"]["joint_pvalue"] = float(line_end)
        test_norm_file.close()

        # ---------------------------------------------------------------------
        # parse output related to testing the whiteness of the residuals:
        whiteness_file = dataset.__str__() + "_" + source + "_" + dt_string \
            + "_diag" + ".txt"
        whiteness_file = os.path.join(os.path.dirname(
            os.path.realpath(__file__)), whiteness_file)
        whiteness_file = open(whiteness_file, encoding='latin_1')
        results["whiteness"] = dict()
        section_start_marker = "PORTMANTEAU TEST"
        order_start = "tested order:"
        statistic_start = "test statistic:"
        p_start = " p-value:"
        adj_statistic_start = "adjusted test statistic:"
        unadjusted_finished = False

        in_section = False
        for line in whiteness_file:
            if not in_section and section_start_marker not in line:
                continue
            if not in_section and section_start_marker in line:
                in_section = True
                continue
            if line.startswith(order_start):
                results["whiteness"]["tested order"] = int(
                        line[len(order_start):])
                continue
            if line.startswith(statistic_start):
                results["whiteness"]["test statistic"] = float(
                        line[len(statistic_start):])
                continue
            if line.startswith(adj_statistic_start):
                results["whiteness"]["test statistic adj."] = float(
                        line[len(adj_statistic_start):])
                continue
            if line.startswith(p_start):  # same for unadjusted and adjusted
                if not unadjusted_finished:
                    results["whiteness"]["p-value"] = \
                        float(line[len(p_start):])
                    unadjusted_finished = True
                else:
                    results["whiteness"]["p-value adjusted"] = \
                        float(line[len(p_start):])
                    break
        whiteness_file.close()

        # ---------------------------------------------------------------------
        if debug_mode:
            print_debug_output(results, dt_string)

        results_dict_per_det_terms[dt_s] = results

    return results_dict_per_det_terms
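A short usage sketch for either version of load_results_jmulti; the data module e6 and the deterministic-term tuple ("c", 0) are placeholders for whatever the surrounding test suite actually passes in:

results_ref = load_results_jmulti(e6, [("c", 0)])
res = results_ref[("c", 0)]
print(res["est"]["Lagged endogenous term"])  # parameter estimates
print(res["lagorder"]["aic"])                # lag order suggested by AIC
print(res["granger_caus"]["p"])              # p-values keyed by (causing, caused)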