def spss_corr_apa_table_excel(mod_raw_data_df, output_df):
    # could very easily use the summ_corr_apa_table function here as I pass identical data - seperate is preferred, however, as might be updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]

    if global_vars.corr_table_triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif global_vars.corr_table_triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif global_vars.corr_table_triangle == "Both":
        header_cols, header_rows = variables_list, variables_list

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    inside_loop_ind_start = 2
    for outside_loop_ind in range(2, len(header_rows) + 2):

        if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
            outside_loop_var = ws.cell(row=outside_loop_ind, column=1).value
        elif global_vars.corr_table_triangle == "Lower triangle":
            outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value

        for inside_loop_ind in range(inside_loop_ind_start,
                                     len(header_cols) + 2):

            if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
                inside_loop_var = ws.cell(row=1, column=inside_loop_ind).value
            elif global_vars.corr_table_triangle == "Lower triangle":
                inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value

            if outside_loop_var == inside_loop_var:
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = 1
            else:
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["var1"] == outside_loop_var) &
                    (output_df["var2"] == inside_loop_var)) | (
                        (output_df["var1"] == inside_loop_var) &
                        (output_df["var2"] == outside_loop_var))].iloc[0]
                r = df_filtered[correlation_label]
                p = df_filtered["adjusted_pvalues"]
                r = helper_funcs.correlations_format_val(r, p)

                if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
                    ws.cell(row=outside_loop_ind,
                            column=inside_loop_ind).value = r
                elif global_vars.corr_table_triangle == "Lower triangle":
                    ws.cell(row=inside_loop_ind,
                            column=outside_loop_ind).value = r

        if global_vars.corr_table_triangle != "Both":
            inside_loop_ind_start += 1

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.font = global_vars.font_header

    for row in range(2, len(header_rows) + 2):
        ws.cell(row=row, column=1).font = global_vars.font_header

    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)

    table_notes = ["**p < 0.01", "*p < {}".format(global_vars.alpha_threshold)]
    table_notes.append(
        "Correlation Coefficient used: {}".format(correlation_label))
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_corr_apa_table_word(mod_raw_data_df, output_df):
    # could very easily use the summ_corr_apa_table function here as I pass identical data - seperate is preferred, however, as might be updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]

    if global_vars.corr_table_triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif global_vars.corr_table_triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif global_vars.corr_table_triangle == "Both":
        header_cols = [
            x for x in variables_list
        ]  # not straight up variables_list; pass by value vs pass by reference
        header_rows = [x for x in variables_list]
    # this adds an empty column where the significance signs will be placed for better presentation
    # the code looks slightly wrong as it return a None array but it works
    [header_cols.insert(x, "") for x in range(1, len(header_cols) * 2, 2)]

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=ind).text = var
    for ind, var in enumerate([""] + header_rows):
        table.cell(row_idx=ind, col_idx=0).text = var

    if global_vars.corr_table_triangle == "Upper triangle":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_rows_len):
            outside_loop_var = table.cell(row_idx=outside_loop_ind,
                                          col_idx=0).text
            for inside_loop_ind in range(inside_loop_ind_start,
                                         table_cols_len):
                inside_loop_var = table.cell(row_idx=0,
                                             col_idx=inside_loop_ind).text
                if inside_loop_var == "" or outside_loop_var == "":  # this allows to skip the columns designed for the significance signs
                    continue
                else:
                    # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                    df_filtered = output_df[(
                        (output_df["var1"] == outside_loop_var) &
                        (output_df["var2"] == inside_loop_var)) | (
                            (output_df["var1"] == inside_loop_var) &
                            (output_df["var2"] == outside_loop_var))].iloc[0]
                    r = df_filtered[correlation_label]
                    p = df_filtered["adjusted_pvalues"]
                    r = helper_funcs.correlations_format_val(r, p)
                    if "_" in r:
                        r, sign = r.split("_")
                    else:
                        sign = ""
                    table.cell(row_idx=outside_loop_ind,
                               col_idx=inside_loop_ind).text = r
                    table.cell(row_idx=outside_loop_ind,
                               col_idx=inside_loop_ind + 1).text = sign
            inside_loop_ind_start += 2
    elif global_vars.corr_table_triangle == "Lower triangle":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_cols_len):
            outside_loop_var = table.cell(row_idx=0,
                                          col_idx=outside_loop_ind).text
            if outside_loop_var == "":
                continue
            else:
                for inside_loop_ind in range(inside_loop_ind_start,
                                             table_rows_len):
                    inside_loop_var = table.cell(row_idx=inside_loop_ind,
                                                 col_idx=0).text
                    # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                    df_filtered = output_df[(
                        (output_df["var1"] == outside_loop_var) &
                        (output_df["var2"] == inside_loop_var)) | (
                            (output_df["var1"] == inside_loop_var) &
                            (output_df["var2"] == outside_loop_var))].iloc[0]
                    r = df_filtered[correlation_label]
                    p = df_filtered["adjusted_pvalues"]
                    r = helper_funcs.correlations_format_val(r, p)
                    if "_" in r:
                        r, sign = r.split("_")
                    else:
                        sign = ""
                    table.cell(row_idx=inside_loop_ind,
                               col_idx=outside_loop_ind).text = r
                    table.cell(row_idx=inside_loop_ind,
                               col_idx=outside_loop_ind + 1).text = sign
                inside_loop_ind_start += 1
    elif global_vars.corr_table_triangle == "Both":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_cols_len):
            outside_loop_var = table.cell(row_idx=0,
                                          col_idx=outside_loop_ind).text
            if outside_loop_var == "":
                continue
            else:
                for inside_loop_ind in range(inside_loop_ind_start,
                                             table_rows_len):
                    inside_loop_var = table.cell(row_idx=inside_loop_ind,
                                                 col_idx=0).text
                    if inside_loop_var == "":  # this allows to skip the columns designed for the significance signs
                        continue
                    else:
                        if outside_loop_var == inside_loop_var:
                            table.cell(row_idx=inside_loop_ind,
                                       col_idx=outside_loop_ind).text = "1"
                        else:
                            # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                            df_filtered = output_df[(
                                (output_df["var1"] == outside_loop_var) &
                                (output_df["var2"] == inside_loop_var)) | (
                                    (output_df["var1"] == inside_loop_var) &
                                    (output_df["var2"] == outside_loop_var)
                                )].iloc[0]
                            r = df_filtered[correlation_label]
                            p = df_filtered["adjusted_pvalues"]
                            r = helper_funcs.correlations_format_val(r, p)
                            if "_" in r:
                                r, sign = r.split("_")
                            else:
                                sign = ""
                            table.cell(row_idx=inside_loop_ind,
                                       col_idx=outside_loop_ind).text = r
                            table.cell(row_idx=inside_loop_ind,
                                       col_idx=outside_loop_ind +
                                       1).text = sign

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)

    for i in range(1, table_cols_len, 2):
        table.cell(row_idx=0,
                   col_idx=i).merge(table.cell(row_idx=0, col_idx=i + 1))
        table.cell(row_idx=0, col_idx=i).text = table.cell(
            row_idx=0, col_idx=i
        ).text[:
               -1]  # meging cells adds a paragraph break at the end; this gets rid of it

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].paragraph_format.space_after = Inches(0)
            cell.paragraphs[0].paragraph_format.space_before = Inches(0)
            cell.paragraphs[0].paragraph_format.line_spacing = 1

    for row in range(1, table_rows_len):
        for col in range(1, table_cols_len, 2):
            table.cell(
                row_idx=row, col_idx=col
            ).paragraphs[0].paragraph_format.right_indent = Inches(-0.08)
            table.cell(
                row_idx=row, col_idx=col +
                1).paragraphs[0].paragraph_format.left_indent = Inches(-0.06)
            table.cell(
                row_idx=row,
                col_idx=col).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            table.cell(row_idx=row, col_idx=col +
                       1).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))
    doc.add_paragraph(
        "Correlation Coefficient used: {}".format(correlation_label))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Пример #3
0
def raw_corr_apa_table_noCIs_excel(mod_raw_data_df, output_df):
    variables_list = list(
        mod_raw_data_df.columns
    )  #local var and .columns preferred to minimize use of global scope

    wb = Workbook()
    ws = wb.active

    if global_vars.corr_table_triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif global_vars.corr_table_triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif global_vars.corr_table_triangle == "Both":
        header_cols = [x for x in variables_list]
        header_rows = [x for x in variables_list]

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    inside_loop_ind_start = 2
    for outside_loop_ind in range(2, len(header_rows) + 2):

        if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
            outside_loop_var = ws.cell(row=outside_loop_ind, column=1).value
        elif global_vars.corr_table_triangle == "Lower triangle":
            outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value

        for inside_loop_ind in range(inside_loop_ind_start,
                                     len(header_cols) + 2):

            if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
                inside_loop_var = ws.cell(row=1, column=inside_loop_ind).value
            elif global_vars.corr_table_triangle == "Lower triangle":
                inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value

            if outside_loop_var == inside_loop_var:
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = 1
            else:
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["Variable1"] == outside_loop_var) &
                    (output_df["Variable2"] == inside_loop_var)) | (
                        (output_df["Variable1"] == inside_loop_var) &
                        (output_df["Variable2"] == outside_loop_var))].iloc[0]
                r = df_filtered["Correlation_Coefficient"]
                p = df_filtered["adjusted_pvalues"]
                r = helper_funcs.correlations_format_val(r, p)

                if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
                    ws.cell(row=outside_loop_ind,
                            column=inside_loop_ind).value = r
                elif global_vars.corr_table_triangle == "Lower triangle":
                    ws.cell(row=inside_loop_ind,
                            column=outside_loop_ind).value = r

        if global_vars.corr_table_triangle != "Both":
            inside_loop_ind_start += 1

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1] + ws["A"]:
        cell.font = global_vars.font_header

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)

    table_notes = [
        "Correlation coefficient used: {}".format(
            list(global_vars.master_dict.keys())[list(
                global_vars.master_dict.values()).index(
                    global_vars.raw_corr_type)])
    ]
    table_notes.append("**p < 0.01")
    table_notes.append("*p < {}".format(global_vars.alpha_threshold))
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Пример #4
0
def raw_corr_apa_table_withCIs_word(mod_raw_data_df, output_df):
    variables_list = list(mod_raw_data_df.columns)

    if global_vars.corr_table_triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif global_vars.corr_table_triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif global_vars.corr_table_triangle == "Both":
        header_cols = [x for x in variables_list]
        header_rows = [x for x in variables_list]
    # this adds an empty column where the significance signs will be placed for better presentation
    # then adds empty rows for the CIs
    # the code looks slightly wrong as it return a None array but it works
    [header_cols.insert(x, "") for x in range(1, len(header_cols) * 2, 2)]
    [header_rows.insert(x, "") for x in range(1, len(header_rows) * 2, 2)]

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=ind).text = var
    for ind, var in enumerate([""] + header_rows):
        table.cell(row_idx=ind, col_idx=0).text = var

    if global_vars.corr_table_triangle == "Upper triangle":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_rows_len, 2):
            outside_loop_var = table.cell(row_idx=outside_loop_ind,
                                          col_idx=0).text
            for inside_loop_ind in range(inside_loop_ind_start, table_cols_len,
                                         2):
                inside_loop_var = table.cell(row_idx=0,
                                             col_idx=inside_loop_ind).text
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["Variable1"] == outside_loop_var) &
                    (output_df["Variable2"] == inside_loop_var)) | (
                        (output_df["Variable1"] == inside_loop_var) &
                        (output_df["Variable2"] == outside_loop_var))].iloc[0]
                r = df_filtered["Correlation_Coefficient"]
                p = df_filtered["adjusted_pvalues"]
                ci_low, ci_high = helper_funcs.correlations_format_val(
                    df_filtered["CI_low"]
                ), helper_funcs.correlations_format_val(df_filtered["CI_high"])
                r = helper_funcs.correlations_format_val(r, p)
                if "_" in r:
                    r, sign = r.split("_")
                else:
                    sign = ""
                table.cell(row_idx=outside_loop_ind,
                           col_idx=inside_loop_ind).text = r
                table.cell(row_idx=outside_loop_ind,
                           col_idx=inside_loop_ind + 1).text = sign
                table.cell(row_idx=outside_loop_ind + 1,
                           col_idx=inside_loop_ind
                           ).text = "[" + ci_low + ", " + ci_high + "]"
            inside_loop_ind_start += 2
    elif global_vars.corr_table_triangle == "Lower triangle":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_cols_len, 2):
            outside_loop_var = table.cell(row_idx=0,
                                          col_idx=outside_loop_ind).text
            for inside_loop_ind in range(inside_loop_ind_start, table_rows_len,
                                         2):
                inside_loop_var = table.cell(row_idx=inside_loop_ind,
                                             col_idx=0).text
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["Variable1"] == outside_loop_var) &
                    (output_df["Variable2"] == inside_loop_var)) | (
                        (output_df["Variable1"] == inside_loop_var) &
                        (output_df["Variable2"] == outside_loop_var))].iloc[0]
                r = df_filtered["Correlation_Coefficient"]
                p = df_filtered["adjusted_pvalues"]
                ci_low, ci_high = helper_funcs.correlations_format_val(
                    df_filtered["CI_low"]
                ), helper_funcs.correlations_format_val(df_filtered["CI_high"])
                r = helper_funcs.correlations_format_val(r, p)
                if "_" in r:
                    r, sign = r.split("_")
                else:
                    sign = ""
                table.cell(row_idx=inside_loop_ind,
                           col_idx=outside_loop_ind).text = r
                table.cell(row_idx=inside_loop_ind,
                           col_idx=outside_loop_ind + 1).text = sign
                table.cell(row_idx=inside_loop_ind + 1,
                           col_idx=outside_loop_ind
                           ).text = "[" + ci_low + ", " + ci_high + "]"
            inside_loop_ind_start += 2
    elif global_vars.corr_table_triangle == "Both":
        inside_loop_ind_start = 1
        for outside_loop_ind in range(1, table_cols_len, 2):
            outside_loop_var = table.cell(row_idx=0,
                                          col_idx=outside_loop_ind).text
            for inside_loop_ind in range(inside_loop_ind_start, table_rows_len,
                                         2):
                inside_loop_var = table.cell(row_idx=inside_loop_ind,
                                             col_idx=0).text
                if outside_loop_var == inside_loop_var:
                    table.cell(row_idx=inside_loop_ind,
                               col_idx=outside_loop_ind).text = "1"
                else:
                    # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                    df_filtered = output_df[
                        ((output_df["Variable1"] == outside_loop_var) &
                         (output_df["Variable2"] == inside_loop_var)) |
                        ((output_df["Variable1"] == inside_loop_var) &
                         (output_df["Variable2"] == outside_loop_var))].iloc[0]
                    r = df_filtered["Correlation_Coefficient"]
                    p = df_filtered["adjusted_pvalues"]
                    ci_low, ci_high = helper_funcs.correlations_format_val(
                        df_filtered["CI_low"]
                    ), helper_funcs.correlations_format_val(
                        df_filtered["CI_high"])
                    r = helper_funcs.correlations_format_val(r, p)
                    if "_" in r:
                        r, sign = r.split("_")
                    else:
                        sign = ""
                    table.cell(row_idx=inside_loop_ind,
                               col_idx=outside_loop_ind).text = r
                    table.cell(row_idx=inside_loop_ind,
                               col_idx=outside_loop_ind + 1).text = sign
                    table.cell(row_idx=inside_loop_ind + 1,
                               col_idx=outside_loop_ind
                               ).text = "[" + ci_low + ", " + ci_high + "]"

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for i in range(1, table_cols_len, 2):
        table.cell(row_idx=0,
                   col_idx=i).merge(table.cell(row_idx=0, col_idx=i + 1))
        table.cell(row_idx=0, col_idx=i).text = table.cell(
            row_idx=0, col_idx=i
        ).text[:
               -1]  # meging cells adds a paragraph break at the end; this gets rid of it
    for i in range(1, table_rows_len, 2):
        table.cell(row_idx=i,
                   col_idx=0).merge(table.cell(row_idx=i + 1, col_idx=0))
        table.cell(row_idx=i, col_idx=0).text = table.cell(
            row_idx=i, col_idx=0
        ).text[:
               -1]  # meging cells adds a paragraph break at the end; this gets rid of it
    for row in range(2, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            table.cell(row_idx=row, col_idx=col).merge(
                table.cell(row_idx=row, col_idx=col + 1))
            table.cell(row_idx=row,
                       col_idx=col).alignment = WD_ALIGN_PARAGRAPH.CENTER
            helper_funcs.word_style(table.cell(row_idx=row, col_idx=col),
                                    size=9)

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
        cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for row in range(0, table_rows_len):
        for col in range(1, table_cols_len):
            cell = table.cell(row_idx=row, col_idx=col)
            if col >= 1:
                cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].paragraph_format.space_after = Inches(0)
            cell.paragraphs[0].paragraph_format.space_before = Inches(0)
            cell.paragraphs[0].paragraph_format.line_spacing = 1

    for row in range(1, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            table.cell(
                row_idx=row, col_idx=col
            ).paragraphs[0].paragraph_format.right_indent = Inches(-0.08)
            table.cell(
                row_idx=row, col_idx=col +
                1).paragraphs[0].paragraph_format.left_indent = Inches(-0.06)
            table.cell(
                row_idx=row,
                col_idx=col).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            table.cell(row_idx=row, col_idx=col +
                       1).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))
    doc.add_paragraph("Correlation coefficient used: {}".format(
        list(global_vars.master_dict.keys())[list(
            global_vars.master_dict.values()).index(
                global_vars.raw_corr_type)]))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Пример #5
0
def raw_corr_apa_table_withCIs_excel(mod_raw_data_df, output_df):
    variables_list = list(
        mod_raw_data_df.columns
    )  # local var and .columns preferred to minimize use of global scope

    wb = Workbook()
    ws = wb.active

    if global_vars.corr_table_triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif global_vars.corr_table_triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif global_vars.corr_table_triangle == "Both":
        header_cols = [x for x in variables_list]
        header_rows = [x for x in variables_list]
    # this adds an empty row where the CIs will be
    # the code looks slightly wrong as it return a None array but it works
    [header_rows.insert(x, "") for x in range(1, len(header_rows) * 2, 2)]

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    if global_vars.corr_table_triangle == "Upper triangle":
        inside_loop_ind_start = 2
        for outside_loop_ind in range(2, len(header_rows) + 2, 2):
            outside_loop_var = ws.cell(row=outside_loop_ind, column=1).value
            for inside_loop_ind in range(inside_loop_ind_start,
                                         len(header_cols) + 2):
                inside_loop_var = ws.cell(row=1, column=inside_loop_ind).value
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["Variable1"] == outside_loop_var) &
                    (output_df["Variable2"] == inside_loop_var)) | (
                        (output_df["Variable1"] == inside_loop_var) &
                        (output_df["Variable2"] == outside_loop_var))].iloc[0]
                r = df_filtered["Correlation_Coefficient"]
                p = df_filtered["adjusted_pvalues"]
                r = helper_funcs.correlations_format_val(r, p)
                ci_low, ci_high = helper_funcs.correlations_format_val(
                    df_filtered["CI_low"]
                ), helper_funcs.correlations_format_val(df_filtered["CI_high"])
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = r
                ws.cell(row=outside_loop_ind + 1, column=inside_loop_ind
                        ).value = "[" + ci_low + ", " + ci_high + "]"
            inside_loop_ind_start += 1
    elif global_vars.corr_table_triangle == "Lower triangle":
        inside_loop_ind_start = 2
        for outside_loop_ind in range(2, len(header_cols) + 2):
            outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value
            for inside_loop_ind in range(inside_loop_ind_start,
                                         len(header_rows) + 2, 2):
                inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value
                # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                df_filtered = output_df[(
                    (output_df["Variable1"] == outside_loop_var) &
                    (output_df["Variable2"] == inside_loop_var)) | (
                        (output_df["Variable1"] == inside_loop_var) &
                        (output_df["Variable2"] == outside_loop_var))].iloc[0]
                r = df_filtered["Correlation_Coefficient"]
                p = df_filtered["adjusted_pvalues"]
                r = helper_funcs.correlations_format_val(r, p)
                ci_low, ci_high = helper_funcs.correlations_format_val(
                    df_filtered["CI_low"]
                ), helper_funcs.correlations_format_val(df_filtered["CI_high"])
                ws.cell(row=inside_loop_ind, column=outside_loop_ind).value = r
                ws.cell(row=inside_loop_ind + 1, column=outside_loop_ind
                        ).value = "[" + ci_low + ", " + ci_high + "]"
            inside_loop_ind_start += 2
    elif global_vars.corr_table_triangle == "Both":
        inside_loop_ind_start = 2
        for outside_loop_ind in range(2, len(header_cols) + 2):
            outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value
            for inside_loop_ind in range(inside_loop_ind_start,
                                         len(header_rows) + 2, 2):
                inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value
                if outside_loop_var == inside_loop_var:
                    ws.cell(row=inside_loop_ind,
                            column=outside_loop_ind).value = "1"
                else:
                    # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
                    df_filtered = output_df[
                        ((output_df["Variable1"] == outside_loop_var) &
                         (output_df["Variable2"] == inside_loop_var)) |
                        ((output_df["Variable1"] == inside_loop_var) &
                         (output_df["Variable2"] == outside_loop_var))].iloc[0]
                    r = df_filtered["Correlation_Coefficient"]
                    p = df_filtered["adjusted_pvalues"]
                    r = helper_funcs.correlations_format_val(r, p)
                    ci_low, ci_high = helper_funcs.correlations_format_val(
                        df_filtered["CI_low"]
                    ), helper_funcs.correlations_format_val(
                        df_filtered["CI_high"])
                    ws.cell(row=inside_loop_ind,
                            column=outside_loop_ind).value = r
                    ws.cell(row=inside_loop_ind + 1, column=outside_loop_ind
                            ).value = "[" + ci_low + ", " + ci_high + "]"

    for row in range(2, len(header_rows) + 1, 2):
        ws.merge_cells(start_row=row,
                       start_column=1,
                       end_row=row + 1,
                       end_column=1)
    if global_vars.corr_table_triangle == "Both":
        for col in range(2, len(header_cols) + 2):
            ws.merge_cells(start_row=col * 2 - 2,
                           start_column=col,
                           end_row=col * 2 - 1,
                           end_column=col)

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1] + ws["A"]:
        cell.font = global_vars.font_header

    for row in range(3, len(header_rows) + 2, 2):
        for cell in ws[row]:
            cell.font = Font(size=9)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)

    table_notes = [
        "Correlation coefficient used: {}".format(
            list(global_vars.master_dict.keys())[list(
                global_vars.master_dict.values()).index(
                    global_vars.raw_corr_type)])
    ]
    table_notes.append("**p < 0.01")
    table_notes.append("*p < {}".format(global_vars.alpha_threshold))
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Пример #6
0
def summ_corr_apa_table_excel(mod_raw_data_df, output_df):
	# unique vars in var1 given by user's order and here ONLY adding vars from col 2 that are NOT in var 1
	variables_list = list(output_df[global_vars.summ_corr_varOne].unique()) + list(set(output_df[global_vars.summ_corr_varTwo].unique()) - set(output_df[global_vars.summ_corr_varOne].unique()))
	if global_vars.corr_table_triangle == "Upper triangle":
		header_cols = variables_list[1:]
		header_rows = variables_list[:-1]
	elif global_vars.corr_table_triangle == "Lower triangle":
		header_cols = variables_list[:-1]
		header_rows = variables_list[1:]
	elif global_vars.corr_table_triangle == "Both":
		header_cols, header_rows = variables_list, variables_list

	wb = Workbook()
	ws = wb.active

	ws.append([""] + header_cols)
	for ind,var in enumerate(header_rows):
		ws.cell(row=ind+2, column=1).value = var

	inside_loop_ind_start = 2
	for outside_loop_ind in range(2, len(header_rows) + 2):

		if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
			outside_loop_var = ws.cell(row=outside_loop_ind, column=1).value
		elif global_vars.corr_table_triangle == "Lower triangle":
			outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value
		
		for inside_loop_ind in range(inside_loop_ind_start, len(header_cols) + 2):
			
			if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
				inside_loop_var = ws.cell(row=1, column=inside_loop_ind).value
			elif global_vars.corr_table_triangle == "Lower triangle":
				inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value

			if outside_loop_var == inside_loop_var:
				ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = 1
			else:
				# here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different vaiables (colname and val)
				df_filtered = output_df[((output_df[global_vars.summ_corr_varOne]==outside_loop_var) & (output_df[global_vars.summ_corr_varTwo]==inside_loop_var)) | ((output_df[global_vars.summ_corr_varOne]==inside_loop_var) & (output_df[global_vars.summ_corr_varTwo]==outside_loop_var))].iloc[0]
				r = df_filtered[global_vars.summ_corr_coeff]
				p = df_filtered["adjusted_pvalues"]
				r = helper_funcs.correlations_format_val(r, p)

				if global_vars.corr_table_triangle == "Upper triangle" or global_vars.corr_table_triangle == "Both":
					ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = r
				elif global_vars.corr_table_triangle == "Lower triangle":
					ws.cell(row=inside_loop_ind, column=outside_loop_ind).value = r
		
		if global_vars.corr_table_triangle != "Both":
			inside_loop_ind_start += 1


	for row in range(1, len(header_rows) + 2):
		for cell in ws[row]:
			cell.alignment = global_vars.alignment_center

	for cell in ws[1]:
		cell.font = global_vars.font_header

	for row in range(2, len(header_rows) + 2):
		ws.cell(row=row, column=1).font = global_vars.font_header

	for cell in ws[len(header_rows) + 1]:
		cell.border = Border(bottom=global_vars.border_APA)

	for cell in ws[1]:
		cell.border = Border(top=global_vars.border_APA, bottom=global_vars.border_APA)

	table_notes = ["**p < 0.01", "*p < {}".format(global_vars.alpha_threshold)]
	helper_funcs.add_table_notes(ws, table_notes)

	helper_funcs.savefile(wb=wb)