Exemplo n.º 1
0
def get_controls_comparison(order, default_category):
    """Build the control card for one side of a category comparison.

    Args:
        order: Prefix interpolated into every component id and into the
            drop-down label (e.g. the ids become ``f"{order}_category_comparison"``).
        default_category: Initial value handed to the category drop-down.

    Returns:
        A ``dbc.Card`` holding the category drop-down followed by a
        ``html.Div`` (styled ``display: none``) that wraps the XWAS type,
        subset method and correlation type radio items.
    """
    # Every subcategory listed under "All", plus one "All_<main category>" entry
    # per key of the mapping.
    category_options = MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
        f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES
    ]
    category_drop_down = get_drop_down(
        f"{order}_category_comparison",
        category_options,
        f"Select {order} category to compare: ",
        from_dict=False,
        value=default_category,
    )

    xwas_type_selector = get_item_radio_items(
        f"{order}_uni_or_multi_comparison",
        UNIVARIATE_OR_MULTIVARIATE,
        "Select the type of XWAS :",
    )
    method_selector = get_item_radio_items(
        f"{order}_method_comparison", SUBSET_METHODS, "Select method :"
    )
    correlation_selector = get_item_radio_items(
        f"{order}_correlation_type_comparison",
        CORRELATION_TYPES,
        "Select correlation type :",
    )
    # The wrapper starts hidden; its id is kept exactly as spelled because
    # other code may look it up by that string.
    hidden_settings = html.Div(
        [xwas_type_selector, method_selector, correlation_selector],
        id=f"{order}_hiden_settings",
        style={"display": "none"},
    )

    return dbc.Card([category_drop_down, hidden_settings])
def get_controls_tab_category_multivariate():
    """Build the control card of the multivariate "category" tab.

    Returns:
        A ``dbc.Card`` with, in order: the main category radio items, the
        subcategory drop-down, the ordering radio items, the algorithm radio
        items and the correlation type radio items.
    """
    # Subcategories offered by the drop-down: everything under "All" except
    # the entries listed in MULTIVARIATE_CATEGORIES_TO_REMOVE.
    subcategories = pd.Index(MAIN_CATEGORIES_TO_CATEGORIES["All"]).drop(
        MULTIVARIATE_CATEGORIES_TO_REMOVE
    )

    main_category_selector = get_item_radio_items(
        "main_category_category_multivariate",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    subcategory_drop_down = get_drop_down(
        "category_category_multivariate",
        subcategories,
        "Select X subcategory: ",
        from_dict=False,
    )
    order_selector = get_item_radio_items(
        "order_type_category_multivariate", ORDER_TYPES, "Order by:"
    )

    # Only these three algorithms are offered here (no "best_algorithm").
    algorithm_options = {
        name: ALGORITHMS[name]
        for name in ("elastic_net", "light_gbm", "neural_network")
    }
    algorithm_selector = get_item_radio_items(
        "algorithm_category", algorithm_options, "Select an Algorithm :"
    )
    correlation_selector = get_item_radio_items(
        "correlation_type_category_multivariate",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card([
        main_category_selector,
        subcategory_drop_down,
        order_selector,
        algorithm_selector,
        correlation_selector,
    ])
Exemplo n.º 3
0
def get_controls_tab_bar_plot_multivariate_results():
    """Build the control card of the multivariate results bar plot tab.

    Returns:
        A ``dbc.Card`` with, in order: the main category radio items, the
        aging dimension drop-down, the ranking mode radio items and the
        algorithm radio items.
    """
    main_category_selector = get_item_radio_items(
        "main_category_bar_plot_multivariate_results",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    dimension_drop_down = get_drop_down(
        "dimension_bar_plot_multivariate_results",
        DIMENSIONS_SUBDIMENSIONS,
        "Select an aging dimension : ",
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_bar_plot_multivariate_results",
        DISPLAY_MODE,
        "Rank by : ",
    )

    # Subset of ALGORITHMS shown in this tab, in display order.
    algorithm_names = ("best_algorithm", "elastic_net", "light_gbm", "neural_network")
    algorithm_selector = get_item_radio_items(
        "algorithm_bar_plot_multivariate_results",
        {name: ALGORITHMS[name] for name in algorithm_names},
        "Select an Algorithm :",
    )

    return dbc.Card([
        main_category_selector,
        dimension_drop_down,
        display_mode_selector,
        algorithm_selector,
    ])
Exemplo n.º 4
0
def get_controls_tab_heatmap_multivariate_results():
    """Build the control card of the multivariate results heatmap tab.

    Returns:
        A ``dbc.Card`` with the main category radio items, the algorithm radio
        items, and a ``html.Div`` (styled ``display: none``) containing a
        heading and a pie chart graph wrapped in ``dcc.Loading``.
    """
    main_category_selector = get_item_radio_items(
        "main_category_heatmap_multivariate_results",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )

    # Subset of ALGORITHMS shown in this tab, in display order.
    algorithm_names = ("best_algorithm", "elastic_net", "light_gbm", "neural_network")
    algorithm_selector = get_item_radio_items(
        "algorithm_heatmap_multivariate_results",
        {name: ALGORITHMS[name] for name in algorithm_names},
        "Select an Algorithm :",
    )

    # Pie chart section; it is created hidden.
    pie_chart = dcc.Graph(
        id="pie_chart_heatmap_multivariate_results", config=DOWNLOAD_CONFIG
    )
    pie_chart_section = html.Div(
        [
            html.H5("Composition of the best algorithm"),
            dcc.Loading(pie_chart),
        ],
        id="div_pie_chart_heatmap_multivariate_results",
        style={"display": "none"},
    )

    return dbc.Card([main_category_selector, algorithm_selector, pie_chart_section])
Exemplo n.º 5
0
def get_controls_tab():
    """Build the control card of the univariate summary tab.

    Returns:
        A ``dbc.Card`` with the main category radio items followed by the
        item radio items built from ``ITEMS_LEGEND``.
    """
    main_categories = list(MAIN_CATEGORIES_TO_CATEGORIES.keys())

    main_category_selector = get_item_radio_items(
        "main_category_univariate_summary",
        main_categories,
        "Select X main category: ",
        from_dict=False,
    )
    item_selector = get_item_radio_items(
        "item_univariate_summary", ITEMS_LEGEND, "Select :"
    )

    return dbc.Card([main_category_selector, item_selector])
Exemplo n.º 6
0
def get_controls_features_multivariate():
    """Build the control card of the multivariate features tab.

    Returns:
        A ``dbc.Card`` with the main category radio items, the subcategory
        drop-down (created with the single placeholder option ``"..."``) and
        the aging dimension drop-down.
    """
    main_category_selector = get_item_radio_items(
        "main_category_features_multivariate",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    # Presumably the real options are filled in later by a callback - verify
    # against the callbacks of this tab.
    subcategory_drop_down = get_drop_down(
        "category_features_multivariate",
        ["..."],
        "Select X subcategory: ",
        from_dict=False,
    )
    dimension_drop_down = get_drop_down(
        "dimension_subdimension_features_multivariate",
        DIMENSIONS_SUBDIMENSIONS,
        "Select an aging dimension: ",
    )

    return dbc.Card([main_category_selector, subcategory_drop_down, dimension_drop_down])
def get_controls_tab_average_multivariate():
    """Build the control card of the multivariate "average" tab.

    Returns:
        A ``dbc.Card`` with, in order: the main category radio items, the
        first aging dimension drop-down, a ``html.Div`` (styled
        ``display: none``) wrapping the second aging dimension drop-down, the
        ranking mode radio items, the algorithm radio items and the
        correlation type radio items.
    """
    # First drop-down: the two aggregate entries followed by every entry of
    # DIMENSIONS_SUBDIMENSIONS.
    first_dimension_options = dict(
        MainDimensions="MainDimensions", SubDimensions="SubDimensions"
    )
    first_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    # Second drop-down: "average" followed by every entry of
    # DIMENSIONS_SUBDIMENSIONS.
    second_dimension_options = dict(average="average")
    second_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    main_category_selector = get_item_radio_items(
        "main_category_average_multivariate",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    first_dimension_drop_down = get_drop_down(
        "dimension_subdimension_1_average_multivariate",
        first_dimension_options,
        "Select an aging dimension 1: ",
    )
    second_dimension_drop_down = get_drop_down(
        "dimension_subdimension_2_average_multivariate",
        second_dimension_options,
        "Select an aging dimension 2: ",
    )
    # The second drop-down lives in a wrapper that is created hidden; the id is
    # kept exactly as spelled.
    hidden_second_dimension = html.Div(
        [second_dimension_drop_down],
        id="hiden_dimension_subdimension_2_average_multivariate",
        style={"display": "none"},
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_average_multivariate",
        DISPLAY_MODE,
        "Rank by : ",
    )
    algorithm_selector = get_item_radio_items(
        "algorithm_average_multivariate",
        {
            name: ALGORITHMS[name]
            for name in ("elastic_net", "light_gbm", "neural_network")
        },
        "Select an algorithm :",
    )
    correlation_selector = get_item_radio_items(
        "correlation_type_average_multivariate",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card([
        main_category_selector,
        first_dimension_drop_down,
        hidden_second_dimension,
        display_mode_selector,
        algorithm_selector,
        correlation_selector,
    ])
Exemplo n.º 8
0
def get_controls_tab_univariate_category():
    """Build the control card of the univariate "category" tab.

    Returns:
        A ``dbc.Card`` with, in order: the main category radio items, the
        subcategory drop-down (created with the single option ``"All"``), the
        ordering radio items, the subset method radio items and the
        correlation type radio items.
    """
    controls = []

    controls.append(
        get_item_radio_items(
            "main_category_univariate_category",
            list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
            "Select X main category: ",
            from_dict=False,
        )
    )
    controls.append(
        get_drop_down(
            "category_univariate_category",
            ["All"],
            "Select X subcategory: ",
            from_dict=False,
        )
    )

    # The remaining controls are all radio items built from (id, options, label).
    for component_id, options, label in (
        ("order_type_univariate_category", ORDER_TYPES, "Order by:"),
        ("subset_method_univariate_category", SUBSET_METHODS, "Select subset method :"),
        ("correlation_type_univariate_category", CORRELATION_TYPES, "Select correlation type :"),
    ):
        controls.append(get_item_radio_items(component_id, options, label))

    return dbc.Card(controls)
Exemplo n.º 9
0
def get_controls_tab_univariate_volcano():
    """Build the control card of the univariate volcano tab.

    Returns:
        A ``dbc.Card`` with the main category radio items, the subcategory
        drop-down (created with the single option ``"All"``) and the aging
        dimension drop-down.
    """
    main_category_selector = get_item_radio_items(
        "main_category_univariate_volcano",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    subcategory_drop_down = get_drop_down(
        "category_univariate_volcano",
        ["All"],
        "Select X subcategory: ",
        from_dict=False,
    )
    dimension_drop_down = get_drop_down(
        "dimension_univariate_volcano",
        DIMENSIONS_SUBDIMENSIONS,
        "Select an aging dimension: ",
    )

    return dbc.Card([main_category_selector, subcategory_drop_down, dimension_drop_down])
Exemplo n.º 10
0
def get_controls_tab_univariate_average():
    """Build the control card of the univariate "average" tab.

    Returns:
        A ``dbc.Card`` with, in order: the main category radio items, the
        first aging dimension drop-down, a ``html.Div`` (styled
        ``display: none``) wrapping the second aging dimension drop-down, the
        ranking mode radio items, the subset method radio items and the
        correlation type radio items.
    """
    # First drop-down: the two aggregate entries followed by every entry of
    # DIMENSIONS_SUBDIMENSIONS.
    first_dimension_options = dict(
        MainDimensions="MainDimensions", SubDimensions="SubDimensions"
    )
    first_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    # Second drop-down: "average" followed by every entry of
    # DIMENSIONS_SUBDIMENSIONS.
    second_dimension_options = dict(average="average")
    second_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    main_category_selector = get_item_radio_items(
        "main_category_univariate_average",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    first_dimension_drop_down = get_drop_down(
        "dimension_subdimension_1_univariate_average",
        first_dimension_options,
        "Select an aging dimension 1: ",
    )
    second_dimension_drop_down = get_drop_down(
        "dimension_subdimension_2_univariate_average",
        second_dimension_options,
        "Select an aging dimension 2: ",
    )
    # The second drop-down lives in a wrapper that is created hidden; the id is
    # kept exactly as spelled.
    hidden_second_dimension = html.Div(
        [second_dimension_drop_down],
        id="hiden_dimension_subdimension_2_univariate_average",
        style={"display": "none"},
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_univariate_average",
        DISPLAY_MODE,
        "Rank by : ",
    )
    subset_method_selector = get_item_radio_items(
        "subset_method_univariate_average", SUBSET_METHODS, "Select subset method :"
    )
    correlation_selector = get_item_radio_items(
        "correlation_type_univariate_average",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card([
        main_category_selector,
        first_dimension_drop_down,
        hidden_second_dimension,
        display_mode_selector,
        subset_method_selector,
        correlation_selector,
    ])
Exemplo n.º 11
0
def _fill_graph_tab_univariate_average(
    subset_method,
    correlation_type,
    main_category,
    dimension_subdimension_1,
    dimension_subdimension_2,
    display_mode,
    data_correlations,
    data_averages,
):
    """Build the bar chart of the univariate "average" tab and its title.

    Two cases, chosen by ``dimension_subdimension_2``:

    * ``"average"``: plot the "mean" column of ``data_averages`` (with the
      "std" column as error bars) for ``dimension_subdimension_1``.
    * anything else: plot the correlations of ``data_correlations`` for the
      squeezed dimension named by ``dimension_subdimension_2``.
      ``dimension_subdimension_1`` is not used in this case.

    Args:
        subset_method: First level of the column key to select.
        correlation_type: Second level of the column key to select.
        main_category: Key of ``MAIN_CATEGORIES_TO_CATEGORIES``; ``"All"``
            adds one ``All_<main category>`` entry per key of that mapping,
            any other value adds only ``All_<main_category>``.
        dimension_subdimension_1: Row key used in the "average" case.
        dimension_subdimension_2: ``"average"`` or the squeezed dimension to
            select in the correlations case.
        display_mode: Selects between the flat, sorted bar chart and the chart
            grouped per main category (see the notes in the body).
        data_correlations: Data accepted by ``pd.DataFrame`` with columns
            "dimension", "subdimension" and "category"; the other column
            labels are strings that evaluate to
            ``(subset_method, correlation_type)`` tuples.
        data_averages: Data accepted by ``pd.DataFrame`` with columns
            "dimension" and "category"; the other column labels are strings
            that evaluate to ``(subset_method, correlation_type, observation)``
            tuples.

    Returns:
        A ``(figure, title)`` tuple: the plotly figure and a summary string
        with the mean and standard deviation of the plotted values.
    """
    import plotly.graph_objs as go

    # Aggregate "All_<main category>" entries to select alongside the
    # subcategories of the chosen main category.
    if main_category == "All":
        all_main_categories = [
            f"All_{main_cat}"
            for main_cat in MAIN_CATEGORIES_TO_CATEGORIES.keys()
        ]
    else:
        all_main_categories = [f"All_{main_category}"]

    if dimension_subdimension_2 == "average":
        averages = pd.DataFrame(data_averages).set_index(
            ["dimension", "category"])
        # Column labels arrive as strings; eval turns them back into tuples.
        # NOTE(review): eval executes arbitrary expressions - if this data can
        # be influenced by a client, ast.literal_eval would be the safer
        # parser. Confirm where data_averages comes from.
        averages.columns = pd.MultiIndex.from_tuples(
            list(map(eval, averages.columns.tolist())),
            names=["subset_method", "correlation_type", "observation"])

        # Rows: the requested dimension and categories; columns: the
        # observations of (subset_method, correlation_type). Sorted by
        # decreasing "mean".
        sorted_averages = averages.loc[(
            dimension_subdimension_1,
            MAIN_CATEGORIES_TO_CATEGORIES[main_category] +
            all_main_categories), (subset_method,
                                   correlation_type), ].sort_values(
                                       by=["mean"], ascending=False)

        # NOTE(review): this branch tests "view_all" while the correlations
        # branch below tests "view_decreasing" - confirm which keys
        # DISPLAY_MODE actually provides.
        if display_mode == "view_all":
            bars = go.Bar(
                x=sorted_averages.index.get_level_values("category"),
                y=sorted_averages["mean"],
                error_y={
                    "array": sorted_averages["std"],
                    "type": "data"
                },
                name="Average correlations",
                marker_color="indianred",
            )
        else:  # display_mode == view_per_main_category then main_category = All
            # This path looks up the subcategories of every main category and
            # the three hard-coded extra categories, so they must all be
            # present in sorted_averages (hence main_category == "All").
            list_main_category = []
            list_categories = []
            # Get the ranking of subcategories per main category
            for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
                if main_category_group == "All":
                    continue
                sorted_categories = (sorted_averages.swaplevel().loc[
                    MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] +
                    [f"All_{main_category_group}"]].sort_values(
                        by=["mean"], ascending=False))
                sorted_index_categories = sorted_categories.index.get_level_values(
                    "category")

                list_categories.extend(sorted_index_categories)
                list_main_category.extend([main_category_group] *
                                          len(sorted_index_categories))

            # Two-level x axis: main category on the outer level, subcategory
            # on the inner one. The three extra categories are appended with an
            # empty main category label.
            bars = go.Bar(
                x=[
                    list_main_category + ["", "", ""], list_categories +
                    ["FamilyHistory", "Genetics", "Phenotypic"]
                ],
                y=sorted_averages["mean"].swaplevel()
                [list_categories +
                 ["FamilyHistory", "Genetics", "Phenotypic"]],
                error_y={
                    "array":
                    sorted_averages["std"].swaplevel()
                    [list_categories +
                     ["FamilyHistory", "Genetics", "Phenotypic"]],
                    "type":
                    "data",
                },
                name="Correlations",
                marker_color="indianred",
            )

        title = f"Average average correlation across aging dimensions and X categories = {sorted_averages['mean'].mean().round(3)} +- {sorted_averages['mean'].std().round(3)}"
        y_label = "Average correlation"
    else:
        correlations_raw = pd.DataFrame(data_correlations).set_index(
            ["dimension", "subdimension", "category"])
        # Column labels arrive as strings; eval turns them back into tuples.
        # NOTE(review): same eval caveat as in the "average" branch.
        correlations_raw.columns = pd.MultiIndex.from_tuples(
            list(map(eval, correlations_raw.columns.tolist())),
            names=["subset_method", "correlation_type"])
        correlations_raw.reset_index(inplace=True)
        # Merge dimension and subdimension into a single key. Series.replace
        # matches whole values, so only a subdimension equal to "*" becomes "".
        correlations_raw["squeezed_dimension"] = correlations_raw[
            "dimension"] + correlations_raw["subdimension"].replace("*", "")
        correlations_raw = correlations_raw.drop(
            columns=["dimension", "subdimension"]).set_index(
                ["squeezed_dimension", "category"])

        # A single column is selected here, so sort_values takes no "by".
        sorted_correlations = correlations_raw.loc[(
            dimension_subdimension_2,
            MAIN_CATEGORIES_TO_CATEGORIES[main_category] +
            all_main_categories), (subset_method,
                                   correlation_type), ].sort_values(
                                       ascending=False)

        if display_mode == "view_decreasing":
            bars = go.Bar(
                x=sorted_correlations.index.get_level_values("category"),
                y=sorted_correlations,
                name="Correlations",
                marker_color="indianred",
            )
        else:  # display_mode == view_per_main_category
            list_main_category = []
            list_categories = []
            # Get the ranking of subcategories per main category
            for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
                if main_category_group == "All":
                    continue
                sorted_categories = (sorted_correlations.swaplevel().loc[
                    MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] +
                    [f"All_{main_category_group}"]].sort_values(
                        ascending=False))
                sorted_index_categories = sorted_categories.index.get_level_values(
                    "category")

                list_categories.extend(sorted_index_categories)
                list_main_category.extend([main_category_group] *
                                          len(sorted_index_categories))

            # Same two-level x axis as in the "average" branch.
            bars = go.Bar(
                x=[
                    list_main_category + ["", "", ""], list_categories +
                    ["FamilyHistory", "Genetics", "Phenotypic"]
                ],
                y=sorted_correlations.swaplevel()
                [list_categories +
                 ["FamilyHistory", "Genetics", "Phenotypic"]],
                name="Correlations",
                marker_color="indianred",
            )

        title = f"Average correlation = {sorted_correlations.mean().round(3)} +- {sorted_correlations.std().round(3)}"
        y_label = "Correlation"

    fig = go.Figure(bars)

    # Fixed-size figure; the large bottom margin leaves room for the vertical
    # (90 degree) tick labels.
    fig.update_layout({
        "width": 2000,
        "height": 800,
        "xaxis": {
            "title": "X subcategory",
            "tickangle": 90,
            "showgrid": False,
            "title_font": {
                "size": 25
            }
        },
        "yaxis": {
            "title": y_label,
            "title_font": {
                "size": 25
            }
        },
        "margin": {
            "l": 0,
            "r": 0,
            "b": 280,
            "t": 0
        },
    })

    return fig, title
        "MusculoskeletalKnees",
        "MusculoskeletalScalars",
        "MusculoskeletalSpine",
    ],
    "MusculoskeletalFullBody": ["Musculoskeletal"],
    "MusculoskeletalHips": ["Musculoskeletal"],
    "MusculoskeletalKnees": ["Musculoskeletal"],
    "MusculoskeletalScalars": ["Musculoskeletal"],
    "MusculoskeletalSpine": ["Musculoskeletal"],
    "PhysicalActivity": [],
}

# Every subcategory listed under "All", then "Phenotypic" and "Genetics", then
# one "All_<main category>" entry per key of MAIN_CATEGORIES_TO_CATEGORIES.
FULL_CATEGORY = [
    *MAIN_CATEGORIES_TO_CATEGORIES["All"],
    "Phenotypic",
    "Genetics",
    *(f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES),
]

if __name__ == "__main__":
    correlations_raw = load_feather(
        f"xwas/univariate_correlations/correlations.feather").set_index([
            "dimension_1", "subdimension_1", "dimension_2", "subdimension_2",
            "category"
        ])
    correlations_raw.columns = pd.MultiIndex.from_tuples(
        list(map(eval, correlations_raw.columns.tolist())),
        names=["subset_method", "correlation_type"])
    correlations_raw.reset_index(inplace=True)
    for index_dimension in [1, 2]:
        correlations_raw[
            f"squeezed_dimension_{index_dimension}"] = correlations_raw[
def get_graph_average(dimension_1, data_averages):
    """Build the bar chart of average correlations grouped per main category.

    Uses the ("union", "pearson") columns of ``data_averages`` for
    ``dimension_1``. The mean and standard deviation of each main category,
    and the overall title, are printed to stdout.

    Args:
        dimension_1: Value of the "dimension" index level to plot.
        data_averages: Data accepted by ``pd.DataFrame`` with columns
            "dimension" and "category"; the other column labels are strings
            that evaluate to ``(subset_method, correlation_type, observation)``
            tuples.

    Returns:
        The plotly figure.
    """
    import plotly.graph_objs as go

    # Categories appended after the per-main-category groups, shown with an
    # empty main category label.
    extra_categories = ["FamilyHistory", "Genetics", "Phenotypic"]

    averages = pd.DataFrame(data_averages).set_index(["dimension", "category"])
    # Column labels arrive as strings; eval turns them back into tuples.
    column_tuples = list(map(eval, averages.columns.tolist()))
    averages.columns = pd.MultiIndex.from_tuples(
        column_tuples,
        names=["subset_method", "correlation_type", "observation"],
    )

    every_category = MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
        f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES
    ]
    selected_averages = averages.loc[(dimension_1, every_category), ("union", "pearson")]
    sorted_averages = selected_averages.sort_values(by=["mean"], ascending=False)

    # Same table with "category" as the first index level, so a list of
    # categories can be passed to .loc directly.
    averages_by_category = sorted_averages.swaplevel()

    group_labels = []
    ordered_categories = []
    # Rank the subcategories inside each main category.
    for group, subcategories in MAIN_CATEGORIES_TO_CATEGORIES.items():
        if group == "All":
            continue
        ranked = averages_by_category.loc[subcategories + [f"All_{group}"]].sort_values(
            by=["mean"], ascending=False
        )
        group_means = ranked["mean"]
        print(
            "main_category",
            group,
            group_means.mean().round(3),
            "+-",
            group_means.std().round(3),
        )
        ranked_names = ranked.index.get_level_values("category")

        ordered_categories.extend(ranked_names)
        group_labels.extend([group] * len(ranked_names))

    plotted_categories = ordered_categories + extra_categories
    bars = go.Bar(
        # Two-level x axis: main category outside, subcategory inside.
        x=[group_labels + [""] * len(extra_categories), plotted_categories],
        y=sorted_averages["mean"].swaplevel()[plotted_categories],
        error_y={
            "array": sorted_averages["std"].swaplevel()[plotted_categories],
            "type": "data",
        },
        name="Correlations",
        marker_color="indianred",
    )

    overall_means = sorted_averages["mean"]
    title = f"Average average correlation across aging dimensions and X categories = {overall_means.mean().round(3)} +- {overall_means.std().round(3)}"
    y_label = "Average correlation"

    fig = go.Figure(bars)

    layout = {
        "width": 2000,
        "height": 700,
        "xaxis": {
            "title": "X subcategory",
            "tickangle": 90,
            "showgrid": False,
            "title_font": {"size": 25},
        },
        "yaxis": {"title": y_label, "title_font": {"size": 25}},
        "margin": {"l": 0, "r": 0, "b": 0, "t": 0},
    }
    fig.update_layout(layout)

    print(title)
    return fig
Exemplo n.º 14
0
def _fill_graph_tab_bar_plot(main_category, dimension, algorithm, display_mode,
                             data_scores):
    """Build the bar plot of multivariate R² scores for one aging dimension.

    Args:
        main_category: Key of ``MAIN_CATEGORIES_TO_CATEGORIES`` whose
            subcategories are plotted (minus the entries of
            ``MULTIVARIATE_CATEGORIES_TO_REMOVE``).
        dimension: Value of the "dimension" column to keep.
        algorithm: ``"best_algorithm"`` keeps, for every (category, dimension)
            pair, the row with the highest "r2"; any other value selects the
            rows whose "algorithm" column equals it.
        display_mode: ``"view_decreasing"`` plots the bars by decreasing R²;
            any other value groups them per main category.
        data_scores: Data accepted by ``pd.DataFrame`` with columns
            "category", "dimension", "algorithm", "r2" and "std".

    Returns:
        A ``(figure, title)`` tuple: the plotly figure and a summary string
        with the mean and standard deviation of the plotted R² values.
    """
    import plotly.graph_objs as go

    if algorithm == "best_algorithm":
        # For each (category, dimension) keep the single row with the best r2.
        every_score_every_dimension = (pd.DataFrame(data_scores).groupby(
            by=["category", "dimension"]
        ).apply(lambda score_category_dimension: score_category_dimension.iloc[
            score_category_dimension["r2"].argmax()]).reset_index(drop=True))
        every_score = every_score_every_dimension.set_index(
            "dimension").loc[dimension].set_index("category")
    else:
        every_score = (pd.DataFrame(data_scores).set_index([
            "algorithm", "dimension"
        ]).loc[(algorithm, dimension)].reset_index().set_index("category"))

    # Work on a copy so the shared mapping is never mutated.
    multivariate_categories = MAIN_CATEGORIES_TO_CATEGORIES[
        main_category].copy()
    for to_remove in MULTIVARIATE_CATEGORIES_TO_REMOVE:
        if to_remove in multivariate_categories:
            multivariate_categories.remove(to_remove)

    scores = every_score.loc[multivariate_categories].sort_values(
        by=["r2"], ascending=False)

    hovertemplate = "X subcategory: %{x} <br>R²: %{y:.3f} +- %{customdata[0]:.3f} <br><extra>%{customdata[1]}</extra>"

    if display_mode == "view_decreasing":
        bars = go.Bar(
            x=scores.index,
            y=scores["r2"],
            error_y={
                "array": scores["std"],
                "type": "data"
            },
            name=SCORES["r2"],
            marker_color="indianred",
            hovertemplate=hovertemplate,
            customdata=scores[["std", "algorithm"]],
        )
    else:  # display_mode == view_per_main_category
        list_main_category = []
        list_categories = []
        # Get the ranking of subcategories per main category
        for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES:
            if main_category_group == "All":
                continue
            sorted_index_categories = (scores.loc[scores.index.isin(
                MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] +
                [f"All_{main_category_group}"])].sort_values(
                    by=["r2"], ascending=False)).index

            list_categories.extend(sorted_index_categories)
            list_main_category.extend([main_category_group] *
                                      len(sorted_index_categories))

        if main_category == "All":
            # FamilyHistory is appended on its own with an empty main category
            # label.
            list_categories += ["FamilyHistory"]
            list_main_category += [""]

        # Reorder the scores once so that bar heights, error bars and hover
        # data all follow the same per-main-category order. Using the r2-sorted
        # "std" column here would attach each error bar to the wrong bar.
        grouped_scores = scores.loc[list_categories]

        bars = go.Bar(
            x=[list_main_category, list_categories],
            y=grouped_scores["r2"],
            error_y={
                "array": grouped_scores["std"],
                "type": "data"
            },
            name=SCORES["r2"],
            marker_color="indianred",
            hovertemplate=hovertemplate,
            customdata=grouped_scores[["std", "algorithm"]],
        )

    fig = go.Figure(bars)

    fig.update_layout({
        "height": 800,
        "xaxis": {
            "title": "X subcategory",
            "tickangle": 90,
            "showgrid": False,
            "title_font": {
                "size": 25
            }
        },
        "yaxis": {
            "title": SCORES["r2"],
            "title_font": {
                "size": 25
            }
        },
        "margin": {
            "l": 0,
            "r": 0,
            "b": 0,
            "t": 0
        },
    })

    return fig, f"Average {SCORES['r2']} = {scores['r2'].mean().round(3)} +- {scores['r2'].std().round(3)}"
Exemplo n.º 15
0
def _fill_heatmap_univariate_summary(item, main_category, data):
    """Build the heatmap of the univariate summary tab and its title.

    Each cell shows, for one aging dimension and one X category, the
    percentage of variables for ``item``; the matching count is exposed in the
    hover text.

    Args:
        item: First level of the column key to plot; also a key of
            ``ITEMS_LEGEND``, ``ITEMS_COLORSCALE`` and ``ITEMS_TITLES``.
        main_category: Key of ``MAIN_CATEGORIES_TO_CATEGORIES``. ``"All"``
            shows every ``All_<main category>`` entry followed by the
            subcategories of "All" without "Genetics" and "Phenotypic"; any
            other value shows ``All_<main_category>`` followed by its
            subcategories.
        data: Data accepted by ``pd.DataFrame`` with columns "dimension" and
            "category"; the other column labels are strings that evaluate to
            ``(item, observation)`` tuples, with observations "percentage" and
            "number" for ``item``.

    Returns:
        A ``(figure, title)`` tuple: the plotly figure and
        ``ITEMS_TITLES[item]``.
    """
    import plotly.graph_objects as go

    if main_category == "All":
        list_categories = [
            f"All_{one_main_category}" for one_main_category in MAIN_CATEGORIES_TO_CATEGORIES
        ] + list(pd.Index(MAIN_CATEGORIES_TO_CATEGORIES[main_category]).drop(["Genetics", "Phenotypic"]))
    else:
        list_categories = [f"All_{main_category}"] + MAIN_CATEGORIES_TO_CATEGORIES[main_category]

    summary = pd.DataFrame(data).set_index(["dimension", "category"])
    # Column labels arrive as strings; eval turns them back into tuples.
    # NOTE(review): eval executes arbitrary expressions - if this data can be
    # influenced by a client, ast.literal_eval would be the safer parser.
    summary.columns = pd.MultiIndex.from_tuples(
        list(map(eval, summary.columns.tolist())), names=["item", "observation"]
    )

    # Dimension x category table of percentages, scaled by 100 and truncated
    # to integers.
    summary_item_percentage = (
        100
        * summary.reset_index().pivot(
            index=[("dimension", "")], columns=[("category", "")], values=(item, "percentage")
        )
    ).astype(int)
    summary_item_percentage_category = summary_item_percentage[list_categories]
    summary_item_percentage_category.index.name = "dimension"
    summary_item_percentage_category.columns.name = "category"

    # Same layout for the raw counts, used as hover data.
    summary_item_number = summary.reset_index().pivot(
        index=[("dimension", "")], columns=[("category", "")], values=(item, "number")
    )
    summary_item_number_category = summary_item_number[list_categories]
    # Name the axes of the number table; the original repeated these two
    # assignments on the percentage table instead.
    summary_item_number_category.index.name = "dimension"
    summary_item_number_category.columns.name = "category"

    hovertemplate = "<br>".join(
        [
            "X main category: %{x}",
            "Aging dimension: %{y}",
            f"{ITEMS_LEGEND[item]}: " + "%{customdata} ~ %{z} % of the variables",
            "<extra></extra>",
        ]
    )

    heatmap = go.Heatmap(
        z=summary_item_percentage_category,
        x=summary_item_percentage_category.columns,
        y=summary_item_percentage_category.index,
        customdata=summary_item_number_category,
        hovertemplate=hovertemplate,
        colorscale=ITEMS_COLORSCALE[item],
        zmin=0,
        zmax=100,
    )

    fig = go.Figure(heatmap)
    # 30 px per cell, with a minimum width of 500 px.
    fig.update_layout(
        {
            "xaxis": {"title": "X subcategory", "tickangle": 90},
            "yaxis": {"title": "Aging dimension"},
            "width": max(30 * summary_item_percentage_category.shape[1], 500),
            "height": 30 * summary_item_percentage_category.shape[0],
            "xaxis_title_font": {"size": 25},
            "yaxis_title_font": {"size": 25},
            "margin": {"l": 0, "r": 0, "b": 0, "t": 0},
        }
    )

    return fig, ITEMS_TITLES[item]
    )
    correlations_cleaned_dimensions = correlations_cleaned_dimensions_1.rename(
        index=DICT_TO_CHANGE_DIMENSIONS, level="dimension_2"
    )
    correlations_cleaned = correlations_cleaned_dimensions.rename(index=DICT_TO_CHANGE_CATEGORIES, level="category")
    correlations_cleaned.reset_index().to_feather("data/xwas/univariate_correlations/correlations/correlations.feather")

    for dimension in DIMENSIONS:
        if dimension in DICT_TO_CHANGE_DIMENSIONS.keys():
            dimension = DICT_TO_CHANGE_DIMENSIONS[dimension]
        correlations_cleaned.loc[dimension].reset_index().rename(columns={"dimension_2": "dimension"}).to_feather(
            f"data/xwas/univariate_correlations/correlations/dimensions/correlations_{dimension}.feather"
        )

    for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
        f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
    ]:
        if category in DICT_TO_CHANGE_CATEGORIES.keys():
            category = DICT_TO_CHANGE_CATEGORIES[category]
        correlations_cleaned.swaplevel().swaplevel(i=0, j=1).loc[category].reset_index().to_feather(
            f"data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather"
        )

    averages_correlations = load_feather("xwas/univariate_correlations/averages_correlations.feather")

    averages_correlations_cleaned_dimensions = averages_correlations.set_index(["dimension", "category"]).rename(
        index=DICT_TO_CHANGE_DIMENSIONS, level="dimension"
    )
    averages_correlations_cleaned = averages_correlations_cleaned_dimensions.rename(
        index=DICT_TO_CHANGE_CATEGORIES, level="category"
    )
Exemplo n.º 17
0
import pandas as pd
from tqdm import tqdm

from dash_website.utils.aws_loader import load_feather
from dash_website import DIMENSIONS, MAIN_CATEGORIES_TO_CATEGORIES


if __name__ == "__main__":
    list_indexes = []
    for dimension in DIMENSIONS + ["All_aging_dimensions"]:
        for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
            f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
        ]:
            list_indexes.append([dimension, category])
    indexes = pd.MultiIndex.from_tuples(list_indexes, names=["dimension", "category"])

    list_columns = []
    for item in ["total", "significant", "accelerated_aging", "decelerated_aging"]:
        if item == "total":
            observations = ["total"]
        else:
            observations = ["number", "percentage"]
        for observation in observations:
            list_columns.append([item, observation])
    columns = pd.MultiIndex.from_tuples(list_columns, names=["item", "observation"])

    summary = pd.DataFrame(None, index=indexes, columns=columns)

    for dimension in tqdm(DIMENSIONS):
        correlations_dimension = load_feather(
            f"xwas/univariate_results/linear_correlations_{dimension}.feather",