# Example #1
    def test_localisation_compatibility(self):
        """
        Check that every 'Localisation' term in the melted localisation
        DataFrame is a member of the canonical list from all_localisations().
        """
        # Canonical list of localisation terms (project helper).
        all_localisations_list_filtered = all_localisations()
        # Long-format (melted) df with a 'Localisation' column (test fixture).
        df_localisation_melt = semiology_lateralisation_localisation(test=True)

        mask = df_localisation_melt['Localisation'].isin(
            all_localisations_list_filtered)
        # mask.all() already yields a boolean; comparing it to True with `==`
        # was redundant and non-idiomatic (PEP 8: don't compare to True).
        assert mask.all()
        print('compatible')
# Example #2
def NORMALISE_TO_LOCALISING_VALUES(inspect_result, type='all'):
    """
    Alter the DataFrame to normalise values to localising column value: i.e. conservation of localising semiology datapoints,
    based on spread of localisations.
    The more regions a semiology EZ/SOZ localises to, the lower its brain localising-value.
    Makes localising semiology the unit of analysis and when using type='all', also imputes possible missing data based
        on localisations (brain regions) present.
    See also tooltip in SVT for this option (SemiologyVisualisation module).

    As per call in semiology.py, this option is only utilised if granular/hierarchy-reversal is True
        OR if the low resolution option is selected (top_level_lobes_only).

    Also useful for Sankey diagram: preservation of 'Localising' flows.
        e.g. 1 localising value for epigastric localises to hippocampus (1) and amygdala (1) and Insula (1).
        With NLV, each of hippo, amygdala and insular values changes to 0.33 (1/3rd).
        This can create artefactually lower numbers for wide intralobar spreads (amygdala and hippo), but useful for conservation of localising value.

    NOTE: this function mutates ``inspect_result`` in place (via .loc
    assignment) and also returns it.

    type : str
        'ratio<1' rescales only rows whose localisation spread exceeds their
        'Localising' value; any other value (default 'all') rescales all rows.
        (The parameter name shadows the builtin ``type`` but is kept for
        backward compatibility with keyword callers.)

    Alim-Marvasti Sept 2020.
    """
    # Work on a copy so the temporary 'ratio' column never pollutes the input.
    new_inspect_result = inspect_result.copy()

    # Restrict to the localisation columns actually present in this df.
    all_locs = all_localisations()
    locs = [i for i in new_inspect_result.columns if i in all_locs]

    # Per-row ratio: 'Localising' total divided by the sum across localising
    # regions (e.g. FL and TL). NOTE(review): rows whose region columns sum to
    # zero produce inf/NaN here and propagate silently — confirm upstream
    # guarantees at least one non-zero localisation per row.
    new_inspect_result.loc[:, 'ratio'] = new_inspect_result['Localising'] / \
        new_inspect_result[locs].sum(axis=1)
    new_inspect_result = new_inspect_result.astype({'ratio': 'float'})

    # only change if ratio <1
    if type == 'ratio<1':
        gif_indices = (new_inspect_result['ratio'] < 1)
        if gif_indices.any():
            # DataFrame.multiply (not Series.multiply): 'ratio' is a Series,
            # and axis=0 broadcasts it row-wise (the default axis would
            # mis-align on columns).
            inspect_result.loc[gif_indices, locs] = \
                (new_inspect_result.loc[gif_indices, locs]).multiply(
                    new_inspect_result.loc[gif_indices, 'ratio'],
                    axis=0,
            )
    else:
        # Default ('all'): rescale every row.
        inspect_result.loc[:, locs] = \
            (new_inspect_result.loc[:, locs]).multiply(
                new_inspect_result.loc[:, 'ratio'], axis=0)

    return inspect_result
import numpy as np
import pandas as pd
from mega_analysis.crosstab.all_localisations import all_localisations
from mega_analysis.crosstab.hierarchy_dictionaries import postcode_dictionaries

all_locs = all_localisations()
# hierarchy_dict = postcode_dictionaries()


class Hierarchy():
    """
    To reverse the postcode system. See docstrings below.
    """
    def __init__(self, original_df):
        """Keep a pristine copy, a working copy, and the localisation columns."""
        self.original_df = original_df.copy()
        self.new_df = original_df.copy()
        # Columns of the df that are recognised localisation regions
        # (set lookup for O(1) membership; column order is preserved).
        known_regions = set(all_locs)
        self.localisation_columns = [
            column for column in original_df.columns if column in known_regions
        ]

    def hierarchy_reversal(self,
                           top_level_col,
                           low_level_cols,
                           option='max') -> pd.DataFrame:
        """
        Takes a df and returns a df

        Note that the postcode/hierarchy of localisations isn't completely invertible
        Hence, should have two options: conservative and max reversals. Default max.

        The .isin() method is so that if used on inspect_result rather than entire mega_analysis_df,
def summary_semio_loc_df_from_scripts(normalise=True):
    """
    Lots of copy pasting from scripts/figures.py in kd_figures-v3 branch.

    Builds the database df, redistributes combined-lobe columns into single
    regions, splits the result into full / spontaneous / topology views, and
    queries every semiology in each view.

    normalise : bool, default True
        If True, each view is first passed through
        normalise_top_level_localisation_cols(..., Bayesian=True).

    returns query_results which is a nested dictionary
        full
        spontaneous
        topology
            {semiologies}
                query_inspection
                num_query_loc
                num_query_lat
    """

    # Define paths
    repo_dir, resources_dir, excel_path, semiology_dict_path = file_paths()

    Semio2Brain_Database = excel_path
    # Load the semiology dictionary (YAML) into a nested dict.
    with open(semiology_dict_path) as f:
        SemioDict = yaml.load(f, Loader=yaml.FullLoader)

    # NOTE(review): region_names is unused below (only referenced by the
    # commented-out block further down) — kept for parity with the script.
    region_names = all_localisations()
    semiology_list = list(recursive_items(SemioDict))

    # Only original_df is used below; the remaining unpacked values are unused
    # here but returned by MEGA_ANALYSIS as a fixed tuple.
    (original_df,
     df_ground_truth, df_study_type,
     num_database_articles, num_database_patients, num_database_lat, num_database_loc) = \
        MEGA_ANALYSIS(Semio2Brain_Database,
                      exclude_data=True)

    # -----------------------------------
    # Redistribute combined/overlapping localisations (e.g. 'FT') into their
    # constituent single-region columns, then drop the combined columns.
    redistribution_spec = {
        'FT': ['FL', 'INSULA', 'Lateral Temporal', 'TL'],
        'TO': ['Lateral Temporal', 'TL', 'OL'],
        'TP': ['Lateral Temporal', 'TL', 'PL'],
        'FTP': ['INSULA', 'Lateral Temporal', 'TL', 'FL', 'PL'],
        'TPO Junction': ['Lateral Temporal', 'TL', 'PL', 'OL'],
        'PO': ['PL', 'OL'],
        'FP': ['FL', 'PL'],
        'Perisylvian': ['INSULA', 'Lateral Temporal', 'TL', 'FL', 'PL'],
        'Sub-Callosal Cortex': ['Ant Cing (frontal, genu)', 'CING']
    }
    redistributed_df = copy.deepcopy(original_df)
    # probably not needed as used exclude_data True when calling M_A
    redistributed_df = exclude_postictals(redistributed_df)

    # NOTE(review): destination values are computed purely from original_df
    # (i.e. BEFORE exclude_postictals), overwriting the post-exclusion columns
    # in redistributed_df — confirm this is intended.
    for from_region, destination_regions in redistribution_spec.items():
        for destination in destination_regions:
            redistributed_df[destination] = original_df[destination].fillna(
                0) + original_df[from_region].fillna(0)
    # NOTE(review): positional axis argument ('columns') to DataFrame.drop is
    # deprecated in newer pandas — prefer columns=... when touching this line.
    redistributed_df = redistributed_df.drop(redistribution_spec.keys(),
                                             'columns')
    # -----------------------------------

    # region_names_re = region_names
    # region_names_re['top_level'] = ['TL',
    #                                 'FL',
    #                                 'CING',
    #                                 'PL',
    #                                 'OL',
    #                                 'INSULA',
    #                                 'Hypothalamus',
    #                                 'Cerebellum', ]
    # region_names_re['top_level_all_other'] = ['Cerebellum']

    # Three views of the data: everything, spontaneous-only, topological-only.
    df = copy.deepcopy(redistributed_df)
    df_SS = exclude_ET(df)
    df_SS = exclude_cortical_stimulation(df_SS)
    df_TS = exclude_spontaneous_semiology(df)

    all_dfs = {
        'full': df,
        'spontaneous': df_SS,
        'topology': df_TS,
    }

    # Query every semiology term against each view; `df` is deliberately
    # rebound by the loop (shadowing the 'full' df above, which is no longer
    # needed at this point).
    query_results = {}
    for key, df in all_dfs.items():
        if normalise:
            df, _ = normalise_top_level_localisation_cols(df, Bayesian=True)
        query_results[key] = query_semiology_wrapper_from_scripts(
            df, semiology_list, semiology_dict_path)

    return query_results