def test_localisation_compatibility(self):
    """
    Test that the melted df_localisation's 'Localisation' terms are ALL
    found in the list returned by all_localisations().
    """
    all_localisations_list_filtered = all_localisations()
    df_localisation_melt = semiology_lateralisation_localisation(test=True)
    mask = df_localisation_melt['Localisation'].isin(
        all_localisations_list_filtered)
    # Assert the truth value directly: `== True` is redundant, and fragile
    # against numpy bool scalars.
    assert mask.all()
    print('compatible')
def NORMALISE_TO_LOCALISING_VALUES(inspect_result, type='all'):
    """
    Normalise localisation-column values to the row's 'Localising' value,
    i.e. conserve localising semiology datapoints across the spread of
    localisations: the more brain regions a semiology's EZ/SOZ localises to,
    the lower its per-region localising value.

    Makes localising semiology the unit of analysis. With type='all' this
    also imputes possible missing data based on the localisations (brain
    regions) present — see the tooltip in the SVT for this option
    (SemiologyVisualisation module). As per the call in semiology.py, this
    option is only utilised if granular/hierarchy-reversal is True OR if the
    low resolution option is selected (top_level_lobes_only).

    Also useful for the Sankey diagram: preservation of 'Localising' flows.
    e.g. 1 localising value for epigastric localises to hippocampus (1),
    amygdala (1) and Insula (1); with NLV each of the three becomes 0.33.
    This can create artefactually lower numbers for wide intralobar spreads,
    but conserves localising value. Alim-Marvasti Sept 2020.

    Note: `inspect_result` is modified in place (on its localisation
    columns) and also returned.
    """
    working = inspect_result.copy()

    # Restrict to the localisation columns actually present in the data.
    recognised_locs = all_localisations()
    loc_cols = [col for col in working.columns if col in recognised_locs]

    # Per-row scaling factor: the row's 'Localising' total divided by the
    # sum over its localisation columns (e.g. FL and TL).
    working.loc[:, 'ratio'] = \
        working['Localising'] / working[loc_cols].sum(axis=1)
    working = working.astype({'ratio': 'float'})

    if type == 'ratio<1':
        # Only scale rows whose localisations over-count 'Localising'.
        rows_to_scale = working['ratio'] < 1
        if rows_to_scale.any():
            # DataFrame.multiply with axis=0 broadcasts the ratio Series
            # down the rows (default axis would align on columns).
            scaled = working.loc[rows_to_scale, loc_cols].multiply(
                working.loc[rows_to_scale, 'ratio'], axis=0)
            inspect_result.loc[rows_to_scale, loc_cols] = scaled
    else:
        # Scale every row, up or down, to conserve localising value.
        inspect_result.loc[:, loc_cols] = working.loc[:, loc_cols].multiply(
            working.loc[:, 'ratio'], axis=0)

    return inspect_result
import numpy as np import pandas as pd from mega_analysis.crosstab.all_localisations import all_localisations from mega_analysis.crosstab.hierarchy_dictionaries import postcode_dictionaries all_locs = all_localisations() # hierarchy_dict = postcode_dictionaries() class Hierarchy(): """ To reverse the postcode system. See docstrings below. """ def __init__(self, original_df): self.original_df = original_df.copy() self.new_df = original_df.copy() self.localisation_columns = [ col for col in original_df.columns if col in all_locs ] def hierarchy_reversal(self, top_level_col, low_level_cols, option='max') -> pd.DataFrame: """ Takes a df and returns a df Note that the postcode/hierarchy of localisations isn't completely invertible Hence, should have two options: conservative and max reversals. Default max. The .isin() method is so that if used on inspect_result rather than entire mega_analysis_df,
def summary_semio_loc_df_from_scripts(normalise=True):
    """
    Build summary query results from the Semio2Brain database.
    (Lots of copy pasting from scripts/figures.py in kd_figures-v3 branch.)

    Parameters
    ----------
    normalise : bool
        If True, normalise top-level localisation columns (Bayesian=True)
        before querying each DataFrame.

    Returns
    -------
    query_results : dict
        Nested dictionary keyed by 'full', 'spontaneous', 'topology'; each
        maps {semiology: {query_inspection, num_query_loc, num_query_lat}}.
    """
    # Define paths
    repo_dir, resources_dir, excel_path, semiology_dict_path = file_paths()
    Semio2Brain_Database = excel_path
    with open(semiology_dict_path) as f:
        SemioDict = yaml.load(f, Loader=yaml.FullLoader)

    region_names = all_localisations()
    semiology_list = list(recursive_items(SemioDict))

    (original_df,
     df_ground_truth, df_study_type,
     num_database_articles, num_database_patients,
     num_database_lat, num_database_loc) = \
        MEGA_ANALYSIS(Semio2Brain_Database, exclude_data=True)

    # -----------------------------------
    # Map composite/junctional regions onto their constituent lobes.
    redistribution_spec = {
        'FT': ['FL', 'INSULA', 'Lateral Temporal', 'TL'],
        'TO': ['Lateral Temporal', 'TL', 'OL'],
        'TP': ['Lateral Temporal', 'TL', 'PL'],
        'FTP': ['INSULA', 'Lateral Temporal', 'TL', 'FL', 'PL'],
        'TPO Junction': ['Lateral Temporal', 'TL', 'PL', 'OL'],
        'PO': ['PL', 'OL'],
        'FP': ['FL', 'PL'],
        'Perisylvian': ['INSULA', 'Lateral Temporal', 'TL', 'FL', 'PL'],
        'Sub-Callosal Cortex': ['Ant Cing (frontal, genu)', 'CING'],
    }
    redistributed_df = copy.deepcopy(original_df)
    # probably not needed as used exclude_data True when calling M_A
    redistributed_df = exclude_postictals(redistributed_df)

    for from_region, destination_regions in redistribution_spec.items():
        for destination in destination_regions:
            # BUGFIX: accumulate into redistributed_df rather than
            # reassigning from original_df. The previous code
            # (original_df[destination] + original_df[from_region])
            # overwrote earlier redistributions whenever a destination
            # (e.g. 'TL') appears under several source regions, so only the
            # last source's contribution survived.
            redistributed_df[destination] = \
                redistributed_df[destination].fillna(0) + \
                original_df[from_region].fillna(0)

    # Drop the composite source columns once redistributed.
    # (keyword `columns=` — positional axis was removed in pandas 2.0)
    redistributed_df = redistributed_df.drop(
        columns=list(redistribution_spec.keys()))
    # -----------------------------------

    df = copy.deepcopy(redistributed_df)
    df_SS = exclude_ET(df)
    df_SS = exclude_cortical_stimulation(df_SS)
    df_TS = exclude_spontaneous_semiology(df)

    all_dfs = {
        'full': df,
        'spontaneous': df_SS,
        'topology': df_TS,
    }

    query_results = {}
    for key, df in all_dfs.items():
        if normalise:
            df, _ = normalise_top_level_localisation_cols(df, Bayesian=True)
        query_results[key] = query_semiology_wrapper_from_scripts(
            df, semiology_list, semiology_dict_path)

    return query_results