Example #1
    def _run_univariate_tests(self, X, y, control='N2', n_jobs=-1):

        stats, pvals, _ = univariate_tests(
            X, y, control=control, test=self.test,
            comparison_type=self.comparison_type,
            multitest_correction=self.multitest_method,
            n_jobs=n_jobs)

        effects = get_effect_sizes(
            X, y, control=control, test=self.test,
            comparison_type=self.comparison_type)

        test_res = pd.DataFrame(pvals.min(axis=1), columns=['p-value'])

        # In most cases, the pvals and effects have the same shape
        # (when we do group-by-group comparisons, we get group-by-group
        # effect sizes too, and when we do multi-class comparisons we get one
        # effect size).
        # But for the Kruskal-Wallis case, we cannot get one effect size for the
        # test, so we get group-by-group effect sizes instead and keep the max.
        # In this case pvals has only one column, but effects has more than
        # one column.
        if pvals.shape == effects.shape:
            test_res['effect_size'] = effects.values[pvals.isin(pvals.min(axis=1)).values]
        else:
            test_res['effect_size'] = effects.max(axis=1)

        self.test_results = test_res

        return
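The shape check above guards against the Kruskal-Wallis case described in the comment. Below is a minimal, self-contained sketch of the same pattern on toy data; the import path and the keyword signatures are assumptions, mirroring the call above rather than a documented API.

import numpy as np
import pandas as pd
# assumed import path for the helpers used above
from tierpsytools.analysis.statistical_tests import univariate_tests, get_effect_sizes

# toy data: 30 samples, 2 features, 3 strains with 'N2' as control
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(30, 2)), columns=['speed_50th', 'curvature_mean'])
y = pd.Series(['N2'] * 10 + ['mutant_a'] * 10 + ['mutant_b'] * 10, name='strain')

# multiclass Kruskal-Wallis: one p-value per feature...
stats, pvals, _ = univariate_tests(X, y, control='N2', test='Kruskal',
                                   comparison_type='multiclass',
                                   multitest_correction='fdr_by', n_jobs=-1)

# ...but group-by-group effect sizes, hence the shape mismatch handled above
effects = get_effect_sizes(X, y, control='N2', test='Kruskal',
                           comparison_type='multiclass')

test_res = pd.DataFrame(pvals.min(axis=1), columns=['p-value'])
if pvals.shape == effects.shape:
    test_res['effect_size'] = effects.values[pvals.isin(pvals.min(axis=1)).values]
else:
    test_res['effect_size'] = effects.max(axis=1)
print(test_res)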
Example #2
def single_feature_window_mutant_worm_stats(metadata,
                                            features,
                                            save_dir,
                                            window=2,
                                            feature='motion_mode_paused_fraction',
                                            pvalue_threshold=0.05,
                                            fdr_method='fdr_by'):
    """ T-tests comparing BW vs fepD for each mutant worm """
    
    # 7 worm strains:       N2 vs 'cat-2', 'eat-4', 'osm-5', 'pdfr-1', 'tax-2', 'unc-25'
    # 2 bacteria strains:   BW vs fepD
    # 1 feature:            'motion_mode_paused_fraction'
    # 1 window:             2 (corresponding to 30 minutes on food, just after first BL stimulus)

    # focus on just one window = 30min just after BL (window=2)
    window_metadata = metadata[metadata['window']==window]

    # statistics: perform t-tests comparing fepD vs BW for each worm strain
    worm_strain_list = list(window_metadata['worm_strain'].unique())

    ttest_list = []
    for worm in worm_strain_list:
        worm_window_meta = window_metadata[window_metadata['worm_strain']==worm]
        worm_window_feat = features[[feature]].reindex(worm_window_meta.index)
        
        stats, pvals, reject = univariate_tests(X=worm_window_feat,
                                                y=worm_window_meta['bacteria_strain'],
                                                control='BW',
                                                test='t-test',
                                                comparison_type='binary_each_group',
                                                multitest_correction=fdr_method,
                                                alpha=pvalue_threshold,
                                                n_permutation_test=None)

        # get effect sizes
        effect_sizes = get_effect_sizes(X=worm_window_feat, 
                                        y=worm_window_meta['bacteria_strain'],
                                        control='BW',
                                        effect_type=None,
                                        linked_test='t-test')
        
        # compile t-test results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        effect_sizes.columns = ['effect_size_' + str(c) for c in effect_sizes.columns]
        ttest_df = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    
        # record the worm strain as the index instead of the feature
        ttest_df = ttest_df.rename(index={feature:worm})
        ttest_list.append(ttest_df)

    ttest_path = Path(save_dir) / 'pairwise_ttests' /\
        'ttest_mutant_worm_fepD_vs_BW_window_{}_results.csv'.format(window)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    ttest_results = pd.concat(ttest_list, axis=0)
    ttest_results.to_csv(ttest_path, header=True, index=True)
        
    return
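A hypothetical call to the function above. The CSV paths and the read-in step are placeholders; the metadata is assumed to carry 'window', 'worm_strain' and 'bacteria_strain' columns aligned with the feature summaries.

import pandas as pd

# placeholder inputs: aligned window metadata and feature summaries
metadata = pd.read_csv('window_metadata.csv', index_col=0)
features = pd.read_csv('window_features.csv', index_col=0)

single_feature_window_mutant_worm_stats(metadata,
                                        features,
                                        save_dir='./mutant_worm_stats',
                                        window=2,
                                        feature='motion_mode_paused_fraction',
                                        pvalue_threshold=0.05,
                                        fdr_method='fdr_by')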
Example #3
def dead_keio_stats(features, metadata, args):
    """ Perform statistical analyses on dead Keio experiment results:
        - t-tests for each feature comparing each strain vs control for paired antioxidant treatment conditions
        - t-tests for each feature comparing each strain antioxidant treatment to negative control (no antioxidant)
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - control_dict : dict
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - test : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    print("\nInvestigating variation in worm behaviour on dead vs alive hit Keio strains")  

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[STRAIN_COLNAME].unique()) == len(metadata[STRAIN_COLNAME].str.upper().unique())
    assert all(type(b) == np.bool_ for b in metadata[TREATMENT_COLNAME].unique())
    
    # Load Tierpsy feature set + subset (columns) for selected features only
    features = select_feat_set(features, 'tierpsy_{}'.format(args.n_top_feats), append_bluelight=True)
    features = features[[f for f in features.columns if 'path_curvature' not in f]]
    
    assert not features.isna().any().any()
    #n_feats = features.shape[1]
    
    strain_list = list(metadata[STRAIN_COLNAME].unique())
    assert CONTROL_STRAIN in strain_list

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=[STRAIN_COLNAME, TREATMENT_COLNAME])
    print("Mean sample size of %s: %d" % (STRAIN_COLNAME, int(sample_size['n_samples'].mean())))
    
    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir = save_dir / "Stats" / args.fdr_method
    
    ##### ANOVA #####

    # make path to save ANOVA results
    test_path = stats_dir / 'ANOVA_results.csv'
    test_path.parent.mkdir(exist_ok=True, parents=True)

    # ANOVA across strains for significant feature differences
    if len(metadata[STRAIN_COLNAME].unique()) > 2:   
        stats, pvals, reject = univariate_tests(X=features, 
                                                y=metadata[STRAIN_COLNAME], 
                                                test='ANOVA',
                                                control=CONTROL_STRAIN,
                                                comparison_type='multiclass',
                                                multitest_correction=None, # uncorrected
                                                alpha=args.pval_threshold,
                                                n_permutation_test=None) # 'all'
    
        # get effect sizes
        effect_sizes = get_effect_sizes(X=features, 
                                        y=metadata[STRAIN_COLNAME], 
                                        control=CONTROL_STRAIN,
                                        effect_type=None,
                                        linked_test='ANOVA')
    
        # correct for multiple comparisons
        reject_corrected, pvals_corrected = _multitest_correct(pvals, 
                                                               multitest_method=args.fdr_method,
                                                               fdr=args.pval_threshold)
                                    
        # compile + save results (corrected)
        test_results = pd.concat([stats, effect_sizes, pvals_corrected, reject_corrected], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        test_results.to_csv(test_path, header=True, index=True)
        
        nsig = test_results['reject'].sum()
        print("%d features (%.f%%) signficantly different among '%s'" % (nsig, 
              len(test_results.index)/nsig, STRAIN_COLNAME))

    
    ##### t-tests #####

    # choose the paired test to match the main test (t-test for ANOVA, otherwise Mann-Whitney)
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'

    for strain in strain_list:                                   
        strain_meta = metadata[metadata[STRAIN_COLNAME]==strain]
        strain_feat = features.reindex(strain_meta.index)
                     
        ### t-tests for each feature comparing live vs dead behaviour
    
        ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format((t_test + '_' + strain))
        ttest_path = stats_dir / '{}_results.csv'.format((t_test + '_' + strain))  
        ttest_path.parent.mkdir(exist_ok=True, parents=True)

        # perform t-tests (without correction for multiple testing)
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat, 
                                                      y=strain_meta[TREATMENT_COLNAME], 
                                                      control=CONTROL_TREATMENT, 
                                                      test=t_test,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=None, 
                                                      alpha=0.05)
        # get effect sizes for comparisons
        effect_sizes_t =  get_effect_sizes(X=strain_feat, 
                                           y=strain_meta[TREATMENT_COLNAME], 
                                           control=CONTROL_TREATMENT,
                                           effect_type=None,
                                           linked_test=t_test)
        
        # compile + save t-test results (uncorrected)
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
        
        # correct for multiple comparisons
        pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
        reject_t, pvals_t = _multitest_correct(pvals_t, 
                                               multitest_method=args.fdr_method,
                                               fdr=args.pval_threshold)

        # compile + save t-test results (corrected)
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_corrected.to_csv(ttest_path, header=True, index=True)

        # record t-test significant features (not ordered)
        fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
        #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
        print("%d significant features for %s on any %s vs %s (%s, %s, P<%.2f)" % (len(fset_ttest),
              strain, TREATMENT_COLNAME, CONTROL_TREATMENT, t_test, args.fdr_method, args.pval_threshold))

        if len(fset_ttest) > 0:
            ttest_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format((t_test + '_' + strain))
            write_list_to_file(fset_ttest, ttest_sigfeats_path)

    ##### for LIVE bacteria: compare each strain with control #####
    
    live_metadata = metadata[metadata['dead']==False]
    live_features = features.reindex(live_metadata.index)    

    ttest_path_uncorrected = stats_dir / '{}_live_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_live_results.csv'.format(t_test)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    
    # perform t-tests (without correction for multiple testing)   
    stats_t, pvals_t, reject_t = univariate_tests(X=live_features, 
                                                  y=live_metadata[STRAIN_COLNAME], 
                                                  control=CONTROL_STRAIN, 
                                                  test=t_test,
                                                  comparison_type='binary_each_group',
                                                  multitest_correction=None, 
                                                  alpha=0.05)
    
    # get effect sizes for comparisons
    effect_sizes_t =  get_effect_sizes(X=live_features, 
                                       y=live_metadata[STRAIN_COLNAME], 
                                       control=CONTROL_STRAIN,
                                       effect_type=None,
                                       linked_test=t_test)
    
    # compile + save t-test results (uncorrected)
    stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
    ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
    
    # correct for multiple comparisons
    pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
    reject_t, pvals_t = _multitest_correct(pvals_t, 
                                           multitest_method=args.fdr_method,
                                           fdr=args.pval_threshold)

    # compile + save t-test results (corrected)
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_corrected.to_csv(ttest_path, header=True, index=True)

    # record t-test significant features (not ordered)
    fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
    #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
    print("LIVE BACTERIA: %d significant features for any %s vs %s (%s, %s, P<%.2f)" %\
          (len(fset_ttest), STRAIN_COLNAME, CONTROL_STRAIN, t_test, args.fdr_method, 
           args.pval_threshold))

    if len(fset_ttest) > 0:
        ttest_sigfeats_path = stats_dir / '{}_live_sigfeats.txt'.format(t_test)
        write_list_to_file(fset_ttest, ttest_sigfeats_path)

    ##### for DEAD bacteria: compare each strain with control #####
    
    dead_metadata = metadata[metadata['dead']==True]
    dead_features = features.reindex(dead_metadata.index)    

    ttest_path_uncorrected = stats_dir / '{}_dead_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_dead_results.csv'.format(t_test)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    
    # perform t-tests (without correction for multiple testing)   
    stats_t, pvals_t, reject_t = univariate_tests(X=dead_features, 
                                                  y=dead_metadata[STRAIN_COLNAME], 
                                                  control=CONTROL_STRAIN, 
                                                  test=t_test,
                                                  comparison_type='binary_each_group',
                                                  multitest_correction=None, 
                                                  alpha=0.05)
    
    # get effect sizes for comparisons
    effect_sizes_t =  get_effect_sizes(X=dead_features, 
                                       y=dead_metadata[STRAIN_COLNAME], 
                                       control=CONTROL_STRAIN,
                                       effect_type=None,
                                       linked_test=t_test)
    
    # compile + save t-test results (uncorrected)
    stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
    ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
    
    # correct for multiple comparisons
    pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
    reject_t, pvals_t = _multitest_correct(pvals_t, 
                                           multitest_method=args.fdr_method,
                                           fdr=args.pval_threshold)

    # compile + save t-test results (corrected)
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_corrected.to_csv(ttest_path, header=True, index=True)

    # record t-test significant features (not ordered)
    fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
    #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
    print("DEAD BACTERIA: %d significant features for any %s vs %s (%s, %s, P<%.2f)" %\
          (len(fset_ttest), STRAIN_COLNAME, CONTROL_STRAIN, t_test, args.fdr_method, 
           args.pval_threshold))

    if len(fset_ttest) > 0:
        ttest_sigfeats_path = stats_dir / '{}_dead_sigfeats.txt'.format(t_test)
        write_list_to_file(fset_ttest, ttest_sigfeats_path)  
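The uncorrected and corrected result tables above are built by repeating the same column-prefixing and correction steps. The helper below sketches one way to factor that pattern out; it is illustrative only and assumes _multitest_correct takes a p-value DataFrame and returns (reject, pvals), as in the calls above.

import pandas as pd
# assumed import path, mirroring the helper used in these examples
from tierpsytools.analysis.statistical_tests import _multitest_correct

def compile_and_correct(stats, effect_sizes, pvals, reject, fdr_method, fdr=0.05):
    """Prefix columns, compile the uncorrected table, then apply multiple-test correction."""
    stats = stats.add_prefix('stats_')
    effect_sizes = effect_sizes.add_prefix('effect_size_')
    uncorrected = pd.concat([stats, effect_sizes,
                             pvals.add_prefix('pvals_'),
                             reject.add_prefix('reject_')], axis=1)

    # correct p-values across comparisons, then rebuild the corrected table
    reject_c, pvals_c = _multitest_correct(pvals, multitest_method=fdr_method, fdr=fdr)
    corrected = pd.concat([stats, effect_sizes,
                           pvals_c.add_prefix('pvals_'),
                           reject_c.add_prefix('reject_')], axis=1)
    return uncorrected, corrected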
Example #4
            # Create table to store statistics results
            grouped = feat_df.join(meta_df[GROUPING_VAR]).groupby(by=GROUPING_VAR)
            stats_table = grouped.mean().T
            mean_cols = ['mean ' + v for v in stats_table.columns.to_list()]
            stats_table.columns = mean_cols
            for group in grouped.size().index: # store sample size
                stats_table['sample size {}'.format(group)] = grouped.size().loc[group]
            
            # ANOVA / Kruskal-Wallis tests
            if (TEST_NAME == "ANOVA" or TEST_NAME == "Kruskal"):
                if len(run_strain_list) > 2:                    
                    stats, pvals, reject = univariate_tests(X=feat_df, 
                                                            y=meta_df[GROUPING_VAR], 
                                                            control=CONTROL, 
                                                            test=TEST_NAME,
                                                            comparison_type='multiclass',
                                                            multitest_correction=args.fdr_method, 
                                                            alpha=0.05,
                                                            n_jobs=-1)
                                                    
                    # Record name of statistical test used (kruskal/f_oneway)
                    col = '{} p-value'.format(TEST_NAME)
                    stats_table[col] = pvals.loc[stats_table.index, TEST_NAME]
        
                    # Sort pvals + record significant features
                    pvals = pvals.sort_values(by=[TEST_NAME], ascending=True)
                    fset = list(pvals.index[np.where(pvals < args.pval_threshold)[0]])
                    if len(fset) > 0:
                        print("\n%d significant features found by %s for %s (run %d, P<%.2f, %s)" %\
                              (len(fset), TEST_NAME, GROUPING_VAR, run, args.pval_threshold, 
                               args.fdr_method))
Example #5
    # Drop feature columns with zero standard deviation
    features_df = feat_filter_std(features_df, threshold=0.0)

    # Fill in NaNs with global mean
    features_df = features_df.fillna(features_df.mean(axis=0))

    feature_list = features_df.columns.to_list()
    strain_list = list(metadata_df[args.strain_colname].unique())

    ### statistics
    # ANOVA to test for variation among strains
    if len(metadata_df[args.strain_colname].unique()) > 2:
        stats, pvals, reject = univariate_tests(
            X=features_df,
            y=metadata_df[args.strain_colname],
            control=args.control,
            test='ANOVA',
            comparison_type='multiclass',
            multitest_correction='fdr_by',
            alpha=0.05)
        # get effect sizes
        effect_sizes = get_effect_sizes(X=features_df,
                                        y=metadata_df[args.strain_colname],
                                        control=args.control,
                                        effect_type=None,
                                        linked_test='ANOVA')
        # compile + save results
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
        test_results = test_results.sort_values(by=['pvals'],
                                                ascending=True)  # rank pvals
        anova_save_path = save_dir / 'stats' / 'ANOVA_results.csv'
Example #6
    # drop NaN entries
    eggs = eggs.dropna(subset=['gene_name', 'number_eggs_1hr'])

    strain_list = [CONTROL_STRAIN] + [
        s for s in eggs['gene_name'].unique() if s != CONTROL_STRAIN
    ]

    # 1. perform chi-sq tests to see if number of eggs laid is significantly different from control
    #    for any strain

    # perform ANOVA (correct for multiple comparisons) - is there variation in egg count across strains?
    stats, pvals, reject = univariate_tests(X=eggs[['number_eggs_1hr']],
                                            y=eggs['gene_name'],
                                            test='ANOVA',
                                            control=CONTROL_STRAIN,
                                            comparison_type='multiclass',
                                            multitest_correction='fdr_by',
                                            alpha=0.05,
                                            n_permutation_test=None)  # 'all'

    # get effect sizes
    effect_sizes = get_effect_sizes(X=eggs[['number_eggs_1hr']],
                                    y=eggs['gene_name'],
                                    control=CONTROL_STRAIN,
                                    effect_type=None,
                                    linked_test='ANOVA')

    # compile
    test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
    test_results = test_results.sort_values(by=['pvals'],
Example #7
        subset=['gene_name', 'antioxidant', 'number_eggs_24hrs'])

    strain_list = [CONTROL_STRAIN] + [
        s for s in eggs['gene_name'].unique() if s != CONTROL_STRAIN
    ]
    antioxidant_list = [CONTROL_ANTIOXIDANT] + [
        a for a in eggs['antioxidant'].unique() if a != CONTROL_ANTIOXIDANT
    ]

    # 1. perform chi-sq tests to see if number of eggs laid is significantly different from control

    # perform ANOVA - is there variation in egg laying across antioxidants? (pooled strain data)
    stats, pvals, reject = univariate_tests(X=eggs[['number_eggs_24hrs']],
                                            y=eggs['antioxidant'],
                                            test='ANOVA',
                                            control=CONTROL_ANTIOXIDANT,
                                            comparison_type='multiclass',
                                            multitest_correction='fdr_by',
                                            alpha=0.05,
                                            n_permutation_test=None)  # 'all'

    # get effect sizes
    effect_sizes = get_effect_sizes(X=eggs[['number_eggs_24hrs']],
                                    y=eggs['antioxidant'],
                                    control=CONTROL_ANTIOXIDANT,
                                    effect_type=None,
                                    linked_test='ANOVA')

    # compile
    test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
    test_results = test_results.sort_values(by=['pvals'],
Example #8
def acute_rescue_stats(features, 
                       metadata, 
                       save_dir, 
                       control_strain, 
                       control_antioxidant, 
                       control_window,
                       fdr_method='fdr_by',
                       pval_threshold=0.05):
    """ Pairwise t-tests for each window comparing worm 'motion mode paused fraction' on 
        Keio mutants vs BW control 
        
        # One could fit a multiple linear regression model to account for strain*antioxidant in a 
        # single model: Y (motion_mode) = b0 + b1*X1 (strain) + b2*X2 (antiox) + e (error)
        # But this is a different type of question: we care about the difference in means between 
        # fepD vs BW (albeit under different antioxidant treatments), not about modelling their 
        # relationship, so individual t-tests (multiple-test-corrected) should suffice
        
        1. For each treatment condition, t-tests comparing fepD vs BW for motion_mode
        
        2. For fepD and BW separately, f-tests for equal variance among antioxidant treatment groups,
        then ANOVA tests for significant differences between antioxidants, then individual t-tests
        comparing each treatment to control
        
        Inputs
        ------
        features, metadata : pandas.DataFrame
            Clean feature summaries and accompanying metadata
        
        save_dir : str
            Directory to save statistics results
        
        control_strain, control_antioxidant, control_window
            Control values for the 'gene_name', 'antioxidant' and 'window' groupings
            (statistics are performed separately for each window; p-values are adjusted 
            for multiple test correction)
        
        fdr_method : str
            Multiple test correction method
        
        pval_threshold : float
            Significance threshold for statistical tests
    """

    stats_dir = Path(save_dir) / "Stats" / fdr_method
    stats_dir.mkdir(parents=True, exist_ok=True)

    strain_list = [control_strain] + [s for s in set(metadata['gene_name'].unique()) if s != control_strain]  
    antiox_list = [control_antioxidant] + [a for a in set(metadata['antioxidant'].unique()) if 
                                           a != control_antioxidant]
    window_list = [control_window] + [w for w in set(metadata['window'].unique()) if w != control_window]

    # categorical variables to investigate: 'gene_name', 'antioxidant' and 'window'
    print("\nInvestigating difference in fraction of worms paused between hit strain and control " +
          "(for each window), in the presence/absence of antioxidants:\n")    

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=['gene_name', 'antioxidant', 'window'])
    print("Mean sample size of strain/antioxidant for each window: %d" %\
          (int(sample_size['n_samples'].mean())))
      
    # For each strain separately...
    for strain in strain_list:
        strain_meta = metadata[metadata['gene_name']==strain]
        strain_feat = features.reindex(strain_meta.index)

        # 1. Is there any variation in fraction paused wrt antioxidant treatment?
        #    - ANOVA on pooled window data, then pairwise t-tests for each antioxidant
        
        print("Performing ANOVA on pooled window data for significant variation in fraction " +
              "of worms paused among different antioxidant treatments for %s..." % strain)
        
        # perform ANOVA (correct for multiple comparisons)             
        stats, pvals, reject = univariate_tests(X=strain_feat[[FEATURE]], 
                                                y=strain_meta['antioxidant'], 
                                                test='ANOVA',
                                                control=control_antioxidant,
                                                comparison_type='multiclass',
                                                multitest_correction=fdr_method,
                                                alpha=pval_threshold,
                                                n_permutation_test=None) # 'all'
    
        # get effect sizes
        effect_sizes = get_effect_sizes(X=strain_feat[[FEATURE]], 
                                        y=strain_meta['antioxidant'],
                                        control=control_antioxidant,
                                        effect_type=None,
                                        linked_test='ANOVA')
    
        # compile
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        
        # save results
        anova_path = Path(stats_dir) / 'ANOVA_{}_variation_across_antioxidants.csv'.format(strain)
        test_results.to_csv(anova_path, header=True, index=True)
              
        print("Performing t-tests comparing each antioxidant treatment to None (pooled window data)")
        
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat[[FEATURE]],
                                                      y=strain_meta['antioxidant'],
                                                      test='t-test',
                                                      control=control_antioxidant,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=fdr_method,
                                                      alpha=pval_threshold)
        effect_sizes_t =  get_effect_sizes(X=strain_feat[[FEATURE]], 
                                           y=strain_meta['antioxidant'], 
                                           control=control_antioxidant,
                                           effect_type=None,
                                           linked_test='t-test')
            
        # compile + save t-test results
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_results = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_save_path = stats_dir / 't-test_{}_antioxidant_results.csv'.format(strain)
        ttest_save_path.parent.mkdir(exist_ok=True, parents=True)
        ttest_results.to_csv(ttest_save_path, header=True, index=True)
    
        # 2. Is there any variation in fraction paused wrt window (time) across the videos?
        #    - ANOVA on pooled antioxidant data, then pairwise for each window
        
        print("Performing ANOVA on pooled antioxidant data for significant variation in fraction " +
              "of worms paused across (bluelight) window summaries for %s..." % strain)
        
        # perform ANOVA (correct for multiple comparisons)
        stats, pvals, reject = univariate_tests(X=strain_feat[[FEATURE]],
                                                y=strain_meta['window'],
                                                test='ANOVA',
                                                control=control_window,
                                                comparison_type='multiclass',
                                                multitest_correction=fdr_method,
                                                alpha=pval_threshold,
                                                n_permutation_test=None)
        
        # get effect sizes
        effect_sizes = get_effect_sizes(X=strain_feat[[FEATURE]],
                                        y=strain_meta['window'],
                                        control=control_window,
                                        effect_type=None,
                                        linked_test='ANOVA')

        # compile
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        
        # save results
        anova_path = Path(stats_dir) / 'ANOVA_{}_variation_across_windows.csv'.format(strain)
        test_results.to_csv(anova_path, header=True, index=True)

        print("Performing t-tests comparing each window with the first (pooled antioxidant data)")
        
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat[[FEATURE]],
                                                      y=strain_meta['window'],
                                                      test='t-test',
                                                      control=control_window,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=fdr_method,
                                                      alpha=pval_threshold)
        effect_sizes_t =  get_effect_sizes(X=strain_feat[[FEATURE]], 
                                           y=strain_meta['window'], 
                                           control=control_window,
                                           effect_type=None,
                                           linked_test='t-test')
            
        # compile + save t-test results
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_results = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_save_path = stats_dir / 't-test_{}_window_results.csv'.format(strain)
        ttest_save_path.parent.mkdir(exist_ok=True, parents=True)
        ttest_results.to_csv(ttest_save_path, header=True, index=True)   
         
    # Pairwise t-tests - is there a difference between strain vs control?

    control_meta = metadata[metadata['gene_name']==control_strain]
    control_feat = features.reindex(control_meta.index)
    control_df = control_meta.join(control_feat[[FEATURE]])

    for strain in strain_list[1:]: # skip control_strain at first index position
        strain_meta = metadata[metadata['gene_name']==strain]
        strain_feat = features.reindex(strain_meta.index)
        strain_df = strain_meta.join(strain_feat[[FEATURE]])

        # 3. Is there a difference between strain vs control at any window?
        
        print("\nPairwise t-tests for each window (pooled antioxidants) comparing fraction of " + 
              "worms paused on %s vs control:" % strain)

        stats, pvals, reject = pairwise_ttest(control_df, 
                                              strain_df, 
                                              feature_list=[FEATURE], 
                                              group_by='window', 
                                              fdr_method=fdr_method,
                                              fdr=0.05)
 
        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)
        
        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / 'window' /\
                            '{}_window_results.csv'.format(strain)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)
                             
        # for each antioxidant treatment condition...
        for antiox in antiox_list:
            print("Pairwise t-tests for each window comparing fraction of " + 
                  "worms paused on %s vs control with '%s'" % (strain, antiox))

            antiox_control_df = control_df[control_df['antioxidant']==antiox]
            antiox_strain_df = strain_df[strain_df['antioxidant']==antiox]
            
            stats, pvals, reject = pairwise_ttest(antiox_control_df,
                                                  antiox_strain_df,
                                                  feature_list=[FEATURE],
                                                  group_by='window',
                                                  fdr_method=fdr_method,
                                                  fdr=0.05)
        
            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)
            
            # save results
            ttest_strain_path = stats_dir / 'pairwise_ttests' / 'window' /\
                                '{0}_{1}_window_results.csv'.format(strain, antiox)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
            test_results.to_csv(ttest_strain_path, header=True, index=True)

        # 4. Is there a difference between strain vs control for any antioxidant?

        print("\nPairwise t-tests for each antioxidant (pooled windows) comparing fraction of " + 
              "worms paused on %s vs control:" % strain)

        stats, pvals, reject = pairwise_ttest(control_df, 
                                              strain_df, 
                                              feature_list=[FEATURE], 
                                              group_by='antioxidant', 
                                              fdr_method=fdr_method,
                                              fdr=0.05)
 
        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)
        
        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / 'antioxidant' /\
                            '{}_antioxidant_results.csv'.format(strain)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)
                             
        # For each window...
        for window in window_list:
            print("Pairwise t-tests for each antioxidant comparing fraction of " + 
                  "worms paused on %s vs control at window %d" % (strain, window))

            window_control_df = control_df[control_df['window']==window]
            window_strain_df = strain_df[strain_df['window']==window]
            
            stats, pvals, reject = pairwise_ttest(window_control_df,
                                                  window_strain_df,
                                                  feature_list=[FEATURE],
                                                  group_by='antioxidant',
                                                  fdr_method=fdr_method,
                                                  fdr=0.05)
        
            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)
            
            # save results
            ttest_strain_path = stats_dir / 'pairwise_ttests' / 'antioxidant' /\
                                '{0}_window{1}_antioxidant_results.csv'.format(strain, window)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
            test_results.to_csv(ttest_strain_path, header=True, index=True)
               
    return
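A hypothetical invocation of acute_rescue_stats. The DataFrames, save directory and control values shown are placeholders, and a module-level FEATURE constant (e.g. 'motion_mode_paused_fraction') is assumed to be defined, since the function reads it.

import pandas as pd

# placeholder inputs; real feature summaries and metadata would be loaded upstream
features = pd.read_csv('features_summaries.csv', index_col=0)
metadata = pd.read_csv('metadata.csv', index_col=0)

acute_rescue_stats(features,
                   metadata,
                   save_dir='./acute_rescue_stats',
                   control_strain='BW',
                   control_antioxidant='None',
                   control_window=0,
                   fdr_method='fdr_by',
                   pval_threshold=0.05)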
Example #9
def antioxidant_stats(features, metadata, args):
    """ Perform statistical analyses on Keio antioxidant rescue experiment results:
        - ANOVA tests for significant feature variation between strains (for each antioxidant treatment in turn)
        - ANOVA tests for significant feature variation in antioxidant treatment (for each strain in turn)
        - t-tests for each feature comparing each strain vs control for paired antioxidant treatment conditions
        - t-tests for each feature comparing each strain antioxidant treatment to negative control (no antioxidant)
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - control_dict : dict
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - test : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    # categorical variables to investigate: 'gene_name' and 'antioxidant'
    print(
        "\nInvestigating variation in worm behaviour on hit strains treated with different antioxidants"
    )

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[STRAIN_COLNAME].unique()) == len(
        metadata[STRAIN_COLNAME].str.upper().unique())
    assert len(metadata[TREATMENT_COLNAME].unique()) == len(
        metadata[TREATMENT_COLNAME].str.upper().unique())

    assert not features.isna().any().any()

    strain_list = list(metadata[STRAIN_COLNAME].unique())
    antioxidant_list = list(metadata[TREATMENT_COLNAME].unique())
    assert CONTROL_STRAIN in strain_list and CONTROL_TREATMENT in antioxidant_list

    # print mean sample size
    sample_size = df_summary_stats(metadata,
                                   columns=[STRAIN_COLNAME, TREATMENT_COLNAME])
    print("Mean sample size of %s: %d" %
          (STRAIN_COLNAME, int(sample_size['n_samples'].mean())))

    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir = save_dir / "Stats" / args.fdr_method

    ### For each antioxidant treatment in turn...

    for antiox in antioxidant_list:
        print("\n%s" % antiox)
        meta_antiox = metadata[metadata[TREATMENT_COLNAME] == antiox]
        feat_antiox = features.reindex(meta_antiox.index)

        ### ANOVA tests for significant variation between strains

        # make path to save ANOVA results
        test_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
            (args.test + '_' + antiox))
        test_path = stats_dir / '{}_results.csv'.format(
            (args.test + '_' + antiox))
        test_path.parent.mkdir(exist_ok=True, parents=True)

        if len(meta_antiox[STRAIN_COLNAME].unique()) > 2:
            # perform ANOVA + record results before & after correcting for multiple comparisons
            stats, pvals, reject = univariate_tests(
                X=feat_antiox,
                y=meta_antiox[STRAIN_COLNAME],
                test=args.test,
                control=CONTROL_STRAIN,
                comparison_type='multiclass',
                multitest_correction=None,  # uncorrected
                alpha=args.pval_threshold,
                n_permutation_test=None)  # 'all'

            # get effect sizes
            effect_sizes = get_effect_sizes(X=feat_antiox,
                                            y=meta_antiox[STRAIN_COLNAME],
                                            control=CONTROL_STRAIN,
                                            effect_type=None,
                                            linked_test=args.test)

            # compile + save results (uncorrected)
            test_results = pd.concat([stats, effect_sizes, pvals, reject],
                                     axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path_uncorrected,
                                header=True,
                                index=True)

            # correct for multiple comparisons
            reject_corrected, pvals_corrected = _multitest_correct(
                pvals,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save results (corrected)
            test_results = pd.concat(
                [stats, effect_sizes, pvals_corrected, reject_corrected],
                axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path, header=True, index=True)

            print(
                "%s differences in '%s' across strains on %s (%s, P<%.2f, %s)"
                % (("SIGNIFICANT" if reject_corrected.loc[FEATURE, args.test]
                    else "No significant"), FEATURE, antiox, args.test,
                   args.pval_threshold, args.fdr_method))
        else:
            print("\nWARNING: Not enough %s groups for %s (n=%d)" %\
                  (STRAIN_COLNAME, args.test, len(strain_list)))

        ### t-tests comparing each strain vs control for each antioxidant treatment conditions

        if len(meta_antiox[STRAIN_COLNAME].unique()) == 2 or (
                len(meta_antiox[STRAIN_COLNAME].unique()) > 2
                and reject_corrected.loc[FEATURE, args.test]):

            # t-test to use
            t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'  # aka. Wilcoxon rank-sum
            ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
                (t_test + '_' + antiox))
            ttest_path = stats_dir / '{}_results.csv'.format(
                (t_test + '_' + antiox))
            ttest_path.parent.mkdir(exist_ok=True, parents=True)

            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(
                X=feat_antiox,
                y=meta_antiox[STRAIN_COLNAME],
                control=CONTROL_STRAIN,
                test=t_test,
                comparison_type='binary_each_group',
                multitest_correction=None,
                alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t = get_effect_sizes(X=feat_antiox,
                                              y=meta_antiox[STRAIN_COLNAME],
                                              control=CONTROL_STRAIN,
                                              effect_type=None,
                                              linked_test=t_test)

            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = [
                'effect_size_' + str(c) for c in effect_sizes_t.columns
            ]
            ttest_uncorrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected,
                                     header=True,
                                     index=True)

            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(
                pvals_t,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            nsig = reject_t.loc[FEATURE].sum()
            print("%d %ss differ from %s in '%s' on %s (%s, P<%.2f, %s)" %
                  (nsig, STRAIN_COLNAME, CONTROL_STRAIN, FEATURE, antiox,
                   t_test, args.pval_threshold, args.fdr_method))

    ### For each strain in turn...

    for strain in strain_list:
        print("\n%s" % strain)
        meta_strain = metadata[metadata[STRAIN_COLNAME] == strain]
        feat_strain = features.reindex(meta_strain.index)

        ### ANOVA tests for significant feature variation in antioxidant treatment

        # make path to save ANOVA results
        test_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
            (args.test + '_' + strain))
        test_path = stats_dir / '{}_results.csv'.format(
            (args.test + '_' + strain))
        test_path.parent.mkdir(exist_ok=True, parents=True)

        if len(meta_strain[TREATMENT_COLNAME].unique()) > 2:
            # perform ANOVA + record results before & after correcting for multiple comparisons
            stats, pvals, reject = univariate_tests(
                X=feat_strain,
                y=meta_strain[TREATMENT_COLNAME],
                test=args.test,
                control=CONTROL_TREATMENT,
                comparison_type='multiclass',
                multitest_correction=None,  # uncorrected
                alpha=args.pval_threshold,
                n_permutation_test=None)  # 'all'

            # get effect sizes
            effect_sizes = get_effect_sizes(X=feat_strain,
                                            y=meta_strain[TREATMENT_COLNAME],
                                            control=CONTROL_TREATMENT,
                                            effect_type=None,
                                            linked_test=args.test)

            # compile + save results (uncorrected)
            test_results = pd.concat([stats, effect_sizes, pvals, reject],
                                     axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path_uncorrected,
                                header=True,
                                index=True)

            # correct for multiple comparisons
            reject_corrected, pvals_corrected = _multitest_correct(
                pvals,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save results (corrected)
            test_results = pd.concat(
                [stats, effect_sizes, pvals_corrected, reject_corrected],
                axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path, header=True, index=True)

            print("%s differences in '%s' across %ss for %s (%s, P<%.2f, %s)" %
                  (("SIGNIFICANT" if reject_corrected.loc[FEATURE, args.test]
                    else "No"), FEATURE, TREATMENT_COLNAME, strain, args.test,
                   args.pval_threshold, args.fdr_method))
        else:
            print("\nWARNING: Not enough %s groups for %s (n=%d)" %\
                  (TREATMENT_COLNAME, args.test, len(antioxidant_list)))

        ### t-tests comparing each antioxidant treatment to no antioxidant for each strain

        if len(meta_strain[TREATMENT_COLNAME].unique()) == 2 or (
                len(meta_strain[TREATMENT_COLNAME].unique()) > 2
                and reject_corrected.loc[FEATURE, args.test]):
            # t-test to use
            t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'  # aka. Wilcoxon rank-sum
            ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
                (t_test + '_' + strain))
            ttest_path = stats_dir / '{}_results.csv'.format(
                (t_test + '_' + strain))
            ttest_path.parent.mkdir(exist_ok=True, parents=True)

            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(
                X=feat_strain,
                y=meta_strain[TREATMENT_COLNAME],
                control=CONTROL_TREATMENT,
                test=t_test,
                comparison_type='binary_each_group',
                multitest_correction=None,
                alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t = get_effect_sizes(X=feat_strain,
                                              y=meta_strain[TREATMENT_COLNAME],
                                              control=CONTROL_TREATMENT,
                                              effect_type=None,
                                              linked_test=t_test)

            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = [
                'effect_size_' + str(c) for c in effect_sizes_t.columns
            ]
            ttest_uncorrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected,
                                     header=True,
                                     index=True)

            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(
                pvals_t,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            nsig = reject_t.loc[FEATURE].sum()
            print("%d %ss differ from %s in '%s' for %s (%s, P<%.2f, %s)" %
                  (nsig, TREATMENT_COLNAME, CONTROL_TREATMENT, FEATURE, strain,
                   t_test, args.pval_threshold, args.fdr_method))

    ### Pairwise t-tests comparing strain vs control behaviour on each antioxidant
    print("\nPerforming pairwise t-tests:")
    # subset for control data
    control_strain_meta = metadata[metadata[STRAIN_COLNAME] == CONTROL_STRAIN]
    control_strain_feat = features.reindex(control_strain_meta.index)
    control_df = control_strain_meta.join(control_strain_feat)

    for strain in strain_list:
        if strain == CONTROL_STRAIN:
            continue

        # subset for strain data
        strain_meta = metadata[metadata[STRAIN_COLNAME] == strain]
        strain_feat = features.reindex(strain_meta.index)
        strain_df = strain_meta.join(strain_feat)

        # perform pairwise t-tests comparing strain with control for each antioxidant treatment
        stats, pvals, reject = pairwise_ttest(control_df,
                                              strain_df,
                                              feature_list=[FEATURE],
                                              group_by=TREATMENT_COLNAME,
                                              fdr_method=args.fdr_method,
                                              fdr=args.pval_threshold)

        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)

        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / '{}_results.csv'.format(
            strain + "_vs_" + CONTROL_STRAIN)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)

        for antiox in antioxidant_list:
            print("%s difference in '%s' between %s vs %s on %s (paired t-test, P=%.3f, %s)" %\
                  (("SIGNIFICANT" if reject.loc[FEATURE, 'reject_{}'.format(antiox)] else "No"),
                  FEATURE, strain, CONTROL_STRAIN, antiox, pvals.loc[FEATURE, 'pvals_{}'.format(antiox)],
                  args.fdr_method))
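A note on the helper used above: pairwise_ttest is project-specific and its implementation is not shown in these examples. As a hedged, illustrative sketch of the idea (strain vs control compared separately within each antioxidant treatment, then corrected for multiple testing), it could be approximated as follows; the function name, defaults and use of scipy/statsmodels here are assumptions, not the project's actual code.

# Illustrative sketch only -- not the project's pairwise_ttest implementation
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests

def pairwise_ttest_sketch(control_df, strain_df, feature, group_by, fdr_method='fdr_by', fdr=0.05):
    """Compare strain vs control for one feature within each level of 'group_by'."""
    pvals = {}
    for group in control_df[group_by].unique():
        a = control_df.loc[control_df[group_by] == group, feature].dropna()
        b = strain_df.loc[strain_df[group_by] == group, feature].dropna()
        _, p = ttest_ind(a, b)  # two-sample (unpaired) t-test
        pvals[group] = p
    pvals = pd.Series(pvals)
    reject, pvals_corrected, _, _ = multipletests(pvals.values, alpha=fdr, method=fdr_method)
    return pd.DataFrame({'pvals': pvals_corrected, 'reject': reject}, index=pvals.index)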
Example #10
0
def keio_stats(features, metadata, args):
    """ Perform statistical analyses on Keio screen results:
        - ANOVA tests for significant between strain variation among all strains for each feature
        - t-tests for significant differences between each strain and control for each feature
        - k-significant feature selection for agreement with ANOVA significant feature set
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - omit_strains : list
            - grouping_variable : str
            - control_dict : dict
            - collapse_control : bool
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - test : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    # categorical variable to investigate, e.g. 'gene_name'
    grouping_var = args.grouping_variable
    print("\nInvestigating '%s' variation" % grouping_var)    

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[grouping_var].unique()) == len(metadata[grouping_var].str.upper().unique())
    
    # Subset results (rows) to omit selected strains
    if args.omit_strains is not None:
        features, metadata = subset_results(features, metadata, grouping_var, args.omit_strains)

    # Load Tierpsy Top feature set + subset (columns) for top feats only
    if args.n_top_feats is not None:
        top_feats_path = Path(args.tierpsy_top_feats_dir) / "tierpsy_{}.csv".format(str(args.n_top_feats))
        topfeats = load_topfeats(top_feats_path, add_bluelight=args.align_bluelight, 
                                 remove_path_curvature=True, header=None)
        
        # Drop features that are not in results
        top_feats_list = [feat for feat in list(topfeats) if feat in features.columns]
        features = features[top_feats_list]
    
    assert not features.isna().any().any()
    
    strain_list = list(metadata[grouping_var].unique())
    control = args.control_dict[grouping_var] # control strain to use
    assert control in strain_list
    
    if args.collapse_control:
        print("Collapsing control data (mean of each day)")
        features, metadata = average_plate_control_data(features, 
                                                        metadata, 
                                                        control=control, 
                                                        grouping_var=grouping_var, 
                                                        plate_var='imaging_plate_id')

    _ = df_summary_stats(metadata)  # summary dataframe of sample sizes (TODO: plot from this?)

    # Record mean sample size per group
    mean_sample_size = int(np.round(metadata.join(features).groupby([grouping_var], 
                                                                    as_index=False).size().mean()))
    print("Mean sample size: %d" % mean_sample_size)

    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir =  save_dir / grouping_var / "Stats" / args.fdr_method
    plot_dir = save_dir / grouping_var / "Plots" / args.fdr_method              

#%% F-test for equal variances

    # Compare within-group variance against the control group (corrected for multiple comparisons).
    # Unequal variances are not a problem for a t-test when sample sizes are equal, so homogeneity
    # of variances only matters when group sizes differ. If they do differ, run F-tests (Levene's
    # test) first to check whether variances are equal before t-testing.
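    # Hedged aside: levene_f_test is a project helper whose internals are not shown here. For a
    # single feature column, the underlying per-feature check is roughly scipy.stats.levene
    # across groups, e.g.
    #     from scipy.stats import levene
    #     groups = [features.loc[metadata[grouping_var] == g, feature_name] for g in strain_list]
    #     stat, pval = levene(*groups)  # p < args.pval_threshold suggests unequal variances
    # with the per-feature p-values then corrected for multiple comparisons.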
    if args.f_test:
        levene_stats_path = stats_dir / 'levene_results.csv'
        levene_stats = levene_f_test(features, metadata, grouping_var, 
                                      p_value_threshold=args.pval_threshold, 
                                      multitest_method=args.fdr_method,
                                      saveto=levene_stats_path,
                                      del_if_exists=False)
        # if p < 0.05 then variances are not equal, and sample size matters
        prop_eqvar = (levene_stats['pval'] > args.pval_threshold).sum() / len(levene_stats['pval'])
        print("Percentage equal variance %.1f%%" % (prop_eqvar * 100))
          
#%% ANOVA / Kruskal-Wallis tests for significantly different features across groups

    test_path_uncorrected = stats_dir / '{}_results_uncorrected.csv'.format(args.test)
    test_path = stats_dir / '{}_results.csv'.format(args.test)
    
    fset = []  # significant feature set (populated below if the ANOVA/Kruskal tests are run)
    if not (test_path.exists() and test_path_uncorrected.exists()):
        test_path.parent.mkdir(exist_ok=True, parents=True)
    
        if (args.test == "ANOVA" or args.test == "Kruskal"):
            if len(strain_list) > 2:   
                # perform ANOVA + record results before & after correcting for multiple comparisons               
                stats, pvals, reject = univariate_tests(X=features, 
                                                        y=metadata[grouping_var], 
                                                        control=control, 
                                                        test=args.test,
                                                        comparison_type='multiclass',
                                                        multitest_correction=None, # uncorrected
                                                        alpha=args.pval_threshold,
                                                        n_permutation_test='all')

                # get effect sizes
                effect_sizes = get_effect_sizes(X=features, 
                                                y=metadata[grouping_var],
                                                control=control,
                                                effect_type=None,
                                                linked_test=args.test)

                # compile + save results (uncorrected)
                test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
                test_results.columns = ['stats','effect_size','pvals','reject']     
                test_results['significance'] = sig_asterix(test_results['pvals'])
                test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
                test_results.to_csv(test_path_uncorrected, header=True, index=True)

                # correct for multiple comparisons
                reject_corrected, pvals_corrected = _multitest_correct(pvals, 
                                                                       multitest_method=args.fdr_method,
                                                                       fdr=args.pval_threshold)
                                            
                # compile + save results (corrected)
                test_results = pd.concat([stats, effect_sizes, pvals_corrected, reject_corrected], axis=1)
                test_results.columns = ['stats','effect_size','pvals','reject']     
                test_results['significance'] = sig_asterix(test_results['pvals'])
                test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
                test_results.to_csv(test_path, header=True, index=True)
        
                # use reject mask to find significant feature set
                fset = pvals.loc[reject[args.test]].sort_values(by=args.test, ascending=True).index.to_list()
                #assert set(fset) == set(anova_corrected['pvals'].index[np.where(anova_corrected['pvals'] < 
                #args.pval_threshold)[0]])

                if len(fset) > 0:
                    print("%d significant features found by %s for '%s' (P<%.2f, %s)" % (len(fset), 
                          args.test, grouping_var, args.pval_threshold, args.fdr_method))
                    anova_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(args.test)
                    write_list_to_file(fset, anova_sigfeats_path)
            else:
                fset = []
                print("\nWARNING: Not enough groups for %s for '%s' (n=%d groups)" %\
                      (args.test, grouping_var, len(strain_list)))
                    
#%% Linear Mixed Models (LMMs), accounting for day-to-day variation
        # NB: Ideally report:  parameter | beta | lower-95 | upper-95 | random effect (SD)
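        # Hedged aside: compounds_with_low_effect_univariate is a tierpsytools helper. For one
        # feature at a time, a mixed model with a random day effect could (illustratively,
        # assuming statsmodels and a formula-safe feature name) look roughly like:
        #     import statsmodels.formula.api as smf
        #     df = metadata[[grouping_var, args.lmm_random_effect]].join(features[[feature_name]])
        #     lmm = smf.mixedlm("%s ~ C(%s)" % (feature_name, grouping_var), data=df,
        #                       groups=df[args.lmm_random_effect]).fit()
        #     pvals_feature = lmm.pvalues  # fixed-effect p-values, corrected across features later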
        elif args.test == 'LMM':
            with warnings.catch_warnings():
                # Filter warnings as parameter is often on the boundary
                warnings.filterwarnings("ignore")
                #warnings.simplefilter("ignore", ConvergenceWarning)
                (signif_effect, low_effect,  error, mask, pvals
                 ) = compounds_with_low_effect_univariate(feat=features, 
                                                          drug_name=metadata[grouping_var], 
                                                          drug_dose=None, 
                                                          random_effect=metadata[args.lmm_random_effect], 
                                                          control=control, 
                                                          test=args.test, 
                                                          comparison_type='multiclass',
                                                          multitest_method=args.fdr_method)
            assert len(error) == 0

            # save pvals
            pvals.to_csv(test_path_uncorrected, header=True, index=True)

            # save significant features -- if any strain significant for any feature
            fset = pvals.columns[(pvals < args.pval_threshold).any()].to_list()
            if len(fset) > 0:
                lmm_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(args.test)
                write_list_to_file(fset, lmm_sigfeats_path)

            # save strains with a significant effect
            if len(signif_effect) > 0:
                print("%d significant features found (%d significant %ss vs %s control, "
                      "after accounting for %s variation, %s, P<%.2f, %s)" %
                      (len(fset), len(signif_effect), grouping_var.replace('_',' '), control,
                       args.lmm_random_effect.split('_yyyymmdd')[0], args.test,
                       args.pval_threshold, args.fdr_method))
                signif_effect_path = stats_dir / '{}_signif_effect_strains.txt'.format(args.test)
                write_list_to_file(signif_effect, signif_effect_path)
        
        else:
            raise IOError("Test '{}' not recognised".format(args.test))
    
#%% t-tests / Mann-Whitney tests
    
    # t-test to use        
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney' # aka. Wilcoxon rank-sum      
    ttest_path_uncorrected = stats_dir / '{}_results_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_results.csv'.format(t_test)               

    if not (ttest_path_uncorrected.exists() and ttest_path.exists()):    
        ttest_path.parent.mkdir(exist_ok=True, parents=True)

        if len(fset) > 0 or len(strain_list) == 2:
            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(X=features, 
                                                          y=metadata[grouping_var], 
                                                          control=control, 
                                                          test=t_test,
                                                          comparison_type='binary_each_group',
                                                          multitest_correction=None, 
                                                          alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t =  get_effect_sizes(X=features, 
                                               y=metadata[grouping_var], 
                                               control=control,
                                               effect_type=None,
                                               linked_test=t_test)
            
            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
            ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
            
            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(pvals_t, 
                                                   multitest_method=args.fdr_method,
                                                   fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            # record t-test significant features (not ordered): features where any strain vs
            # control comparison remains significant after correction
            fset_ttest = reject_t.index[reject_t.any(axis=1).values].to_list()
            #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
            print("%d significant features found for any %s vs %s (%s, P<%.2f)" %\
                  (len(fset_ttest), grouping_var, control, t_test, args.pval_threshold))

            if len(fset_ttest) > 0:
                ttest_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(t_test)
                write_list_to_file(fset_ttest, ttest_sigfeats_path)
                                 
#%% K significant features
    
    ksig_uncorrected_path = stats_dir / 'k_significant_features_uncorrected.csv'
    ksig_corrected_path = stats_dir / 'k_significant_features.csv'
    if not (ksig_uncorrected_path.exists() and ksig_corrected_path.exists()):
        ksig_corrected_path.parent.mkdir(exist_ok=True, parents=True)      
        fset_ksig, (scores, pvalues_ksig), support = k_significant_feat(feat=features, 
                                                                        y_class=metadata[grouping_var], 
                                                                        k=len(fset),
                                                                        score_func='f_classif', 
                                                                        scale=None, 
                                                                        feat_names=None, 
                                                                        plot=False, 
                                                                        k_to_plot=None, 
                                                                        close_after_plotting=True,
                                                                        saveto=None, #k_sigfeat_dir
                                                                        figsize=None, 
                                                                        title=None, 
                                                                        xlabel=None)
        # compile + save k-significant features (uncorrected) 
        ksig_table = pd.concat([pd.Series(scores), pd.Series(pvalues_ksig)], axis=1)
        ksig_table.columns = ['scores','pvals']
        ksig_table.index = fset_ksig
        ksig_table.to_csv(ksig_uncorrected_path, header=True, index=True)   
        
        # Correct for multiple comparisons
        _, ksig_table['pvals'] = _multitest_correct(ksig_table['pvals'], 
                                                    multitest_method=args.fdr_method,
                                                    fdr=args.pval_threshold)
        
        # save k-significant features (corrected)
        ksig_table.to_csv(ksig_corrected_path, header=True, index=True)   
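        # Hedged aside: k_significant_feat with score_func='f_classif' is broadly analogous to
        # univariate feature selection in scikit-learn, e.g. (illustrative only):
        #     from sklearn.feature_selection import SelectKBest, f_classif
        #     selector = SelectKBest(f_classif, k=len(fset)).fit(features, metadata[grouping_var])
        #     scores, pvalues = selector.scores_, selector.pvalues_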

#%% mRMR feature selection: minimum Redundancy, Maximum Relevance #####
    
    mrmr_dir = plot_dir / 'mrmr'
    mrmr_dir.mkdir(exist_ok=True, parents=True)
    mrmr_results_path = mrmr_dir / "mrmr_results.csv"

    if not mrmr_results_path.exists():
        estimator = Pipeline([('scaler', StandardScaler()), ('estimator', LogisticRegression())])
        y = metadata[grouping_var].values
        (mrmr_feat_set, 
         mrmr_scores, 
         mrmr_support) = mRMR_feature_selection(features, y_class=y, k=10,
                                                redundancy_func='pearson_corr',
                                                relevance_func='kruskal',
                                                n_bins=10, mrmr_criterion='MID',
                                                plot=True, k_to_plot=5, 
                                                close_after_plotting=True,
                                                saveto=mrmr_dir, figsize=None)
        # save results                                        
        mrmr_table = pd.concat([pd.Series(mrmr_feat_set), pd.Series(mrmr_scores)], axis=1)
        mrmr_table.columns = ['feature','score']
        mrmr_table.to_csv(mrmr_results_path, header=True, index=False)
        
        n_cv = 5
        cv_scores_mrmr = cross_val_score(estimator, features[mrmr_feat_set], y, cv=n_cv)
        cv_scores_mrmr = pd.DataFrame(cv_scores_mrmr, columns=['cv_score'])
        cv_scores_mrmr.to_csv(mrmr_dir / "cv_scores.csv", header=True, index=False)        
        print('MRMR CV Score: %f (n=%d)' % (cv_scores_mrmr['cv_score'].mean(), n_cv))
    else:
        # load mrmr results
        mrmr_table = pd.read_csv(mrmr_results_path)
        
    mrmr_feat_set = mrmr_table['feature'].to_list()
    print("\nTop %d features found by MRMR:" % len(mrmr_feat_set))
    for feat in mrmr_feat_set:
        print(feat)
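The _multitest_correct helper that appears throughout these examples is also project-specific. For orientation only, a minimal column-wise FDR correction of a (features x comparisons) p-value table could be sketched with statsmodels as below; the function name and the handling of NaNs are assumptions, not the helper's documented behaviour.

# Illustrative sketch only -- not the project's _multitest_correct helper
import pandas as pd
from statsmodels.stats.multitest import multipletests

def multitest_correct_sketch(pvals, multitest_method='fdr_by', fdr=0.05):
    """Correct a DataFrame of p-values (features x comparisons), one column at a time."""
    reject = pd.DataFrame(False, index=pvals.index, columns=pvals.columns)
    corrected = pvals.copy()
    for col in pvals.columns:
        mask = pvals[col].notna()
        rej, p_corr, _, _ = multipletests(pvals.loc[mask, col], alpha=fdr,
                                          method=multitest_method)
        reject.loc[mask, col] = rej
        corrected.loc[mask, col] = p_corr
    return reject, corrected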
Example #11
0
def single_feature_window_stats(metadata,
                                features,
                                group_by,
                                control,
                                save_dir,
                                windows=None,
                                feat='motion_mode_paused_fraction',
                                pvalue_threshold=0.05,
                                fdr_method='fdr_by'):
    """ Pairwise t-tests for each window comparing a feature of worm behaviour on mutant strains 
        vs control 
        
        Parameters
        ----------
        metadata : pandas.DataFrame
        
        features : pandas.DataFrame
            Dataframe of compiled window summaries
            
        group_by : str
            Column name of variable containing control and other groups to compare, eg. 'gene_name'
            
        control : str
            Name of control group in 'group_by' column in metadata
            
        save_dir : str
            Path to directory to save results files
            
        windows : list
            List of window numbers at which to compare strains (corrected for multiple testing)
            
        feat : str
            Feature to test
        
        pvalue_threshold : float
            P-value significance threshold
            
        fdr_method : str
            Multiple testing correction method to use
    """

    import pandas as pd
    from pathlib import Path
    from statistical_testing.stats_helper import pairwise_ttest
    from statistical_testing.perform_keio_stats import df_summary_stats
    from visualisation.plotting_helper import sig_asterix
    from write_data.write import write_list_to_file
    from tierpsytools.analysis.statistical_tests import univariate_tests, get_effect_sizes

    # categorical variables to investigate: 'gene_name' and 'window'
    print(
        "\nInvestigating variation in fraction of worms paused between hit strains and control "
        + "(for each window)")

    # check there will be no errors due to case-sensitivity
    assert len(metadata[group_by].unique()) == len(
        metadata[group_by].str.upper().unique())

    # subset for list of windows
    if windows is None:
        windows = sorted(metadata['window'].unique())
    else:
        assert all(w in sorted(metadata['window'].unique()) for w in windows)
        metadata = metadata[metadata['window'].isin(windows)]
        features = features[[feat]].reindex(metadata.index)

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=[group_by, 'window'])
    print("Mean sample size of %s/window: %d" %
          (group_by, int(sample_size['n_samples'].mean())))

    control_meta = metadata[metadata[group_by] == control]
    control_feat = features.reindex(control_meta.index)
    control_df = control_meta.join(control_feat)

    n = len(metadata[group_by].unique())
    strain_list = [s for s in metadata[group_by].unique() if s != control]
    fset = []

    if n > 2:

        # Perform ANOVA - is there variation among strains at each window?
        anova_path = Path(
            save_dir) / 'ANOVA' / 'ANOVA_{}window_results.csv'.format(
                len(windows))
        anova_path.parent.mkdir(parents=True, exist_ok=True)

        stats, pvals, reject = univariate_tests(
            X=features,
            y=metadata[group_by],
            control=control,
            test='ANOVA',
            comparison_type='multiclass',
            multitest_correction=fdr_method,
            alpha=pvalue_threshold,
            n_permutation_test=None)

        # get effect sizes
        effect_sizes = get_effect_sizes(X=features,
                                        y=metadata[group_by],
                                        control=control,
                                        effect_type=None,
                                        linked_test='ANOVA')

        # compile + save results
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(
            by=['pvals'], ascending=True)  # rank by p-value
        test_results.to_csv(anova_path, header=True, index=True)

        # use reject mask to find significant feature set
        fset = pvals.loc[reject['ANOVA']].sort_values(
            by='ANOVA', ascending=True).index.to_list()

        if len(fset) > 0:
            print("%d significant features found by ANOVA for '%s' (P<%.2f, %s)" %\
                  (len(fset), group_by, pvalue_threshold, fdr_method))
            anova_sigfeats_path = anova_path.parent / 'ANOVA_sigfeats.txt'
            write_list_to_file(fset, anova_sigfeats_path)

    if n == 2 or len(fset) > 0:

        # pairwise t-tests
        for strain in strain_list:
            print(
                "\nPairwise t-tests for each window comparing fraction of worms paused "
                + "on %s vs control" % strain)

            ttest_strain_path = Path(save_dir) / 'pairwise_ttests' /\
                '{}_window_results.csv'.format(strain)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)

            strain_meta = metadata[metadata[group_by] == strain]
            strain_feat = features.reindex(strain_meta.index)
            strain_df = strain_meta.join(strain_feat[[feat]])

            stats, pvals, reject = pairwise_ttest(control_df,
                                                  strain_df,
                                                  feature_list=[feat],
                                                  group_by='window',
                                                  fdr_method=fdr_method,
                                                  fdr=pvalue_threshold)

            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)

            # save results
            test_results.to_csv(ttest_strain_path, header=True, index=True)

            for window in windows:
                print("%s difference in '%s' between %s vs %s in window %s (t-test, P=%.3f, %s)" %\
                      (("SIGNIFICANT" if reject.loc[feat, 'reject_{}'.format(window)] else "No"),
                      feat, strain, control, window, pvals.loc[feat, 'pvals_{}'.format(window)],
                      fdr_method))

    return
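A hypothetical call to the function above might look like the following; the file paths, control label, window numbers and output directory are placeholders for illustration only, not values taken from the original project.

# Hypothetical usage sketch (paths and values are placeholders)
import pandas as pd

metadata = pd.read_csv("window_summaries_metadata.csv")   # placeholder path
features = pd.read_csv("window_summaries_features.csv")   # placeholder path

single_feature_window_stats(metadata,
                            features,
                            group_by='gene_name',
                            control='wild_type',            # placeholder control label
                            save_dir='stats/window_tests',  # placeholder output directory
                            windows=[0, 1, 2],              # placeholder window numbers
                            feat='motion_mode_paused_fraction',
                            pvalue_threshold=0.05,
                            fdr_method='fdr_by')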
Example #12
0
def control_variation(feat, 
                      meta, 
                      args,
                      variables=['date_yyyymmdd','instrument_name','imaging_run_number'],
                      n_sig_features=None):
    """ Analyse variation in control data with respect to each categorical variable in 'variables'
        
        Inputs
        ------
        feat, meta : pd.DataFrame
            Matching features summaries and metadata for control data
        
        args : Object 
            Python object with the following attributes:
            - remove_outliers : bool
            - grouping_variable : str
            - control_dict : dict
            - test : str
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int
            - n_top_feats : int
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            
        variables : list
            List of categorical random variables to analyse variation in control data
    """
    
    assert set(feat.index) == set(meta.index)
            
    save_dir = get_save_dir(args) / "control"

    # Stats test to use
    assert args.test in ['ANOVA','Kruskal','LMM']
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney' # aka. Wilcoxon rank-sums
    
    for grouping_var in tqdm(variables):
        
        # convert grouping variable column to factor (categorical)
        meta[grouping_var] = meta[grouping_var].astype(str)
                
        # get control group for eg. date_yyyymmdd
        control_group = str(args.control_dict[grouping_var])
        print("\nInvestigating variation in '%s' (control: '%s')" % (grouping_var, control_group))

        # Record mean sample size per group
        mean_sample_size = int(np.round(meta.groupby([grouping_var]).size().mean()))
        print("Mean sample size: %d" % mean_sample_size)
        
        group_list = list(meta[grouping_var].unique())
        stats_dir =  save_dir / "Stats" / grouping_var
        plot_dir = save_dir / "Plots" / grouping_var

        ##### STATISTICS #####
                      
        stats_path = stats_dir / '{}_results.csv'.format(args.test) # LMM/ANOVA/Kruskal  
        ttest_path = stats_dir / '{}_results.csv'.format(t_test)
    
        if not (stats_path.exists() and ttest_path.exists()):
            stats_path.parent.mkdir(exist_ok=True, parents=True)
            ttest_path.parent.mkdir(exist_ok=True, parents=True)
        
            ### ANOVA / Kruskal-Wallis tests for significantly different features across groups
            fset = []  # significant feature set (remains empty if ANOVA/Kruskal is not run, e.g. for LMM)
            if (args.test == "ANOVA" or args.test == "Kruskal"):
                if len(group_list) > 2:                    
                    stats, pvals, reject = univariate_tests(X=feat, 
                                                            y=meta[grouping_var], 
                                                            control=control_group, 
                                                            test=args.test,
                                                            comparison_type='multiclass',
                                                            multitest_correction=args.fdr_method, 
                                                            alpha=0.05)
                    # get effect sizes
                    effect_sizes = get_effect_sizes(X=feat, 
                                                    y=meta[grouping_var],
                                                    control=control_group,
                                                    effect_type=None,
                                                    linked_test=args.test)
                                                
                    anova_table = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
                    anova_table.columns = ['stats','effect_size','pvals','reject']     

                    anova_table['significance'] = sig_asterix(anova_table['pvals'])
        
                    # Sort pvals + record significant features
                    anova_table = anova_table.sort_values(by=['pvals'], ascending=True)
                    fset = list(anova_table['pvals'].index[np.where(anova_table['pvals'] < 
                                                                    args.pval_threshold)[0]])
                    
                    # Save statistics results + significant feature set to file
                    anova_table.to_csv(stats_path, header=True, index=True)
            
                    if len(fset) > 0:
                        anova_sigfeats_path = Path(str(stats_path).replace('_results.csv', '_sigfeats.txt'))
                        write_list_to_file(fset, anova_sigfeats_path)
                        print("\n%d significant features found by %s for '%s' (P<%.2f, %s)" %\
                              (len(fset), args.test, grouping_var, args.pval_threshold, args.fdr_method))
                else:
                    fset = []
                    print("\nWARNING: Not enough groups for %s for '%s' (n=%d groups)" %\
                          (args.test, grouping_var, len(group_list)))
            
            ### t-tests / Mann-Whitney tests
            if len(fset) > 0 or len(group_list) == 2:
                stats_t, pvals_t, reject_t = univariate_tests(X=feat, 
                                                              y=meta[grouping_var], 
                                                              control=control_group, 
                                                              test=t_test,
                                                              comparison_type='binary_each_group',
                                                              multitest_correction=args.fdr_method, 
                                                              alpha=0.05)
                effect_sizes_t =  get_effect_sizes(X=feat, y=meta[grouping_var], 
                                                   control=control_group,
                                                   effect_type=None,
                                                   linked_test=t_test)
                
                stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
                pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
                reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
                effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
                
                ttest_table = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)

                # Record t-test significant feature set (NOT ORDERED)
                fset_ttest = list(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
                
                # Save t-test results to file
                ttest_table.to_csv(ttest_path, header=True, index=True) # Save test results to CSV

                if len(fset_ttest) > 0:
                    ttest_sigfeats_path = Path(str(ttest_path).replace('_results.csv', '_sigfeats.txt'))
                    write_list_to_file(fset_ttest, ttest_sigfeats_path)
                    print("%d signficant features found for any %s vs %s (%s, P<%.2f)" %\
                          (len(fset_ttest), grouping_var, control_group, t_test, args.pval_threshold))
                
                # Barplot of number of significantly different features for each strain   
                barplot_sigfeats(test_pvalues_df=pvals_t, 
                                 saveDir=plot_dir,
                                 p_value_threshold=args.pval_threshold,
                                 test_name=t_test)
                                 
        ### Load statistics results
        
        # Read ANOVA results and record significant features
        print("\nLoading statistics results")
        if len(group_list) > 2:
            anova_table = pd.read_csv(stats_path, index_col=0)
            pvals = anova_table.sort_values(by='pvals', ascending=True)['pvals']
            fset = pvals[pvals < args.pval_threshold].index.to_list()
            print("%d significant features found by %s (P<%.2f)" % (len(fset), args.test, 
                                                                    args.pval_threshold))
        
        # Read t-test results and record significant features (NOT ORDERED)
        ttest_table = pd.read_csv(ttest_path, index_col=0)
        pvals_t = ttest_table[[c for c in ttest_table if "pvals_" in c]]             
        fset_ttest = pvals_t[(pvals_t < args.pval_threshold).sum(axis=1) > 0].index.to_list()
        print("%d significant features found by %s (P<%.2f)" % (len(fset_ttest), t_test, args.pval_threshold))
            
        # Use t-test significant feature set if comparing just 2 strains
        if len(group_list) == 2:
            fset = fset_ttest
                       
        if not n_sig_features:
            if args.n_sig_features is not None:
                n_sig_features = args.n_sig_features 
            else:
                n_sig_features = len(fset)
                                   
        ##### Plotting #####
        
        superplot_dir = plot_dir / 'superplots' 

        if len(fset) > 1:        
            for feature in tqdm(fset[:n_sig_features]):                
                # plot variation in variable with respect to 'date_yyyymmdd'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'date_yyyymmdd' else 'date_yyyymmdd',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'date_yyyymmdd' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                # plot variation in variable with respect to 'instrument_name'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'instrument_name' else 'instrument_name',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'instrument_name' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                # plot variation in variable with respect to 'imaging_run_number'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'imaging_run_number' else 'imaging_run_number',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'imaging_run_number' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                            
            # # Boxplots of significant features by ANOVA/LMM (all groups)
            # boxplots_grouped(feat_meta_df=meta.join(feat), 
            #                  group_by=grouping_var,
            #                  control_group=str(control_group),
            #                  test_pvalues_df=pvals_t.T, # ranked by test pvalue significance
            #                  feature_set=fset,
            #                  saveDir=(plot_dir / 'grouped_boxplots'),
            #                  max_feats2plt=args.n_sig_features, 
            #                  max_groups_plot_cap=None,
            #                  p_value_threshold=args.pval_threshold,
            #                  drop_insignificant=False,
            #                  sns_colour_palette="tab10",
            #                  figsize=[6, (len(group_list)/3 if len(group_list)>10 else 12)])
                    
            # Individual boxplots of significant features by pairwise t-test (each group vs control)
            # boxplots_sigfeats(feat_meta_df=meta.join(feat), 
            #                   test_pvalues_df=pvals_t, 
            #                   group_by=grouping_var, 
            #                   control_strain=control_group, 
            #                   feature_set=fset, #['speed_norm_50th_bluelight'],
            #                   saveDir=plot_dir / 'paired_boxplots',
            #                   max_feats2plt=args.n_sig_features,
            #                   p_value_threshold=args.pval_threshold,
            #                   drop_insignificant=True,
            #                   verbose=False)
                
            # from tierpsytools.analysis.significant_features import plot_feature_boxplots
            # plot_feature_boxplots(feat_to_plot=fset,
            #                       y_class=grouping_var,
            #                       scores=pvalues.rank(axis=1),
            #                       feat=feat,
            #                       pvalues=np.asarray(pvalues).flatten(),
            #                       saveto=None,
            #                       close_after_plotting=False)
        
        ##### Hierarchical Clustering Analysis #####
        print("\nPerforming hierarchical clustering analysis...")

        assert not feat.isna().sum(axis=1).any()
        assert not (feat.std(axis=1) == 0).any()
        
        # Z-normalise data
        featZ = feat.apply(zscore, axis=0)
        #featZ = (feat-feat.mean())/feat.std() # minus mean, divide by std
        
        #from tierpsytools.preprocessing.scaling_class import scalingClass
        #scaler = scalingClass(scaling='standardize')
        #featZ = scaler.fit_transform(feat)

        # NOT NEEDED?
        # # Drop features with NaN values after normalising
        # n_cols = len(featZ.columns)
        # featZ.dropna(axis=1, inplace=True)
        # n_dropped = n_cols - len(featZ.columns)
        # if n_dropped > 0:
        #     print("Dropped %d features after normalisation (NaN)" % n_dropped)
    
        ### Control clustermap
        
        # control data is clustered and feature order is stored and applied to full data
        if len(group_list) > 1 and len(group_list) < 50 and grouping_var != 'date_yyyymmdd':
            control_clustermap_path = plot_dir / 'heatmaps' / (grouping_var + '_date_clustermap.pdf')
            cg = plot_clustermap(featZ, meta,
                                 group_by=([grouping_var] if grouping_var == 'date_yyyymmdd' 
                                           else [grouping_var, 'date_yyyymmdd']),
                                 col_linkage=None,
                                 method=METHOD,#[linkage, complete, average, weighted, centroid]
                                 metric=METRIC,
                                 figsize=[15,8],
                                 sub_adj={'bottom':0.02,'left':0.02,'top':1,'right':0.85},
                                 label_size=12,
                                 show_xlabels=False,
                                 saveto=control_clustermap_path)
    
            #col_linkage = cg.dendrogram_col.calculated_linkage
            clustered_features = np.array(featZ.columns)[cg.dendrogram_col.reordered_ind]
        else:
            clustered_features = None
                    
        ## Save z-normalised values
        # z_stats = featZ.join(meta[grouping_var]).groupby(by=grouping_var).mean().T
        # z_stats.columns = ['z-mean_' + v for v in z_stats.columns.to_list()]
        # z_stats.to_csv(z_stats_path, header=True, index=None)
        
        # Clustermap of full data       
        full_clustermap_path = plot_dir / 'heatmaps' / (grouping_var + '_clustermap.pdf')
        fg = plot_clustermap(featZ, meta, 
                             group_by=grouping_var,
                             col_linkage=None,
                             method=METHOD,
                             metric=METRIC,
                             figsize=[15,8],
                             sub_adj={'bottom':0.02,'left':0.02,'top':1,'right':0.9},
                             label_size=12,
                             saveto=full_clustermap_path)
        
        # If no control clustering (due to no day variation) then use clustered features for all 
        # strains to order barcode heatmaps
        if clustered_features is None:
            clustered_features = np.array(featZ.columns)[fg.dendrogram_col.reordered_ind]
        
        if len(group_list) > 2:
            pvals_heatmap = anova_table.loc[clustered_features, 'pvals']
        elif len(group_list) == 2:
            pvals_heatmap = pvals_t.loc[clustered_features, pvals_t.columns[0]]
        pvals_heatmap.name = 'P < {}'.format(args.pval_threshold)
    
        assert all(f in featZ.columns for f in pvals_heatmap.index)
                
        # Plot barcode heatmap (grouping by date)
        if len(group_list) > 1 and len(group_list) < 50 and grouping_var != 'date_yyyymmdd':
            heatmap_date_path = plot_dir / 'heatmaps' / (grouping_var + '_date_heatmap.pdf')
            plot_barcode_heatmap(featZ=featZ[clustered_features], 
                                 meta=meta, 
                                 group_by=[grouping_var, 'date_yyyymmdd'],
                                 pvalues_series=pvals_heatmap,
                                 p_value_threshold=args.pval_threshold,
                                 selected_feats=fset if len(fset) > 0 else None,
                                 saveto=heatmap_date_path,
                                 figsize=[20,7],
                                 sns_colour_palette="Pastel1")
        
        # Plot group-mean heatmap (averaged across days)
        heatmap_path = plot_dir / 'heatmaps' / (grouping_var + '_heatmap.pdf')
        plot_barcode_heatmap(featZ=featZ[clustered_features], 
                             meta=meta, 
                             group_by=[grouping_var], 
                             pvalues_series=pvals_heatmap,
                             p_value_threshold=args.pval_threshold,
                             selected_feats=fset if len(fset) > 0 else None,
                             saveto=heatmap_path,
                             figsize=[20, (int(len(group_list) / 4) if len(group_list) > 10 else 6)],
                             sns_colour_palette="Pastel1")        
                        
        ##### Principal Components Analysis #####
        print("Performing principal components analysis")
    
        if args.remove_outliers:
            outlier_path = plot_dir / 'mahalanobis_outliers.pdf'
            feat, inds = remove_outliers_pca(df=feat, 
                                             features_to_analyse=None, 
                                             saveto=outlier_path)
            meta = meta.reindex(feat.index) # reindex metadata
            featZ = feat.apply(zscore, axis=0) # re-normalise data

            # Drop features with NaN values after normalising
            n_cols = len(featZ.columns)
            featZ.dropna(axis=1, inplace=True)
            n_dropped = n_cols - len(featZ.columns)
            if n_dropped > 0:
                print("Dropped %d features after normalisation (NaN)" % n_dropped)

        #from tierpsytools.analysis.decomposition import plot_pca
        pca_dir = plot_dir / 'PCA'
        _ = plot_pca(featZ, meta, 
                     group_by=grouping_var, 
                     control=control_group,
                     var_subset=None, 
                     saveDir=pca_dir,
                     PCs_to_keep=10,
                     n_feats2print=10,
                     sns_colour_palette="plasma",
                     n_dims=2,
                     label_size=15,
                     figsize=[9,8],
                     sub_adj={'bottom':0.13,'left':0.12,'top':0.98,'right':0.98},
                     # legend_loc='upper right',
                     # n_colours=20,
                     hypercolor=False)
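        # Hedged aside: plot_pca is a project helper. The decomposition it visualises is, in
        # essence, an ordinary PCA of the z-scored features, e.g. (scikit-learn assumed):
        #     from sklearn.decomposition import PCA
        #     pca = PCA(n_components=10)
        #     pcs = pca.fit_transform(featZ)              # samples x principal components
        #     explained = pca.explained_variance_ratio_   # variance captured per component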