def test_run_with_kegg_returns_dicts_of_dataframes_with_shape(self):
     """Check that a default run yields a dict of (324, 3) score tables.

     ``all`` is called with only the expression table (no ``pathways`` or
     ``db`` argument; presumably KEGG is the default database -- confirm
     against the library). The result must be a dict, its 'harmonic' entry
     must be a DataFrame, and the 'harmonic', 'geometric' and 'min_p_val'
     entries must each have shape (324, 3).
     """
     results = _.all(expression_table=self.expression_table)

     self.assertIsInstance(results, dict)
     self.assertIsInstance(results['harmonic'], pd.DataFrame)

     # Every statistic is expected to produce a table of the same shape.
     expected_shape = (324, 3)
     for statistic in ('harmonic', 'geometric', 'min_p_val'):
          self.assertEqual(results[statistic].shape, expected_shape)
示例#2
0
 def test_pathway_assessor_returns_none_if_statistic_is_set_to_false(self):
     """Check that statistics switched off with ``False`` come back as None.

     ``all`` is called with the fixture expression table and the fixture
     user pathway database, passing ``geometric=False`` and
     ``min_p_val=False``; the corresponding result entries must be None.
     """
     disabled_statistics = ('geometric', 'min_p_val')
     switches = {statistic: False for statistic in disabled_statistics}

     results = _.all(expression_table=self.expression_table,
                     pathways=self.user_pathway_db,
                     **switches)

     for statistic in disabled_statistics:
         self.assertIsNone(results[statistic])
 def test_pathway_assessor_returns_three_dataframes_of_expected_values(
         self):
     """Check the scores computed for a single hand-written pathway.

     A one-pathway database ('Sample_pathway' with four genes) is scored
     against the fixture expression table. The result must contain exactly
     three entries, and the 'Sample_pathway' row of each of 'harmonic',
     'geometric' and 'min_p_val' must match the reference values below for
     Sample_A, Sample_B and Sample_C (compared with ``assertAlmostEqual``).
     """
     pathway_name = 'Sample_pathway'
     statistics = ('harmonic', 'geometric', 'min_p_val')
     samples = ('Sample_A', 'Sample_B', 'Sample_C')

     # Reference values, keyed by statistic and then by sample.
     expected = {
         'harmonic': {
             'Sample_A': 8.610084236222475,
             'Sample_B': 9.519349644266763,
             'Sample_C': 6.02244237008846,
         },
         'geometric': {
             'Sample_A': 5.916555748731008,
             'Sample_B': 7.153859966001466,
             'Sample_C': 4.75439878004548,
         },
         'min_p_val': {
             'Sample_A': 9.690912952571226,
             'Sample_B': 10.576744476960645,
             'Sample_C': 6.695180679017199,
         },
     }

     results = _.all(
         expression_table=self.expression_table,
         pathways={pathway_name: ['SLC2A6', 'PHOSPHO1', 'PIKFYVE', 'VHL']},
     )

     # Pull the pathway's row out of every score table before asserting.
     observed = {}
     for statistic in statistics:
         observed[statistic] = results[statistic].loc[pathway_name].to_dict()

     self.assertEqual(len(results), 3)
     for statistic in statistics:
         for sample in samples:
             self.assertAlmostEqual(observed[statistic][sample],
                                    expected[statistic][sample])
示例#4
0

def write_table(score_type):
    """Write one score table to a tab-separated file and report its path.

    The function relies on module-level names set up by the script entry
    point: ``scores`` (mapping of score type to table), ``examples_dir``,
    ``tumor``, ``pw_name`` and ``ascending``.

    Args:
        score_type: Key into ``scores`` selecting the table to write; it is
            also used as the last component of the output file name.
    """
    # A truthy ``ascending`` labels the file 'suppression', otherwise
    # 'activation'.
    direction = 'suppression' if ascending else 'activation'
    output_f = '{}/scores/{}_{}_{}_{}'.format(
        examples_dir, tumor, pw_name, direction, score_type)

    score_table = scores[score_type]
    score_table.to_csv(output_f, sep='\t')
    print('Finished: {}'.format(output_f))


if __name__ == '__main__':
    # Example driver: score one expression table against one pathway
    # database and write every resulting score table via write_table().
    # The names assigned here are module-level globals that write_table()
    # reads (ascending, examples_dir, tumor, pw_name, scores).

    pa_home = os.getenv('PATHWAY_ASSESSOR_HOME')
    if not pa_home:
        # os.getenv returns None for an unset variable; formatting that into
        # db_dir would silently produce a path starting with "None/" and
        # fail later with a confusing file-not-found error.
        raise SystemExit(
            'PATHWAY_ASSESSOR_HOME is not set; '
            'cannot locate the pathway database tables.')

    tumor = 'blca_normal_slim'
    pw_name = 'kegg_immune'
    db_dir = '{}/pathway_assessor/databases/as_tables'.format(pa_home)
    ascending = True

    # Inputs live next to this script; the pathway table comes from db_dir.
    examples_dir = os.path.dirname(os.path.abspath(__file__))
    expression_table_f = '{}/{}'.format(examples_dir, tumor)
    pathways_f = '{}/{}.tsv'.format(db_dir, pw_name)

    # write_table() writes into <examples_dir>/scores; make sure it exists so
    # the scoring work is not lost to a missing output directory.
    os.makedirs('{}/scores'.format(examples_dir), exist_ok=True)

    # Tab-separated expression table; the first column becomes the index.
    expression_table = pd.read_csv(expression_table_f, sep='\t', index_col=0)
    pathways = pathways_dict(pathways_f)

    scores = pa.all(expression_table=expression_table, ascending=ascending, pathways=pathways)

    # Iterating the result yields its keys (one per score type).
    for method in scores:
        write_table(method)
示例#5
0
    else:
        user_pw_db_f = None
        output_dir = output_dir_path(base_dir, expression_table_f, pathway_db_choice, ascending, rank_method)

    os.makedirs(output_dir, exist_ok=True)

    # Tab-separated expression table; the first column becomes the index.
    expression_df = pd.read_csv(expression_table_f, sep='\t', index_col=0)

    # Database choices that are loaded from a pickle file at
    # databases/<choice>.pkl instead of being passed to pa.all() by name.
    pickled_db_choices = (
        'xcell',
        'xcell_complete_signatures',
        'all',
        'wikipathways',
        'immune_all',
    )

    if pathway_db_choice in pickled_db_choices:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe if the .pkl files under databases/ are trusted -- confirm.
        # The context manager closes the file handle, which the previous
        # bare open() call left to the garbage collector.
        with open('databases/{}.pkl'.format(pathway_db_choice), 'rb') as pickled_db:
            user_pathways = pickle.load(pickled_db)
        scores = pa.all(expression_table=expression_df,
                        pathways=user_pathways,
                        ascending=ascending,
                        rank_method=rank_method)
    elif not user_pw_db_f:
        # No user-supplied pathway file: let pa.all() resolve the database
        # from its name.
        scores = pa.all(expression_table=expression_df,
                        db=pathway_db_choice,
                        ascending=ascending,
                        rank_method=rank_method)
    else:
        # A user pathway file was given: parse it and score against it.
        user_pathways = user_pathways_dict(user_pw_db_f)
        scores = pa.all(expression_table=expression_df,
                        pathways=user_pathways,
                        ascending=ascending,
                        rank_method=rank_method)

    # Iterating the result yields its keys; write one table per score type.
    for score_type in scores:
        write_score_table(output_dir_path=output_dir, score_type=score_type, df=scores[score_type])
示例#6
0
import pandas as pd
import pathway_assessor as pa

# Location of the expression table to score (hard-coded, machine-specific).
expression_table = "C:\\Users\\Boris\\Desktop\\PW_A_Local\\ucec.normal.txt"

# The file is tab-separated; its first column becomes the DataFrame index.
expression_df = pd.read_csv(expression_table, index_col=0, sep='\t')

# Score the table against the 'hallmark' database with ascending ranking and
# the 'max' rank method, then show the result.
scores = pa.all(
    expression_table=expression_df,
    db='hallmark',
    ascending=True,
    rank_method='max',
)
print(scores)