Exemplo n.º 1
0
def generate_annotated_hpo_network(obo_file,
                                   disease_to_phenotype_file,
                                   annotations_file=None,
                                   ages_distribution_file=None):
    """Build an annotated HPO network from an OBO file and a disease-to-phenotype file.

    The network is loaded with ``load_network``, its alternate-id mapping is
    derived with ``generate_alternate_ids``, the disease/phenotype
    associations are read with ``load_d2p``, and everything is then handed to
    ``annotate``.

    :param obo_file: passed to ``load_network``.
    :param disease_to_phenotype_file: passed to ``load_d2p``.
    :param annotations_file: optional; forwarded to ``annotate`` unchanged.
    :param ages_distribution_file: optional; forwarded to ``annotate`` unchanged.
    :return: tuple ``(hpo_network, alt2prim, disease_records)`` where
        ``hpo_network`` is the value returned by ``annotate``.
    """
    network = load_network(obo_file)
    alt2prim = generate_alternate_ids(network)

    # read the disease <-> phenotype associations (a two-item result)
    d2p = load_d2p(disease_to_phenotype_file, network, alt2prim)
    disease_records, phenotype_to_diseases = d2p

    # annotate the network; the number of disease records is the third argument
    annotated = annotate(
        network,
        phenotype_to_diseases,
        len(disease_records),
        alt2prim,
        annotations_file=annotations_file,
        ages_distribution_file=ages_distribution_file,
    )
    return annotated, alt2prim, disease_records
Exemplo n.º 2
0
    def test_ic_custom(self):
        """Calculate the information content of a phenotype when multiple annotations are present

        A freshly loaded network is annotated with a custom annotations file
        and the 'ic' attribute of the node ``self.hpo_id`` is compared with
        the expected value to one decimal place.
        """
        annotations_path = os.path.join(self.parent_dir, 'data/test.score-long.txt')

        # annotate a newly loaded network rather than the fixture's network
        annotated = annotate(
            load_network(self.obo_file),
            self.phenotype_to_diseases,
            self.num_diseases_annotated,
            self.alt2prim,
            annotations_file=annotations_path,
        )

        information_content = annotated.nodes[self.hpo_id]['ic']
        self.assertAlmostEqual(information_content, 6.38, 1)
Exemplo n.º 3
0
    def test_age_weight(self):
        # Exercise age-derived weights together with
        # Scorer.best_match_weighted_average on a fixed pairwise score matrix.

        # re-annotate the fixture network, this time supplying the ages file
        self.hpo_network = annotate(
            self.hpo_network,
            self.phenotype_to_diseases,
            self.num_diseases_annotated,
            self.alt2prim,
            ages_distribution_file=self.ages_distribution_file,
        )

        terms_a = [
            'HP:0001251',  # ATAX
            'HP:0001263',  # DD
            'HP:0001290',  # HYP
            'HP:0004322',  # SS
        ]
        terms_b = [
            'HP:0001263',  # DD
            'HP:0001249',  # ID
            'HP:0001290',  # HYP
        ]
        age_a, age_b = 9.0, 4.0

        # NOTE(review): each term list is weighted with the *other* record's
        # age (terms_a with age_b, terms_b with age_a); the expected score
        # below depends on this pairing.
        age_weights_a = calculate_age_weights(terms_a, age_b, self.hpo_network)
        weights_a = {'age': age_weights_a}
        age_weights_b = calculate_age_weights(terms_b, age_a, self.hpo_network)
        weights_b = {'age': age_weights_b}

        # fixed pairwise score matrix: rows follow terms_a, columns follow terms_b
        pairwise_scores = [
            [4.22595743e-02, 3.92122308e-02, 3.04851573e-04],
            [1.07473687e-01, 5.05101655e-01, 3.78305515e-04],
            [3.69780479e-04, 3.78305515e-04, 4.64651944e-01],
            [4.17139800e-04, 4.12232546e-04, 3.67984322e-04],
        ]
        row_index = pd.Index(terms_a, name='a')
        column_index = pd.MultiIndex.from_arrays(
            [['score'] * len(terms_b), terms_b],
            names=[None, 'b'],
        )
        df = pd.DataFrame(pairwise_scores, index=row_index, columns=column_index)

        # best match weighted average using the age weights
        weighted = self.scorer.best_match_weighted_average(df, weights_a, weights_b)
        self.assertAlmostEqual(weighted, 0.3741, 4)

        # with every weight equal to 1.0 the result should match the unweighted BMA
        weights_a = {'disease_frequency': [1.0 for _ in terms_a]}
        weights_b = {'disease_frequency': [1.0 for _ in terms_b]}
        unweighted = self.scorer.best_match_weighted_average(df, weights_a, weights_b)
        self.assertAlmostEqual(unweighted, 0.2985, 4)

        # a term that is not in the network, then a term in the network
        # without age information: both are expected to yield a weight of 1.0
        for lone_term in ('HP:Not_a_term', 'HP:0000001'):
            lone_weights = calculate_age_weights([lone_term], age_b, self.hpo_network)
            self.assertEqual(lone_weights, [1.0])
Exemplo n.º 4
0
    def test_score_pairs_age(self):
        # Parse a records file and check pairwise 'BMWA' scores computed on a
        # network annotated with the ages distribution file.
        self.hpo_network = annotate(
            self.hpo_network,
            self.phenotype_to_diseases,
            self.num_diseases_annotated,
            self.alt2prim,
            ages_distribution_file=self.ages_distribution_file,
        )

        # read in the records
        records_path = os.path.join(self.parent_dir, 'data/test.score-short.txt')
        records = parse_input(records_path, self.hpo_network, self.alt2prim)

        # scorer configured for the weighted summarization method
        scorer = Scorer(
            self.hpo_network,
            summarization_method='BMWA',
            min_score_mask=None,
        )

        # the single index pair (0, 1) is scored in both scenarios below
        index_pairs = [(0, 1)]

        # scenario 1: two different records
        wanted_ids = ['118200', '118210']
        selected = [rec for rec in records if rec['record_id'] in wanted_ids]
        results = scorer.score_records(selected, selected, index_pairs)
        self.assertEqual(len(results), 1)

        # expected value: weighted average of the listed scores
        expected_scores = [0.166, 1.0, 1.0, 0.125, 0.25, 1.0, 1.0]
        expected_weights = [0.481, 1.0, 1.0, 0.0446, 1.0, 1.0, 1.0]
        answer = np.average(expected_scores, weights=expected_weights)
        self.assertAlmostEqual(float(results[0][2]), answer, 2)

        # scenario 2: identical records where one has an age and the other does not
        wanted_ids = ['118210', '118211']
        selected = [rec for rec in records if rec['record_id'] in wanted_ids]
        results = scorer.score_records(selected, selected, index_pairs)
        self.assertEqual(len(results), 1)
        self.assertAlmostEqual(float(results[0][2]), 1.0, 1)
Exemplo n.º 5
0
    def test_annotate_network(self):
        # Build and annotate a network from scratch, then check the 'ic'
        # attribute of node HP:0010863 to two decimal places.
        network = load_network(self.obo_file)
        alt2prim = generate_alternate_ids(network)

        # disease <-> phenotype associations
        d2p_path = os.path.join(self.parent_dir, 'data/phenotype.hpoa')
        disease_records, phenotype_to_diseases = load_d2p(d2p_path, network, alt2prim)

        # the number of disease records is the third argument to annotate
        network = annotate(
            network,
            phenotype_to_diseases,
            len(disease_records),
            alt2prim,
        )

        information_content = network.nodes['HP:0010863']['ic']
        self.assertAlmostEqual(information_content, 5.69, 2)
Exemplo n.º 6
0
    def setUpClass(cls):
        # Populate the shared fixture attributes on ``cls``: file paths, the
        # annotated network, the alternate-id mapping and the disease records.

        # directory containing this test module
        here = os.path.dirname(os.path.realpath(__file__))
        cls.parent_dir = here

        # load the network and derive the alternate-id mapping
        obo_path = os.path.join(here, 'data/hp.obo')
        cls.obo_file = obo_path
        network = load_network(obo_path)
        cls.hpo_network = network
        alt2prim = generate_alternate_ids(network)
        cls.alt2prim = alt2prim

        # load the disease <-> phenotype associations
        d2p_path = os.path.join(here, 'data/phenotype.hpoa')
        cls.disease_to_phenotype_file = d2p_path
        disease_records, phenotype_to_diseases = load_d2p(d2p_path, network, alt2prim)
        cls.disease_records = disease_records
        cls.phenotype_to_diseases = phenotype_to_diseases

        # annotate the network using the number of disease records
        disease_count = len(disease_records)
        cls.num_diseases_annotated = disease_count
        cls.hpo_network = annotate(network, phenotype_to_diseases, disease_count, alt2prim)

        # term id and output path used by the tests
        cls.hpo_id = 'HP:0010863'
        cls.disease_to_phenotype_output_file = os.path.join(here, 'data/phenotype.noparents.hpoa')
Exemplo n.º 7
0
    def setUp(cls):
        # Populate the fixture attributes: file paths, the annotated network,
        # the alternate-id mapping, the disease records and a Scorer.
        # NOTE(review): the first parameter is named ``cls``; whether a
        # decorator is applied to this method is not visible here - confirm.

        # directory containing this test module
        here = os.path.dirname(os.path.realpath(__file__))
        cls.parent_dir = here

        # load the network and derive the alternate-id mapping
        obo_path = os.path.join(here, 'data/hp.obo')
        cls.obo_file = obo_path
        network = load_network(obo_path)
        cls.hpo_network = network
        alt2prim = generate_alternate_ids(network)
        cls.alt2prim = alt2prim
        cls.ages_distribution_file = os.path.join(here, 'data/phenotype_age.tsv')

        # load the disease <-> phenotype associations
        d2p_path = os.path.join(here, 'data/phenotype.hpoa')
        cls.disease_to_phenotype_file = d2p_path
        disease_records, phenotype_to_diseases = load_d2p(d2p_path, network, alt2prim)
        cls.disease_records = disease_records
        cls.phenotype_to_diseases = phenotype_to_diseases

        # annotate the network using the number of disease records
        disease_count = len(disease_records)
        cls.num_diseases_annotated = disease_count
        annotated = annotate(network, phenotype_to_diseases, disease_count, alt2prim)
        cls.hpo_network = annotated

        # scorer built on the annotated network
        cls.scorer = Scorer(annotated, min_score_mask=None)