예제 #1
0
 def test_extract_full_context(self):
     # With phenotypes_only disabled, non-phenotype ontology terms such as
     # modes of inheritance are extracted and typed.
     extractor = Extractor(max_neighbors=2,
                           correct_spelling=False,
                           phenotypes_only=False)
     result = extractor.hpo("X linked")
     first_entry = result.entries[0]
     self.assertEqual('HP:0001417', first_entry['hpid'][0])
     self.assertEqual('mode_of_inheritance', first_entry['type'])
예제 #2
0
 def test_hpo_big_text_spellcheck_off(self):
     """Parse a long document with spell correction disabled."""
     extractor = Extractor(max_neighbors=2,
                           correct_spelling=False,
                           remove_overlapping=True)
     result = extractor.hpo(test_case11_text)
     self.assertEqual(7, result.n_entries)
예제 #3
0
 def test_extract_ambiguous(self):
     # The conflict resolver should settle "ASD" onto a single HPO id.
     extractor = Extractor(resolve_conflicts=True)
     expected = [{"hpid": ["HP:0001631"], "index": [44, 47], "matched": "ASD"}]
     result = extractor.hpo(
         "secundum, all underwent surgical repair for ASD except for 1 individual whose defect spontaneously closed"
     )
     self.assertEqual(expected, result.entries_sans_context)
예제 #4
0
 def test_extract_json_property(self):
     # The .json property should serialize entries_sans_context verbatim.
     extractor = Extractor(max_neighbors=2)
     expected = json.dumps([{
         "hpid": ["HP:0000154"],
         "index": [16, 26],
         "matched": "wide mouth"
     }])
     result = extractor.hpo("Wide gait and a wide mouth")
     self.assertEqual(expected, result.json)
예제 #5
0
 def test_extract_from_repeated_context(self):
     # "wide" appears twice; only the true phenotype span should match.
     extractor = Extractor()
     expected = [{
         "hpid": ["HP:0000154"],
         "index": [16, 26],
         "matched": "wide mouth"
     }]
     result = extractor.hpo("Wide gait and a wide mouth")
     self.assertEqual(expected, result.entries_sans_context)
예제 #6
0
 def test_multiple_matches(self):
     # Overlap removal plus conflict resolution on short clinical phrases.
     extractor = Extractor(correct_spelling=False,
                           remove_overlapping=True,
                           resolve_conflicts=True)
     result = extractor.hpo("l pre auricular ear pit.")
     self.assertEqual(['HP:0004467'], result.hpids)
     result = extractor.hpo("Coloboma, microphthalmia, macrocephaly, ear pit.")
     expected = {'HP:0000589', 'HP:0004467', 'HP:0000568', 'HP:0000256'}
     self.assertEqual(expected, set(result.hpids))
예제 #7
0
    def test_negated_hpo_retention(self):
        """Negated phenotypes should be reported via ``negated_hpids``."""
        extractor = Extractor(correct_spelling=False,
                              remove_overlapping=True,
                              resolve_conflicts=True,
                              max_neighbors=2,
                              phenotypes_only=False,
                              remove_negated=True)

        single_negation_cases = [
            ("Patient has developmental delay but no hypotonia",
             ["HP:0001252"]),
            ("developmental delay and a wide mouth", []),
            ("developmental delay with no wide mouth", ['HP:0000154']),
            ("developmental delay without a wide mouth", ['HP:0000154']),
            ("no developmental delay, but has a wide mouth", ['HP:0001263']),
            ("the patient has a wide mouth but no developmental delay.",
             ['HP:0001263']),
        ]
        for text, expected in single_negation_cases:
            result = extractor.hpo(text)
            self.assertEqual(expected, result.negated_hpids)

        # Ordering is not guaranteed when two phenotypes are negated at once,
        # so compare as sets.
        result = extractor.hpo(
            "the patient does not have either a wide mouth or developmental delay."
        )
        self.assertEqual({'HP:0000154', 'HP:0001263'},
                         set(result.negated_hpids))
예제 #8
0
    def test_stop_word_phenos(self):
        """Stop words between phenotype tokens interact with max_neighbors."""
        # With three neighbors, "developmental and delay" bridges the stop word.
        extractor = Extractor(correct_spelling=True, max_neighbors=3)
        expected = [{
            "hpid": ["HP:0001263"],
            "index": [0, 23],
            "matched": "developmental and delay"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("developmental and delay").entries_sans_context)

        # With a single neighbor the bridge is too long and nothing matches.
        extractor = Extractor(correct_spelling=True, max_neighbors=1)
        self.assertEqual(
            [], extractor.hpo("developmental and delay").entries_sans_context)

        extractor = Extractor(correct_spelling=False)

        # Multiword phenotype followed by a single-word phenotype.
        expected = [{
            "hpid": ["HP:0000750"],
            "index": [0, 12],
            "matched": "Speech delay"
        }, {
            "hpid": ["HP:0100710"],
            "index": [17, 26],
            "matched": "impulsive"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("Speech delay and impulsive").entries_sans_context)

        # Single-word phenotype followed by a multiword phenotype.
        expected = [{
            "hpid": ["HP:0100710"],
            "index": [0, 9],
            "matched": "Impulsive"
        }, {
            "hpid": ["HP:0000750"],
            "index": [14, 26],
            "matched": "speech delay"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("Impulsive and speech delay").entries_sans_context)
예제 #9
0
    def test_custom_synonyms(self):
        """User-supplied synonyms should match like native HPO term names."""
        custom_syn = {
            "HP:0001263": ['DD', 'GDD'],
            "HP:0000729": ['ASD', 'PDD']
        }
        extractor = Extractor(custom_synonyms=custom_syn)
        expected = [{
            "hpid": ["HP:0001263"],
            "index": [0, 3],
            "matched": "GDD"
        }, {
            "hpid": ["HP:0001263"],
            "index": [4, 6],
            "matched": "DD"
        }]
        self.assertEqual(expected,
                         extractor.hpo("GDD DD").entries_sans_context)
예제 #10
0
    def test_conflict_instantiate(self):
        """Reinstantiating the Extractor must not leak resolver state."""
        # Alternate the resolver flag and verify each fresh instance behaves
        # exactly as configured (True -> one id for "ASD", False -> two).
        for resolve, expected_count in ((True, 1), (False, 2), (True, 1)):
            extractor = Extractor(resolve_conflicts=resolve)
            self.assertEqual(resolve, extractor.resolve_conflicts)
            result = extractor.hpo("ASD")
            self.assertEqual(expected_count, len(result.entries[0]['hpid']))
예제 #11
0
 def test_iteration_over_chunks(self):
     # Repeated extractions with the same Extractor must keep working,
     # regardless of sentence order or the spell-correction setting.
     forward = ['Developmental delay', 'Hypotonia']
     backward = ['Hypotonia', 'Developmental delay']

     extractor = Extractor(correct_spelling=False)
     for sentence in forward + backward:
         self.assertNotEqual(0, extractor.hpo(sentence).n_entries)

     extractor = Extractor(correct_spelling=True)
     for sentence in forward + backward + forward + backward:
         self.assertNotEqual(0, extractor.hpo(sentence).n_entries)
예제 #12
0
    def test_handling_term_hyphenation(self):
        """Hyphenated HPO term names should match with or without hyphens."""
        extractor = Extractor(correct_spelling=False,
                              remove_overlapping=True,
                              resolve_conflicts=True,
                              max_neighbors=2,
                              phenotypes_only=False)
        # Collect hyphenated term names, skipping any containing commas or
        # periods.
        hyphenated_phenos = []
        for node_id in hpo_network.nodes():
            name = hpo_network.nodes()[node_id]['name']
            if '-' in name and ',' not in name and '.' not in name:
                hyphenated_phenos.append((name, node_id))
        # Phenotypes where word-order is important is a limitation of current parsing method
        known_bugs = ['HP:0000510', 'HP:0030932', 'HP:0001215']
        # current version is not expected to extract very long phenotypes
        long_phenos = [
            'HP:0011654', 'HP:0410303', 'HP:0000654', 'HP:0000847',
            'HP:0000864', 'HP:0000877', 'HP:0001074'
        ]
        excluded = known_bugs + long_phenos
        hyphenated_phenos = [
            pair for pair in hyphenated_phenos if pair[1] not in excluded
        ]
        hyphenated_phenos = [
            pair for pair in hyphenated_phenos if pair[1] not in non_phenos
        ]
        for name, node_id in hyphenated_phenos[:10]:
            # hyphenated spelling must resolve to exactly this term
            self.assertEqual([node_id], extractor.hpo(name).hpids)
            # the hyphen-free spelling must resolve identically
            self.assertEqual([node_id],
                             extractor.hpo(name.replace('-', ' ')).hpids)
예제 #13
0
    def test_capitalization_affecting_outcome(self):
        """Matching should be insensitive to case and leading whitespace."""
        extractor = Extractor(correct_spelling=False)
        for text in ("enlarged heart", " enlarged heart", "Enlarged heart",
                     " Enlarged heart", "Male with Sotos, enlarged heart"):
            self.assertEqual(['HP:0001640'], extractor.hpo(text).hpids)

        result = extractor.hpo("Myoclonus Seizures")
        self.assertEqual({'HP:0002123'}, set(result.hpids))
예제 #14
0
    def test_hpo_big_text_max_neighbors(self):
        """Changing max_neighbors must change what is extracted from a page.

        Fix: the first result was previously named ``hpo_max_2`` although its
        extractor is built with ``max_neighbors=1``; renamed to match.
        """
        extract = Extractor(max_neighbors=1,
                            correct_spelling=True,
                            remove_overlapping=False)
        hpo_max_1 = extract.hpo(test_case11_text).hpids
        extract = Extractor(max_neighbors=3,
                            correct_spelling=True,
                            remove_overlapping=False)
        hpo_max_3 = extract.hpo(test_case11_text).hpids

        # a wider neighbor window must yield a different extraction
        self.assertNotEqual(hpo_max_1, hpo_max_3)
예제 #15
0
    def test_remove_overlapping(self):
        """remove_overlapping should drop terms subsumed by longer matches."""
        extractor = Extractor(correct_spelling=False, remove_overlapping=False)
        result = extractor.hpo("Polycystic kidney disease and myoclonus seizures.")
        expected = {
            'HP:0000113', 'HP:0000112', 'HP:0001250', 'HP:0002123',
            'HP:0001336'
        }
        self.assertEqual(expected, set(result.hpids))

        extractor = Extractor(correct_spelling=False, remove_overlapping=True)
        result = extractor.hpo("Polycystic kidney disease and myoclonus seizures.")
        self.assertEqual({'HP:0002123', 'HP:0000113'}, set(result.hpids))
예제 #16
0
def run_phenoseries_experiment(outdir=None,
                               phenotypic_series_filepath=None,
                               min_hpos=2,
                               min_entities=4,
                               phenoseries_fraction=1.0,
                               scoring_method="HRSS",
                               threads=1,
                               omim_phenotypes_file=None,
                               pairwise_mim_scores_file=None):
    """Score OMIM diseases within phenotypic series and rank the results.

    Builds (or loads) an HPO-term profile per OMIM disease, computes the
    pairwise phenopy similarity matrix, and writes the matrix plus a rank
    DataFrame into ``outdir``.

    :param outdir: output directory; defaults to the current working directory.
    :param phenotypic_series_filepath: tab-separated file with PS, MIM and
        Phenotype columns.
    :param min_hpos: entities with this many HPO ids or fewer are dropped.
    :param min_entities: minimum number of entities a series must retain.
    :param phenoseries_fraction: fraction of the series rows to sample.
    :param scoring_method: phenopy scoring method name (e.g. "HRSS").
    :param threads: number of threads used for pairwise scoring.
    :param omim_phenotypes_file: pre-extracted omim_id/hpo_terms TSV; pass ""
        to scrape OMIM instead.
    :param pairwise_mim_scores_file: precomputed pairwise score TSV; pass ""
        to compute the scores here.
    """
    if outdir is None:
        # BUG FIX: the original assigned the function object ``os.getcwd``
        # (no call), so outdir was never a usable path string.
        outdir = os.getcwd()

    # load HPO network
    # data directory
    phenopy_data_directory = os.path.join(os.getenv("HOME"), ".phenopy/data")

    # files used in building the annotated HPO network
    obo_file = os.path.join(phenopy_data_directory, "hp.obo")
    disease_to_phenotype_file = os.path.join(phenopy_data_directory,
                                             "phenotype.hpoa")

    hpo_network, alt2prim, _ = generate_annotated_hpo_network(
        obo_file, disease_to_phenotype_file, ages_distribution_file=None)

    # read the phenotypic series file as a DataFrame
    psdf = pd.read_csv(
        phenotypic_series_filepath,
        sep="\t",
        comment="#",
        names=["PS", "MIM", "Phenotype"],
    )
    # null phenotypes are actually null MIM id fields, so just drop these
    psdf = psdf.dropna().sample(frac=phenoseries_fraction, random_state=42)
    psdf.reset_index(inplace=True, drop=True)

    # map each phenotypic series to its member OMIM ids.
    # BUG FIX: group by the scalar column name instead of a one-element list;
    # with a list, pandas >= 2.0 yields 1-tuple group keys and the int()
    # conversions below raise TypeError.
    ps2mimids = {}
    for ps, mim_ids in psdf.groupby("PS")["MIM"]:
        # keep only series with at least two member MIMs
        if len(mim_ids) >= 2:
            ps2mimids[ps] = list(set([int(mid) for mid in mim_ids.tolist()]))

    # invert the ps2mimid dictionary for easy lookup of which ps a mim belongs to
    mim2psids = {}
    for mim_id, ps in psdf.groupby("MIM")["PS"]:
        mim2psids[int(mim_id)] = ps.tolist()

    # OMIM text sections that are mined for phenotypes
    fields_to_use = [
        "text",
        "description",
        "otherFeatures",
        "biochemicalFeatures",
        "diagnosis",
        "clinicalFeatures",
    ]

    if omim_phenotypes_file == "":
        logger.info("Scraping OMIM Diseases text")
        mim_texts = {}
        for mim_id in mim2psids:
            mim_response = request_mimid_info(mim_id)
            try:
                mim_info = mim_response.json()
            except AttributeError:
                # NOTE(review): a single failed request aborts the whole
                # scrape loop; ``continue`` may be the intent — left as-is.
                break
            mim_text = mim_info["omim"]["entryList"][0]["entry"][
                "textSectionList"]

            # concatenate the usable text sections for this MIM entry
            all_mim_text = ""
            for text_section in mim_text:
                section_name = text_section["textSection"]["textSectionName"]
                if section_name in fields_to_use:
                    all_mim_text += " " + text_section["textSection"][
                        "textSectionContent"]

            mim_texts[mim_id] = all_mim_text
        # instantiate txt2hpo's Extractor class to perform named entity recognition
        extractor = Extractor(remove_negated=True,
                              max_neighbors=3,
                              correct_spelling=False)

        # loop over the MIM ids and extract hpo ids from each MIM's text fields
        mim_hpos = {}
        for mim_id in mim2psids:
            mim_hpos[mim_id] = extractor.hpo(mim_texts[mim_id]).hpids

        # persist the extracted profiles for later reuse
        mimdf = pd.DataFrame()
        mimdf["omim_id"] = list(mim2psids.keys())
        mimdf["hpo_terms"] = mimdf["omim_id"].apply(
            lambda mim_id: mim_hpos[mim_id])
        mimdf.to_csv(os.path.join(outdir, "omim_phenotypes.txt"),
                     index=False,
                     sep='\t')

    else:
        logger.info("You passed an OMIM disease to phenotype file")
        try:
            mimdf = pd.read_csv(omim_phenotypes_file, sep="\t")
            mimdf["omim_id"] = mimdf["omim_id"].astype(int)
            mimdf["hpo_terms"] = mimdf["hpo_terms"].apply(literal_eval)
            mim_hpos = dict(zip(mimdf["omim_id"], mimdf["hpo_terms"]))
        except FileNotFoundError:
            sys.exit("Please provide a valid file path")

    # normalize HPO ids (map alternate ids to primaries, drop unknowns)
    for mim_id, hpo_ids in mim_hpos.items():
        mim_hpos[mim_id] = convert_and_filter_hpoids(hpo_ids, hpo_network,
                                                     alt2prim)

    # collect entities (mims) that have min_hpos or fewer HPO ids
    mims_to_remove = []
    for mim_id, hpo_ids in mim_hpos.copy().items():
        if len(hpo_ids) <= min_hpos:
            mims_to_remove.append(mim_id)

    # rebuild the series membership without the removed MIMs
    experiment_ps2mimids = {}
    for ps, mimids in ps2mimids.copy().items():
        experiment_ps2mimids[ps] = [
            ps_mim_id for ps_mim_id in mimids
            if ps_mim_id not in mims_to_remove
        ]

    # After removing entities, make sure the series has min number of entities
    remove_these_ps = []
    for ps, mimids in experiment_ps2mimids.items():
        if len(mimids) < min_entities:
            remove_these_ps.append(ps)

    for psid in remove_these_ps:
        del experiment_ps2mimids[psid]

    # Create a unique list of entity ids, for scoring later
    experiment_omims = set()
    for psid, mim_ids in experiment_ps2mimids.items():
        for mim in mim_ids:
            experiment_omims.add(mim)
    experiment_omims = list(experiment_omims)

    # make a DataFrame for entity ids
    mimdf = pd.DataFrame()
    mimdf["omim_id"] = experiment_omims
    mimdf["hpo_terms"] = mimdf["omim_id"].apply(
        lambda mim_id: mim_hpos[mim_id])

    if pairwise_mim_scores_file == "":
        scorer = Scorer(hpo_network, scoring_method=scoring_method)
        records = [{
            "record_id":
            mim_id,
            "terms":
            convert_and_filter_hpoids(hpo_terms, hpo_network, alt2prim),
            "weights": {},
        } for mim_id, hpo_terms in dict(
            zip(mimdf["omim_id"], mimdf["hpo_terms"])).items()]

        # score every record pair once (half product of the square)
        results = scorer.score_records(records,
                                       records,
                                       half_product(len(records),
                                                    len(records)),
                                       threads=threads)

        pairwise_scores = pd.DataFrame(
            results, columns=["mimid1", "mimid2", "phenopy-score"])
        # convert to square form
        pairwise_scores = pairwise_scores.set_index(["mimid1",
                                                     "mimid2"]).unstack()
        # fill the missing half of the square matrix from its transpose
        pairwise_scores = (pairwise_scores["phenopy-score"].reset_index(
            drop=True).fillna(
                pairwise_scores.T.droplevel(0).reset_index(
                    drop=True)).set_index(pairwise_scores.index, drop=True))
        # reindex rows and columns with the mimdf ordering
        pairwise_scores = pairwise_scores.reindex(mimdf["omim_id"].tolist())
        pairwise_scores = pairwise_scores[mimdf["omim_id"].tolist()]
        pd.DataFrame(pairwise_scores).to_csv(os.path.join(
            outdir, 'phenoseries.psim_matrix.txt'),
                                             sep='\t')
    else:
        pairwise_scores = pd.read_csv(pairwise_mim_scores_file, sep='\t')

    # rank entities within each series and persist the ranks
    ranksdf = make_rank_dataframe(
        pairwise_scores.astype(float).values, mimdf, experiment_ps2mimids)
    ranksdf.to_csv(os.path.join(outdir, "phenoseries.rankdf.txt"), sep="\t")
예제 #17
0
 def test_hpo_big_text_spellcheck_on(self):
     """Parse a long document with the spellchecker left enabled."""
     extractor = Extractor(max_neighbors=2, remove_overlapping=False)
     result = extractor.hpo(test_case11_text)
     self.assertEqual(12, result.n_entries)
예제 #18
0
    def test_extract_without_negated(self):
        """remove_negated / detect_negation should drop negated phenotypes."""
        # Negation that is part of the matched phrase itself must be kept.
        extractor = Extractor(remove_negated=True)
        result = extractor.hpo("the patient presents with absent speech")
        self.assertEqual(['HP:0001344'], result.hpids)

        # detect_negation alone does not drop anything.
        extractor = Extractor()
        result = extractor.hpo("developmental delay and a wide mouth")
        result.detect_negation()
        self.assertEqual({'HP:0000154', 'HP:0001263'}, set(result.hpids))

        # explicit remove_negated() drops the negated phenotype.
        result = extractor.hpo("developmental delay with no wide mouth")
        result.detect_negation()
        result.remove_negated()
        self.assertEqual(['HP:0001263'], result.hpids)

        # remove_negated=True applies both steps automatically.
        negation_cases = [
            ("developmental delay without a wide mouth", ['HP:0001263']),
            ("no developmental delay, but has a wide mouth", ['HP:0000154']),
            ("the patient has a wide mouth but no developmental delay.",
             ['HP:0000154']),
            ("the patient does not have either a wide mouth or developmental delay.",
             []),
        ]
        for text, expected in negation_cases:
            extractor = Extractor(remove_negated=True)
            self.assertEqual(expected, extractor.hpo(text).hpids)
예제 #19
0
    def test_hpo(self):
        """End-to-end extraction checks across parser configurations."""
        extractor = Extractor(correct_spelling=False)

        # Each case: (input text, hpid, [start, end], matched span).
        single_entry_cases = [
            # single phenotype
            ("Hypotonia", "HP:0001252", [0, 9], "Hypotonia"),
            # non-phenotypic leading word
            ("Word hypotonia", "HP:0001252", [5, 14], "hypotonia"),
            # punctuation between words
            ("Word, hypotonia", "HP:0001252", [6, 15], "hypotonia"),
            # multiword phenotype
            ("Developmental delay", "HP:0001263", [0, 19],
             "Developmental delay"),
            # reversed word order
            ("Delay developmental", "HP:0001263", [0, 19],
             "Delay developmental"),
            # inflectional ending
            ("Hypotonic", "HP:0001252", [0, 9], "Hypotonic"),
            # inflection plus reversed word order
            ("Delayed development", "HP:0001263", [0, 19],
             "Delayed development"),
            # multiword phenotype after an unrelated phenotypic word
            ("Delay hearing loss", "HP:0000365", [6, 18], "hearing loss"),
            # multiword phenotype before an unrelated word
            ("Hearing loss following", "HP:0000365", [0, 12], "Hearing loss"),
        ]
        for text, hpid, index, matched in single_entry_cases:
            expected = [{"hpid": [hpid], "index": index, "matched": matched}]
            self.assertEqual(expected,
                             extractor.hpo(text).entries_sans_context)

        # Two multiword phenotypes interrupted by an unrelated term.
        expected = [
            {
                "hpid": ["HP:0000365"],
                "index": [0, 12],
                "matched": "Hearing loss"
            },
            {
                "hpid": ["HP:0001263"],
                "index": [23, 42],
                "matched": "developmental delay"
            },
        ]
        self.assertEqual(
            expected,
            extractor.hpo("Hearing loss following developmental delay").
            entries_sans_context)

        # The spellchecker corrects "Hyptonic" to "Hypotonic".
        extractor = Extractor(correct_spelling=True)
        expected = [{
            "hpid": ["HP:0001252"],
            "index": [0, 9],
            "matched": "Hypotonic"
        }]
        self.assertEqual(expected,
                         extractor.hpo("Hyptonic").entries_sans_context)

        # With the spellchecker off the misspelling matches nothing.
        extractor = Extractor(correct_spelling=False)
        self.assertEqual([], extractor.hpo("Hyptonic").entries_sans_context)

        expected = [
            {
                "hpid": ["HP:0002757"],
                "index": [12, 30],
                "matched": "multiple fractures"
            },
            {
                "hpid": ["HP:0000938"],
                "index": [35, 45],
                "matched": "osteopenia"
            },
        ]
        self.assertEqual(
            expected,
            extractor.hpo("Female with multiple fractures and osteopenia NA NA").
            entries_sans_context)

        expected = [{
            "hpid": ["HP:0001156"],
            "index": [30, 43],
            "matched": "brachydactyly"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("Female with fourth metacarpal brachydactyly").
            entries_sans_context)

        # Overlapping matches are all kept when remove_overlapping is off.
        extractor = Extractor(correct_spelling=False, remove_overlapping=False)
        expected = [
            {
                "hpid": ["HP:0000964"],
                "index": [10, 16],
                "matched": "eczema"
            },
            {
                "hpid": ["HP:0000988"],
                "index": [18, 27],
                "matched": "skin rash"
            },
            {
                "hpid": ["HP:0000988"],
                "index": [23, 27],
                "matched": "rash"
            },
            {
                "hpid": ["HP:0008070"],
                "index": [33, 44],
                "matched": "sparse hair"
            },
        ]
        self.assertEqual(
            expected,
            extractor.hpo("Male with eczema, skin rash, and sparse hair"
                          ).entries_sans_context)

        # Abbreviated phenotype.
        expected = [{"hpid": ["HP:0001370"], "index": [0, 2], "matched": "RA"}]
        self.assertEqual(expected, extractor.hpo("RA").entries_sans_context)

        # Multiple phenotypes in one string.
        expected = [{
            "hpid": ["HP:0001252"],
            "index": [0, 9],
            "matched": "Hypotonia"
        }, {
            "hpid": ["HP:0001263"],
            "index": [11, 30],
            "matched": "developmental delay"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("Hypotonia, developmental delay").entries_sans_context)

        # Term indexing is preserved when chunking text by max_length.
        extractor = Extractor(correct_spelling=False,
                              max_length=20,
                              chunk_by="max_length")
        expected = [{
            "hpid": ["HP:0001263"],
            "index": [0, 19],
            "matched": "Developmental delay"
        }, {
            "hpid": ["HP:0001252"],
            "index": [21, 30],
            "matched": "hypotonia"
        }]
        self.assertEqual(
            expected,
            extractor.hpo("Developmental delay, hypotonia").entries_sans_context)