Exemplo n.º 1
0
def test_terminology_matcher(store0, terminology0):
    """Check the terminology matcher against values recomputed in the test.

    The first half verifies that ``text``, ``split``, ``tokens``, ``stems``
    and ``matches`` agree with values rebuilt here from the matcher's own
    attributes.  The second half changes the unit's source, rebuilds the
    expected similarity ranking by hand and compares it with
    ``matcher.similar`` and ``matcher.matches``.

    :param store0: object whose ``units.first()`` supplies the unit under
        test.
    :param terminology0: object whose ``stores`` are iterated so that
        ``stem()`` is called for each of their units.
    """
    # Call stem() for every terminology unit before querying by stem root.
    for store in terminology0.stores.all():
        for unit in store.units.all():
            terminology.get(unit.__class__)(unit).stem()

    unit = store0.units.first()
    matcher = terminology_matcher.get(unit.__class__)(unit)
    assert matcher.text == unit.source_f
    # The backslash is escaped explicitly: a bare "\W" in a non-raw literal
    # is an invalid escape sequence and triggers a warning on recent Pythons.
    assert (
        matcher.split(matcher.text)
        == re.split(u"[\\W]+", matcher.text))
    assert (
        matcher.tokens
        == [t.lower()
            for t
            in matcher.split(matcher.text)
            if (len(t) > 2
                and t not in matcher.stopwords)])
    assert matcher.stems == {matcher.stemmer(t) for t in matcher.tokens}
    assert (
        matcher.matches
        == matcher.similar(
            matcher.terminology_units.filter(
                stems__root__in=matcher.stems).distinct()))

    # NOTE(review): the matcher was built before the source is changed;
    # whether its text/stems are recomputed afterwards is not visible here.
    unit.source_f = "on the cycle home"
    unit.save()

    # Rebuild the expected result of matcher.similar() by hand.
    matches = []
    # (source, target) pairs already accepted; a set gives O(1) lookups.
    matched = set()
    results = matcher.terminology_units.filter(
        stems__root__in=matcher.stems).distinct()
    for result in results:
        target_pair = (
            result.source_f.lower().strip(),
            result.target_f.lower().strip())
        if target_pair in matched:
            # Skip results duplicating an accepted source/target pair.
            continue
        similarity = matcher.comparison.similarity(result.source_f)
        if similarity > matcher.similarity_threshold:
            matches.append((similarity, result))
            matched.add(target_pair)
    # Highest similarity first, truncated to the matcher's limit.
    assert (
        matcher.similar(results)
        == sorted(matches, key=lambda x: -x[0])[:matcher.max_matches])
    assert (
        matcher.matches
        == matcher.similar(results))
Exemplo n.º 2
0
 def get_terminology(self):
     """Return terminology suggestions for this object.

     Looks up the matcher registered for this object's class, builds it
     for ``self`` and returns the second item of each entry in its
     ``matches``.
     """
     matcher_factory = terminology_matcher.get(self.__class__)
     matcher = matcher_factory(self)
     return [match[1] for match in matcher.matches]
Exemplo n.º 3
0
 def get_terminology(self):
     """Return terminology suggestions for this object.

     Looks up the matcher registered for this object's class, builds it
     for ``self`` and returns the second item of each entry in its
     ``matches``.
     """
     matcher_factory = terminology_matcher.get(self.__class__)
     matcher = matcher_factory(self)
     return [match[1] for match in matcher.matches]