Пример #1
0
def test_pattern_matches_sentences():
    """Check text and indices returned by ``Sentences.get_pattern``.

    Each case is the first match of an alternation pattern in a
    two-sentence text built with the default sentence splitter.
    """
    sentences = Sentences(' I want this, or that.\n These and those.')
    cases = [
        # (regex, (matched text, start, end))
        ('(this|that)', ('this', 7, 11)),  # first sentence
        ('(these|those)', ('These', 23, 28)),  # second sentence
    ]
    for regex, expected in cases:
        match = sentences.get_pattern(Pattern(regex), get_indices=True)
        assert match is not None
        text, begin, stop = match
        assert (text, begin, stop) == expected
Пример #2
0
    def get_pattern(self,
                    pat: Pattern,
                    *,
                    index=0,
                    get_indices=False,
                    return_negation=False,
                    return_negation_keyword=False):
        """Search this object's text with `pat` and return the matched text.

        The search result is recorded via `_update_last_search` and, when
        truthy, added to `self.matches`.

        :param pat: pattern to run against `self.text`
        :param index: group index passed to `group`/`start`/`end`
        :param get_indices: if True, also return the start and end of the
            group (kept as an option to maintain backward compatibility)
        :param return_negation: forwarded to `pat.matches`; if True a
            Negation instance is returned by the pattern rather than the
            negation being ignored
        :param return_negation_keyword: if True, append `neg_group()` of the
            match as the last element of the result
        :return: None when nothing matched; the matched text alone when no
            extras are requested; otherwise a tuple of
            `(text[, start, end][, negation keyword])`
        """
        # incorporate offset information
        found = pat.matches(self.text,
                            offset=self.start,
                            return_negation=return_negation)
        self._update_last_search(bool(found))
        if not found:
            return None
        self.matches.add(found)

        parts = [found.group(index)]
        if get_indices:
            # offset has already been added in pat.matches
            parts.append(found.start(index))
            parts.append(found.end(index))
        if return_negation_keyword:
            parts.append(found.neg_group())
        # a lone text result is returned bare, everything else as a tuple
        return parts[0] if len(parts) == 1 else tuple(parts)
Пример #3
0
def test_pattern_matches_sentences_keep_offsets():
    """Check ``Sentences.get_pattern`` indices with ``keep_offsets_ssplit``.

    With this splitter the expected indices equal the character positions
    of the matched words in the input string.
    """
    sentences = Sentences(' I want this, or that.\n These and those.',
                          ssplit=keep_offsets_ssplit)
    cases = [
        # (regex, (matched text, start, end))
        ('(this|that)', ('this', 8, 12)),  # first sentence
        ('(these|those)', ('These', 24, 29)),  # second sentence
    ]
    for regex, expected in cases:
        match = sentences.get_pattern(Pattern(regex), get_indices=True)
        assert match is not None
        text, begin, stop = match
        assert (text, begin, stop) == expected
Пример #4
0
 def has_pattern(self, pat: Pattern, ignore_negation=False):
     """Run `pat` against this object's text and return the raw result.

     :param pat: pattern to run against `self.text` (offset `self.start`)
     :param ignore_negation: forwarded unchanged to `pat.matches`
     :return: whatever `pat.matches` returned (truthy on a match)
     """
     result = pat.matches(self.text,
                          offset=self.start,
                          ignore_negation=ignore_negation)
     # record the outcome of this search before storing the match itself
     self._update_last_search(bool(result))
     if result:
         self.matches.add(result)
     return result
Пример #5
0
def test_pattern_matches_sentence():
    """First match in a single ``Sentence`` is 'this' at indices 9-13."""
    pattern = Pattern('(this|that)')
    sentence = Sentence('\t I want this, or that.\n')
    match = sentence.get_pattern(pattern, get_indices=True)
    assert match is not None
    text, begin, stop = match
    assert (text, begin, stop) == ('this', 9, 13)
Пример #6
0
def test_pattern_return_negate():
    """``matches(..., return_negation=True)`` yields a ``Negation``."""
    pattern = Pattern('test', negates=[r'\bnot?\b'])
    result = pattern.matches('do not test this', return_negation=True)
    assert isinstance(result, Negation)
    # the negating keyword and the negated text are both exposed
    assert result.neg_group() == 'not'
    assert result.match == 'test'
Пример #7
0
def test_sentence_return_negation_keyword():
    """``get_pattern`` returns (text, negation keyword) when both flags are set."""
    pattern = Pattern('test', negates=[r'\bnot?\b'])
    sentence = Sentence('do not test this')
    result = sentence.get_pattern(pattern,
                                  return_negation=True,
                                  return_negation_keyword=True)
    text, neg = result
    assert (text, neg) == ('test', 'not')
Пример #8
0
def test_sentence_return_negate():
    """With ``return_negation=True`` the negated text is still returned."""
    pattern = Pattern('test', negates=[r'\bnot?\b'])
    sentence = Sentence('do not test this')
    matched_text = sentence.get_pattern(pattern, return_negation=True)
    assert matched_text == 'test'
Пример #9
0
def test_pattern_no_return_negate():
    """Without ``return_negation`` a negated match comes back as ``False``."""
    pattern = Pattern('test', negates=[r'\bnot?\b'])
    result = pattern.matches('do not test this')
    assert result is False
Пример #10
0
    assert match is not None
    s, start, end = match
    assert s == 'this'
    assert start == 7
    assert end == 11
    # second sentence
    match = sentences.get_pattern(Pattern('(these|those)'), get_indices=True)
    assert match is not None
    s, start, end = match
    assert s == 'These'
    assert start == 23
    assert end == 28


@pytest.mark.parametrize(
    ('pat', 'sentence', 'n_matches'),
    [
        (Pattern('(this|that)'), ' I want this, or that.\n', 2),
    ],
)
def test_pattern_finditer_sentence(pat: Pattern, sentence: str, n_matches):
    """``Sentence.get_patterns`` yields one item per match in the sentence."""
    wrapped = Sentence(sentence)
    # keep only the first element (the text) of each yielded item
    texts = [found[0] for found in wrapped.get_patterns(pat)]
    assert len(texts) == n_matches


@pytest.mark.parametrize(
    ('pat', 'text', 'n_matches'),
    [
        (Pattern('(this|that)'), ' I want this, or that.\n\n But not that', 3),
    ],
)
def test_pattern_finditer_sentences(pat: Pattern, text: str, n_matches):
    """``Sentences.get_patterns`` yields one item per match across the text."""
    found = list(Sentences(text).get_patterns(pat))
    assert len(found) == n_matches
Пример #11
0
"""
Useful phrases for negation when building patterns.
"""
from runrex.algo import Pattern

# date pattern
# Regex fragments that are OR-ed together to build DATE_PAT.
# relative dates such as '3 years ago' or '2 wks before'
years_ago = r'(?:\d+ (?:year|yr|week|wk|month|mon|day)s? (?:ago|before|previous))'
# three numeric parts separated by '-' or '/', e.g. '1/2/2020'
date_pat = r'\d+[-/]\d+[-/]\d+'
# two numeric parts separated by '/', e.g. '1/2020'
date2_pat = r'\d+[/]\d+'
# month name (optionally followed by a day) and a 4-digit year
month_pat = r'\b(?:jan|feb|mar|apr|may|jun|jul|aug|sept|oct|nov|dec)\w*(?:\W*\d{1,2})?\W*\d{4}'
# 'in <month>' without a year. The previous form, 'in\b(?:jan|...)', demanded
# a word boundary between 'in' and the first letter of the month, which can
# never exist, so the fragment never matched; require a real separator.
month_only_pat = r'\bin\W+(?:jan|feb|mar|apr|may|jun|jul|aug|sept|oct|nov|dec)\w*'
# single capturing group that alternates over all date fragments
DATE_PAT = Pattern(
    '(' + '|'.join((years_ago, date_pat, date2_pat, month_pat, month_only_pat)) + ')'
)

# the word 'may' when it is not directly preceded by 'in', 'st' (as in
# 'last') or 'ce' (as in 'since') and not followed by a digit
safe_may = r'(?<!in|st|ce) may (?!\d)'

# useful starting phrases for detecting negation, etc.
boilerplate = (
    r'\b(pamphlet|warning|information|review|side effect|counsel|\bsign|ensure'
    r'|risk|\bif\b|after your visit|appt|appointment|due (to|for|at)|recommend'
    r'|pamphlet|schedul|doctor|contact|\bhow\b|\bcall|includ|failure|'
    r'associated|avoid|instruct|guideline)'
)
# words expressing uncertainty or possibility
possible = (
    r'\b(unlikely|\bposs\b|possib(ly|le|ility)|improbable|potential|susp(ect|icious)|'
    r'chance|may\b|afraid|concern|tentative|doubt|thought|think)'
)
POSSIBLE_PAT = Pattern(possible)

# negating phrases
negation = r'(no evidence|without|r/o|rule out|normal|\bnot?\b|\bor\b|denies|negative for)'
# markers of past/historical context
historical = r'(history|previous|\bhx\b|\bpast\b|\bprior\b|\bh/o\b)'
hypothetical = r'(' \
               r'option|possib\w+|desire|want|will|\bcan\b|usual' \
               r'|\bor\b|like|would|need|until|request|when|you\Wll' \
Пример #12
0
from runrex.algo import Pattern

# Example pattern: 'burden' or 'debt', accepted only with a qualifier and
# rejected when negated.
BURDEN = Pattern(
    '(burden|debt)',
    # exclude a match; the word boundaries stop 'no'/'not' from firing
    # inside longer words such as 'know' or 'note'
    negates=[r'\bnot?\b'],
    requires=['heavy', r'a\W*lot', 'significant']  # require this for match
)
Пример #13
0
def test_is_close_to(text, start, end, window, exp):
    """``is_close_to`` for the word 'pain' must return exactly `exp`."""
    pain = Pattern(r'\bpain\b')
    outcome = is_close_to(pain, text, start, end, window)
    assert outcome is exp