Exemplo n.º 1
0
    def process_entry(self, entry: str):
        """
        Parse a complete entry and analyse the noun phrases it contains.

        The entry is run through ``self.nlp``. For every sentence of the
        resulting document, ``count_tokens`` is called when
        ``do_general_analysis`` is set. Each noun chunk of the sentence is
        then searched, case-insensitively, for every trigger in
        ``all_triggers``; matches that pass the overlap check are handed to
        ``__process_np``.

        :param entry: text of the entry to analyse
        :return: the ``Analysis`` instance that was passed to every
                 ``__process_np`` call
        """

        doc = self.nlp(entry)
        analysis = Analysis()

        for sentence in doc.sents:
            if self.do_general_analysis:
                self.count_tokens(sentence)

            for noun_phrase in sentence.noun_chunks:
                # Ranges already claimed by a trigger; reset per noun phrase.
                claimed = RangeSet()

                for trigger in self.all_triggers:
                    if trigger not in noun_phrase.text.lower():
                        continue

                    offset = noun_phrase.text.lower().index(trigger)

                    # NOTE(review): the candidate range is built from a
                    # character offset plus the phrase length, while the
                    # range recorded below uses noun_phrase.start/end
                    # (presumably token indices) -- confirm that the two
                    # are meant to be compared.
                    candidate = Range(offset, len(noun_phrase.text) + offset)
                    if candidate.intersection(claimed):
                        continue

                    claimed.add(Range(noun_phrase.start, noun_phrase.end))
                    self.__process_np(trigger, noun_phrase, doc, analysis)

        return analysis
Exemplo n.º 2
0
 def expand_star(self, max_line: int, other_split_files):
     """
     Extend this object's ``_lines`` with the lines not matched elsewhere.

     Builds the end-inclusive range ``1..max_line``, takes the union of the
     ``_lines`` of every object in ``other_split_files`` and extends
     ``self._lines`` with the symmetric difference of the two.

     :param max_line: last line number of the source file (inclusive)
     :param other_split_files: iterable of objects exposing ``_lines``
     """
     whole_file = Range(1, max_line, include_end=True)
     claimed_elsewhere = RangeSet()
     for other in other_split_files:
         other_lines = other._lines  # pylint: disable=protected-access
         claimed_elsewhere = claimed_elsewhere.union(other_lines)
     remainder = whole_file.symmetric_difference(claimed_elsewhere)
     self._lines.extend(remainder)
Exemplo n.º 3
0
def test_rangeset_docstring():
    """
    Check the RangeSet construction forms: no arguments, a list of Ranges,
    Ranges as positional arguments, and a mix of strings and lists of strings.
    Also checks that a doubly nested list is rejected with ValueError and
    that overlapping inputs are merged.
    """
    empty = RangeSet()
    from_list = RangeSet([Range(0, 1), Range(2, 3), Range(4, 5)])
    from_args = RangeSet(Range(0, 1), Range(2, 3), Range(4, 5))
    from_strings = RangeSet("[0, 1)", ["[1.5, 2)", "[2.5, 3)"], "[4, 5]")

    assert str(empty) == "{}"
    assert str(from_list) == "{[0, 1), [2, 3), [4, 5)}"
    assert str(from_args) == "{[0, 1), [2, 3), [4, 5)}"
    assert from_list == from_args
    assert str(from_strings) == "{[0, 1), [1.5, 2), [2.5, 3), [4, 5]}"

    # A list of lists of Ranges is one nesting level too deep.
    too_deep = [[Range(0, 1), Range(2, 3)], [Range(4, 5), Range(6, 7)]]
    asserterror(ValueError, RangeSet, (too_deep, ))

    overlapping = RangeSet("[0, 3]", "[2, 4)", "[5, 6]")
    assert str(overlapping) == "{[0, 4), [5, 6]}"
Exemplo n.º 4
0
    def _create_line_ranges(self, split_data: collections.abc.Mapping,
                            max_line: int):
        """
        Parse the ``"lines"`` entry of *split_data* into a ``RangeSet``.

        The value is a comma-separated list. Each item is a single line
        number (``"7"``), an inclusive span (``"3-9"``), or ``"*"``. A
        ``"*"`` item contributes nothing to the returned set; it only sets
        ``self._has_star``.

        :param split_data: mapping that holds the ``"lines"`` specification
        :param max_line: highest valid line number; every number must lie
                         in ``1..max_line``
        :return: a ``RangeSet`` with one end-inclusive ``Range`` per
                 numeric item
        :raises ConfigError: if the specification is missing or blank, or if
                             any ``ValueError`` occurs while parsing,
                             validating or adding an item
        """
        lines = split_data.get("lines")
        if not lines or not lines.strip():
            raise ConfigError(
                f'No lines specified for split file "{self._path}".')

        range_set = RangeSet()
        for line_range in lines.split(","):
            start, _, end = line_range.partition("-")
            if start.strip() == "*":
                # The star is only recorded here, not expanded.
                self._has_star = True
                continue
            try:
                start = int(start)
                # A lone number ("7") denotes the one-line span 7-7.
                end = int(end) if end else start
                if not 0 < start <= max_line or not 0 < end <= max_line:
                    raise ValueError(f"Out of range (1-{max_line})")
                range_set.add(Range(start, end, include_end=True))
            except ValueError as ex:
                # Chain explicitly so the traceback names the ValueError as
                # the direct cause rather than an unrelated failure that
                # happened during handling.
                raise ConfigError(
                    f'Invalid lines for split file "{self._path}": {ex}'
                ) from ex
        return range_set
Exemplo n.º 5
0
def test_rangeset_constructor_valid(args, ranges, strr, reprr, isempty):
    """
    Build a RangeSet from ``args`` and compare it with the expected values.

    Checks ``.ranges()``, ``str()``, ``repr()``, ``.isempty()``, truthiness,
    and that a copy hashes the same as the original. The set is then cleared
    and must report itself as empty in each of those forms.
    """
    built = RangeSet(*args)

    # State right after construction.
    assert ranges == built.ranges()
    assert strr == str(built)
    assert reprr == repr(built)
    assert isempty == built.isempty()
    assert isempty != bool(built)
    assert hash(built) == hash(built.copy())

    # State after clearing.
    built.clear()
    assert "{}" == str(built)
    assert "RangeSet{}" == repr(built)
    assert built.isempty()
    assert not bool(built)
Exemplo n.º 6
0
def test_issue8():
    """
    Regression test for issue 8.

    Issue: adding a Range to a RangeSet that holds two non-overlapping
    ranges, where the new range overlaps one but not the other, raised a
    TypeError.
    Cause: the code passed a linked-list node instead of the node's value
    (a range).
    """
    try:
        # New range touches the end of the second stored range.
        ascending = RangeSet()
        for low, high in ((100, 300), (400, 500), (500, 600)):
            ascending.add(Range(low, high))
        assert str(ascending) == "{[100, 300), [400, 600)}"

        # Same result, built from the far end backwards.
        descending = RangeSet()
        for low, high in ((400, 600), (200, 300), (100, 200)):
            descending.add(Range(low, high))
        assert str(descending) == "{[100, 300), [400, 600)}"
    except TypeError:
        fail(
            "RangeSet should not have an issue concatenating to the second range of two in a RangeSet"
        )
Exemplo n.º 7
0
                   2018, 12, 1), False, "[2017-05-27, 2018-02-02]",
         "Range[datetime.date(2017, 5, 27), datetime.date(2018, 2, 2)]"),
        (Range(datetime.timedelta(0, 3600), datetime.timedelta(
            0, 7200)), datetime.timedelta(0, 6000), True, "[1:00:00, 2:00:00)",
         "Range[datetime.timedelta(seconds=3600), datetime.timedelta(seconds=7200))"
         ),
        (Range(datetime.timedelta(1, 1804), datetime.timedelta(3)),
         datetime.timedelta(1), False, "[1 day, 0:30:04, 3 days, 0:00:00)",
         "Range[datetime.timedelta(days=1, seconds=1804), datetime.timedelta(days=3))"
         ),
        (Range("begin", "end"), "middle", False, "[begin, end)",
         "Range['begin', 'end')"),
        (Range("begin",
               "end"), "cows", True, "[begin, end)", "Range['begin', 'end')"),
        # RangeSets
        (Range(1, 10), RangeSet(Range(2, 9)), True, "[1, 10)", "Range[1, 10)"),
        (Range(1,
               10), RangeSet(Range(2, 3), Range(4, 5), Range(6, 7), Range(
                   8, 9)), True, "[1, 10)", "Range[1, 10)"),
        (Range(1, 10), RangeSet(Range(0,
                                      11)), False, "[1, 10)", "Range[1, 10)"),
        (Range(1, 4), RangeSet(Range(2, 3), Range(
            5, 6)), False, "[1, 4)", "Range[1, 4)"),
    ])
def test_range_contains(rng, item, contains, strr, reprr):
    """
    Check membership of ``item`` in ``rng`` against the expected result,
    and check the range's ``str()`` and ``repr()`` output.
    """
    is_member = item in rng
    assert contains == is_member
    assert strr == str(rng)
    assert reprr == repr(rng)
Exemplo n.º 8
0
def test_rangeset_isdisjoint(rng1, rng2, isdisjoint, error_type):
    """
    Check ``rng1.isdisjoint(rng2)``.

    When ``error_type`` is given, the call must raise it. Otherwise the
    result must equal the result of the reversed call, made on a RangeSet
    built from ``rng2``, and must equal ``isdisjoint``.
    """
    if error_type is not None:
        asserterror(error_type, rng1.isdisjoint, (rng2, ))
        return

    assert rng1.isdisjoint(rng2) == RangeSet(rng2).isdisjoint(rng1)
    assert isdisjoint == rng1.isdisjoint(rng2)
Exemplo n.º 9
0
         [Range(0, 1), Range(1.5, 2),
          Range(2.5, 3),
          Range(4, 5)], "{[0, 1), [1.5, 2), [2.5, 3), [4, 5)}",
         "RangeSet{Range[0, 1), Range[1.5, 2), Range[2.5, 3), Range[4, 5)}",
         False),  # mix Rangelike, iterable args
        (["[0, 3]", "[2, 4)", "[5, 6]"], [Range(0, 4),
                                          Range("[5, 6]")], "{[0, 4), [5, 6]}",
         "RangeSet{Range[0, 4), Range[5, 6]}", False),  # overlapping
        (["[0, 4)", "(1, 3)"], [Range(0, 4)], "{[0, 4)}",
         "RangeSet{Range[0, 4)}", False),  # overlapping 2
        ([Range(1, 3), Range(2, 4)], [Range(
            1, 4)], "{[1, 4)}", "RangeSet{Range[1, 4)}", False),
        ([Range('apple', 'carrot'),
          Range('banana', 'durian')], [Range('apple', 'durian')],
         "{[apple, durian)}", "RangeSet{Range['apple', 'durian')}", False),
        ([RangeSet("(0, 1)", "(1, 2)", "(2, 3)")],
         [Range("(0, 1)"), Range("(1, 2)"),
          Range("(2, 3)")], "{(0, 1), (1, 2), (2, 3)}",
         "RangeSet{Range(0, 1), Range(1, 2), Range(2, 3)}", False)
    ])
def test_rangeset_constructor_valid(args, ranges, strr, reprr, isempty):
    """
    Tests that the constructor of rngset works as intended. Also, as a byproduct,
    tests the .ranges(), .__str__(), .__repr__(), .clear(), and .isempty()
    """
    rangeset = RangeSet(*args)
    assert (ranges == rangeset.ranges())
    assert (strr == str(rangeset))
    assert (reprr == repr(rangeset))
    assert (isempty == rangeset.isempty())
    assert (isempty != bool(rangeset))