def test_filter_matches_filters_multiple_nested_contained_matches_and_large_overlapping(self):
    """
    Pass filter_contained_matches four same-rule matches with spans 0-5,
    1-4, 2-3 and 1-6 and check that only the 0-5 match is returned and that
    the discarded list is not empty.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    kept = LicenseMatch(rule=rule, qspan=Span(0, 5), ispan=Span(0, 5))
    overlapping = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))
    inner = LicenseMatch(rule=rule, qspan=Span(1, 4), ispan=Span(1, 4))
    innermost = LicenseMatch(rule=rule, qspan=Span(2, 3), ispan=Span(2, 3))

    # The overlapping match is deliberately passed last.
    candidates = [kept, inner, innermost, overlapping]
    result, discarded = filter_contained_matches(candidates)

    assert [kept] == result
    assert discarded
def test_files_does_filter_contained_matches_of_different_rules_with_same_licensing(self):
    """
    Two matches with the same 1-6 spans, built from two distinct rules that
    carry the same licenses: the test expects the second match to be kept
    and the first one to be the only discarded match.
    """
    licenses_rule_1 = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    licenses_rule_2 = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

    first = LicenseMatch(
        rule=licenses_rule_1,
        qspan=Span(1, 6),
        ispan=Span(1, 6),
    )
    second = LicenseMatch(
        rule=licenses_rule_2,
        qspan=Span(1, 6),
        ispan=Span(1, 6),
    )

    matches, discarded = filter_contained_matches([first, second])

    assert [second] == matches
    assert [first] == discarded
def test_filter_matches_filters_matches_with_medium_overlap_only_if_license_are_the_same(self):
    """
    Two overlapping matches of an 'apache-1.1' rule (spans 0-10 and 3-11)
    plus one match of a rule with other licenses (span 7-15): the test
    expects the 0-10 and 7-15 matches to be kept (in any order) and a
    non-empty discarded list.
    """
    apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])
    apache_first = LicenseMatch(rule=apache_rule, qspan=Span(0, 10), ispan=Span(0, 10))
    apache_second = LicenseMatch(rule=apache_rule, qspan=Span(3, 11), ispan=Span(3, 11))

    other_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
    other_match = LicenseMatch(rule=other_rule, qspan=Span(7, 15), ispan=Span(7, 15))

    candidates = [apache_first, apache_second, other_match]
    result, discarded = filter_contained_matches(candidates)

    # Order of the kept matches is not part of the expectation.
    assert sorted([apache_first, other_match]) == sorted(result)
    assert discarded
def test_filter_matches_filters_non_contiguous_or_overlapping__but_contained_matches(self):
    """
    Five same-rule matches with spans 1-2, 3-6, 1-6, 0-7 and 1-6 again: the
    test expects only the 0-7 match to be kept and a non-empty discarded
    list.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    def match_at(start, end):
        # Same start/end are used for both the query and the index span.
        return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

    head = match_at(1, 2)
    tail = match_at(3, 6)
    middle = match_at(1, 6)
    widest = match_at(0, 7)
    middle_again = match_at(1, 6)

    candidates = [head, tail, middle, widest, middle_again]
    result, discarded = filter_contained_matches(candidates)

    assert [widest] == result
    assert discarded
def test_filter_prefers_longer_overlaping_matches(self):
    """
    Matches with spans 0-5 and 1-6 for one rule and 1-8 for a second rule
    with the same licenses: the test expects only the 1-8 match to be kept
    and a non-empty discarded list.
    """
    first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

    leading = LicenseMatch(rule=first_rule, qspan=Span(0, 5), ispan=Span(0, 5))
    shorter = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
    longer = LicenseMatch(rule=second_rule, qspan=Span(1, 8), ispan=Span(1, 8))

    candidates = [leading, shorter, longer]
    matches, discarded = filter_contained_matches(candidates)

    assert [longer] == matches
    assert discarded
def test_filter_does_filter_overlaping_matches_with_same_licensings(self):
    """
    Two rules sharing the 'apache-2.0 OR gpl' license expression; one match
    with spans 0-5 and two matches with spans 1-6 (one per rule): the test
    expects only the 0-5 match to be kept and a non-empty discarded list.
    """
    first_rule = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')
    second_rule = Rule(text_file='r2', license_expression='apache-2.0 OR gpl')

    leading = LicenseMatch(rule=first_rule, qspan=Span(0, 5), ispan=Span(0, 5))
    shifted_first_rule = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
    shifted_second_rule = LicenseMatch(rule=second_rule, qspan=Span(1, 6), ispan=Span(1, 6))

    candidates = [leading, shifted_first_rule, shifted_second_rule]
    matches, discarded = filter_contained_matches(candidates)

    assert [leading] == matches
    assert discarded
def test_filter_filters_matches_with_same_spans_if_licenses_are_identical_but_rule_differ(self):
    """
    Two 0-2 matches from two different 'apache-2.0' rules plus a 1-6 match
    from the first rule: the test expects only the 1-6 match to be kept and
    a non-empty discarded list.
    """
    first_rule = Rule(text_file='r1', licenses=['apache-2.0'])
    short_first_rule = LicenseMatch(rule=first_rule, qspan=Span(0, 2), ispan=Span(0, 2))
    long_first_rule = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))

    second_rule = Rule(text_file='r2', licenses=['apache-2.0'])
    short_second_rule = LicenseMatch(rule=second_rule, qspan=Span(0, 2), ispan=Span(0, 2))

    candidates = [short_first_rule, short_second_rule, long_first_rule]
    matches, discarded = filter_contained_matches(candidates)

    assert [long_first_rule] == matches
    assert discarded
def test_filter_matches_does_filter_matches_with_contained_spans_if_licenses_are_different(self):
    """
    Three matches from three rules: 0-2 ('apache-2.0'), 1-6 ('apache-2.0')
    and 0-2 ('apache-1.1'). The test expects only the 1-6 match to be kept
    and a non-empty discarded list.
    """
    apache2_rule_a = Rule(text_file='r1', licenses=['apache-2.0'])
    short_apache2 = LicenseMatch(rule=apache2_rule_a, qspan=Span(0, 2), ispan=Span(0, 2))

    apache2_rule_b = Rule(text_file='r2', licenses=['apache-2.0'])
    long_apache2 = LicenseMatch(rule=apache2_rule_b, qspan=Span(1, 6), ispan=Span(1, 6))

    apache11_rule = Rule(text_file='r3', licenses=['apache-1.1'])
    short_apache11 = LicenseMatch(rule=apache11_rule, qspan=Span(0, 2), ispan=Span(0, 2))

    candidates = [short_apache2, long_apache2, short_apache11]
    matches, discarded = filter_contained_matches(candidates)

    assert [long_apache2] == matches
    assert discarded
def test_filter_does_not_filter_multiple_contained_matches_across_rules(self):
    """
    Four matches, each from its own rule with the same licenses, with spans
    0-5, 1-2, 3-4 and 1-6: the test expects only the 0-5 match in the kept
    list. The discarded list is not checked here.
    """
    def match_with_own_rule(rule_name, start, end):
        # Build the rule first, then its match, one rule per match.
        rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
        return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

    outer = match_with_own_rule('r1', 0, 5)
    small_left = match_with_own_rule('r2', 1, 2)
    small_right = match_with_own_rule('r3', 3, 4)
    shifted = match_with_own_rule('r5', 1, 6)

    candidates = [outer, small_left, small_right, shifted]
    result, _discarded = filter_contained_matches(candidates)

    assert [outer] == result
def test_filter_multiple_contained_matches(self):
    """
    Four matches, each from its own rule with the same licenses, with spans
    0-5, 1-2, 3-4 and 1-6: the test expects only the 0-5 match to be kept
    and the three other matches to be discarded (in any order).
    """
    def match_with_own_rule(rule_name, start, end):
        # Build the rule first, then its match, one rule per match.
        rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
        return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

    outer = match_with_own_rule('r1', 0, 5)
    small_left = match_with_own_rule('r2', 1, 2)
    small_right = match_with_own_rule('r3', 3, 4)
    shifted = match_with_own_rule('r5', 1, 6)

    candidates = [outer, small_left, small_right, shifted]
    matches, discarded = filter_contained_matches(candidates)

    assert [outer] == matches
    # Order of the discarded matches is not part of the expectation.
    assert sorted([shifted, small_left, small_right]) == sorted(discarded)
def test_filter_matches_filters_non_contiguous_or_overlapping_contained_matches_with_touching_boundaries(self):
    """
    Six matches, each from its own rule with the same licenses, with spans
    0-2, 3-7, 0-6, 1-7, 1-6 and 0-7: the test expects only the 0-7 match to
    be kept and a non-empty discarded list.
    """
    def match_with_own_rule(rule_name, start, end):
        # Build the rule first, then its match, one rule per match.
        rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
        return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

    # Creation order follows the original test: r1, r2, r3, r6, r5, r4.
    left_part = match_with_own_rule('r1', 0, 2)
    right_part = match_with_own_rule('r2', 3, 7)
    left_aligned = match_with_own_rule('r3', 0, 6)
    right_aligned = match_with_own_rule('r6', 1, 7)
    centered = match_with_own_rule('r5', 1, 6)
    widest = match_with_own_rule('r4', 0, 7)

    candidates = [
        left_part,
        right_part,
        left_aligned,
        widest,
        centered,
        right_aligned,
    ]
    result, discarded = filter_contained_matches(candidates)

    assert [widest] == result
    assert discarded