def test_match_exact_from_string_twice_with_repeated_text(self):
    """
    Match the same query string twice against a one-rule index and check
    that each run returns a single match with the same spans and texts.
    """
    rule_text = u'licensed under the GPL, licensed under the GPL'
    # rule token positions: 0 1 2 3 4 5 6 7
    tst_rule = models.Rule(_text=rule_text, licenses=['tst'])
    idx = index.LicenseIndex([tst_rule])

    querys = u'Hi licensed under the GPL, licensed under the GPL yes.'
    # query word positions: 0 1 2 3 4 5 6 7 8 9

    first_run = idx.match(query_string=querys)
    assert len(first_run) == 1
    match = first_run[0]
    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'licensed under the GPL licensed under the GPL'
    assert itext == 'licensed under the GPL licensed under the GPL'
    assert match.qspan == Span(0, 7)
    assert match.ispan == Span(0, 7)

    # match again to ensure that there are no state side effects
    second_run = idx.match(query_string=querys)
    assert len(second_run) == 1
    match = second_run[0]
    assert match.qspan == Span(0, 7)
    assert match.ispan == Span(0, 7)
    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == u'licensed under the GPL licensed under the GPL'
    assert itext == u'licensed under the GPL licensed under the GPL'
def test_match_can_match_with_simple_rule_template2(self):
    """
    Check that a rule holding a `{{X CONSORTIUM}}` template region matches a
    query where that region reads `Y CORP`, and that the matched query text
    marks the two extra words in brackets.
    """
    rule_text = u''' IN NO EVENT SHALL THE {{X CONSORTIUM}} BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. '''
    template_rule = Rule(licenses=['x-consortium'], _text=rule_text)
    idx = index.LicenseIndex([template_rule])

    query_string = u''' IN NO EVENT SHALL THE Y CORP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. '''

    matches = idx.match(query_string=query_string)
    assert len(matches) == 1
    qtext, itext = get_texts(matches[0], query_string=query_string, idx=idx)

    expected_qtokens = u''' IN NO EVENT SHALL THE [Y] [CORP] BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE '''.split()
    expected_itokens = u''' IN NO EVENT SHALL THE BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE '''.split()

    # compare token lists so that whitespace layout does not matter
    assert qtext.split() == expected_qtokens
    assert itext.split() == expected_itokens
def test_match_with_templates_with_redundant_tokens_yield_single_exact_match(self):
    """
    Index a rule made of the same phrase repeated around an empty `{{}}`
    template, check the index content and the query tokens, then check that
    a query with both phrases yields one match with 100 coverage.
    """
    rule_text = u'copyright reserved mit is license, {{}} copyright reserved mit is license'
    # rule token positions: 0 1 2 3 4 5 6 7 8 9
    tst_rule = models.Rule(_text=rule_text, licenses=['tst'])
    idx = index.LicenseIndex([tst_rule])

    indexed = idx.to_dict()
    assert indexed == {'_tst_73_0': {u'copyright': [0, 5], u'license': [4, 9], u'mit': [2, 7]}}

    querys = u'Hi my copyright reserved mit is license is the copyright reserved mit is license yes.'
    # query word positions: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
    qry = Query(query_string=querys, idx=idx)

    def token_strings(token_ids):
        # convert tid to actual token strings; None entries are kept as None
        return [None if tid is None else idx.tokens_by_tid[tid] for tid in token_ids]

    expected_tokens = [
        None, None,
        u'copyright', u'reserved', u'mit', u'is', u'license', u'is',
        None,
        u'copyright', u'reserved', u'mit', u'is', u'license',
        None,
    ]
    # expected positions: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
    assert token_strings(qry.tokens_with_unknowns()) == expected_tokens

    result = idx.match(query_string=querys)
    assert len(result) == 1
    match = result[0]
    assert match.qspan == Span(0, 4) | Span(6, 10)
    assert match.ispan == Span(0, 9)
    assert match.coverage() == 100

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'copyright reserved mit is license <is> [the] copyright reserved mit is license'
    assert itext == 'copyright reserved mit is license copyright reserved mit is license'
def test_match_has_correct_line_positions_for_query_with_repeats(self):
    """
    Match `positions/license1.txt` against the cached index and check, for
    each match in order, its rule licenses, its line range and its matched
    query text.

    The number of matches is checked too: iterating over the matches alone
    would let a run that returns fewer matches (or none) pass silently.
    """
    apache_qtext = u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'
    expected = [
        # licenses, match.lines(), qtext,
        ([u'apache-2.0'], (1, 2), apache_qtext),
        ([u'apache-2.0'], (3, 4), apache_qtext),
        ([u'apache-2.0'], (5, 6), apache_qtext),
        ([u'apache-2.0'], (7, 8), apache_qtext),
        ([u'apache-2.0'], (9, 10), apache_qtext),
    ]

    test_location = self.get_test_loc('positions/license1.txt')
    idx = cache.get_index()
    matches = idx.match(test_location)

    # compare the whole tuple at once so that a failure shows all three
    # values of the offending match together
    for expected_item, match in zip(expected, matches):
        qtext, _itext = get_texts(match, location=test_location, idx=idx)
        assert expected_item == (match.rule.licenses, match.lines(), qtext)

    # checked last so that a per-match mismatch is reported first
    assert len(expected) == len(matches)
def test_match_matches_correctly_simple_exact_query_across_query_runs(self):
    """
    Index the `detect/mit/mit.c` text as a single `mit` rule and check that
    `detect/mit/mit3.c` yields one match with the expected query and rule
    texts.
    """
    rule_loc = self.get_test_loc('detect/mit/mit.c')
    mit_rule = Rule(licenses=['mit'], text_file=rule_loc)
    idx = index.LicenseIndex([mit_rule])

    query_doc = self.get_test_loc('detect/mit/mit3.c')
    matches = idx.match(query_doc)
    assert len(matches) == 1
    qtext, itext = get_texts(matches[0], location=query_doc, idx=idx)

    # the query text has an upper case run where the rule text is lower case
    expected_qtext = u''' Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in THE SOFTWARE WITHOUT RESTRICTION INCLUDING WITHOUT LIMITATION THE RIGHTS TO USE COPY MODIFY MERGE PUBLISH DISTRIBUTE SUBLICENSE AND OR SELL COPIES of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software '''.split()
    assert qtext.split() == expected_qtext

    expected_itext = u''' Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in the Software without restriction including without limitation the rights to use copy modify merge publish distribute sublicense and or sell copies of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software '''.split()
    assert itext.split() == expected_itext
def test_match_matches_correctly_simple_exact_query_across_query_runs(self):
    """
    Build an index from the `detect/mit/mit.c` text as one `mit` rule, match
    `detect/mit/mit3.c` against it and check the single match texts.
    """
    idx = index.LicenseIndex([
        Rule(text_file=self.get_test_loc('detect/mit/mit.c'), licenses=['mit']),
    ])

    query_doc = self.get_test_loc('detect/mit/mit3.c')
    found = idx.match(query_doc)
    assert len(found) == 1
    match = found[0]
    qtext, itext = get_texts(match, location=query_doc, idx=idx)

    # matched query side: keeps the casing found in the query file
    expected_qtext = u''' Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in THE SOFTWARE WITHOUT RESTRICTION INCLUDING WITHOUT LIMITATION THE RIGHTS TO USE COPY MODIFY MERGE PUBLISH DISTRIBUTE SUBLICENSE AND OR SELL COPIES of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software '''.split()
    assert qtext.split() == expected_qtext

    # matched rule side: keeps the casing found in the rule file
    expected_itext = u''' Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in the Software without restriction including without limitation the rights to use copy modify merge publish distribute sublicense and or sell copies of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software '''.split()
    assert itext.split() == expected_itext
def test_overlap_detection5(self):
    """
    Check that a query holding only the shorter of two overlapping rule
    texts is matched to that shorter rule.
    """
    # test this containment relationship between test and index licenses:
    # * Index licenses:
    #   +-license 2 --------+
    #   |  +-license 1 --+  |
    #   +-------------------+
    #
    #   +-license 4 --------+
    #   |  +-license 1 --+  |
    #   +-------------------+

    # setup index
    license1 = '''Redistribution and use permitted for MIT license.'''
    license2 = '''Redistributions of source must retain copyright. Redistribution and use permitted for MIT license. Redistributions in binary form is permitted.'''

    rule1 = Rule(licenses=['overlap'], _text=license1)
    rule2 = Rule(licenses=['overlap'], _text=license2)
    idx = index.LicenseIndex([rule1, rule2])

    querys = '''My source. Redistribution and use permitted for MIT license. My code.'''

    # test : querys contains license1: return license1 as exact coverage
    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]
    assert match.rule == rule1

    qtext, _itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'Redistribution and use permitted for MIT license'
def closure_test_function(*args, **kwargs):
    """
    Match `test_file` against the cached index and compare the detected
    license keys with `expected_licenses`. On failure, re-assert against a
    longer list that carries the test name, the test file and (when
    `trace_text` is set) the matched texts, so the failure message is
    self-explanatory.

    `test_file`, `min_score`, `detect_negative`, `expected_licenses`,
    `trace_text`, `test_data_file` and `test_name` are read from the
    enclosing scope, which is not part of this block.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm the indentation against the original file.
    """
    idx = cache.get_index()
    matches = idx.match(
        location=test_file,
        min_score=min_score,
        # if negative, do not detect negative rules when testing negative rules
        detect_negative=detect_negative)

    if not matches:
        matches = []

    # TODO: we should expect matches properly, not with a grab bag of flat license keys
    # flattened list of all detected license keys across all matches.
    detected_licenses = functional.flatten(
        map(unicode, match.rule.licenses) for match in matches)

    try:
        if not detect_negative:
            # we skipped negative detection for a negative rule
            # we just want to ensure that the rule was matched proper
            assert matches and not expected_licenses and not detected_licenses
        else:
            assert expected_licenses == detected_licenses
    except AssertionError:
        # Only assertion failures are handled here: a bare except would also
        # intercept KeyboardInterrupt and unrelated errors.
        # On failure, we compare against more result data to get additional
        # failure details, including the test_file and full match details
        match_failure_trace = []

        if trace_text:
            for match in matches:
                qtext, itext = get_texts(match, location=test_file, idx=idx)
                rule_text_file = match.rule.text_file
                rule_data_file = match.rule.data_file
                match_failure_trace.extend([
                    '', '',
                    '======= MATCH ====', match,
                    '======= Matched Query Text for:',
                    'file://{test_file}'.format(test_file=test_file),
                ])
                if test_data_file:
                    match_failure_trace.append(
                        'file://{test_data_file}'.format(test_data_file=test_data_file))
                match_failure_trace.append(qtext.splitlines())
                match_failure_trace.extend([
                    '',
                    # the header is its own item, as for the query text above
                    '======= Matched Rule Text for:',
                    'file://{rule_text_file}'.format(rule_text_file=rule_text_file),
                    'file://{rule_data_file}'.format(rule_data_file=rule_data_file),
                    itext.splitlines(),
                ])

        # this assert will always fail and provide a detailed failure trace
        assert expected_licenses == detected_licenses + [
            test_name, 'test file: file://' + test_file
        ] + match_failure_trace
def print_matched_texts(match, location=None, query_string=None, idx=None):
    """
    Print the matched query text and the matched rule text that `get_texts`
    returns for `match`. Convenience helper for tracing and debugging tests.
    """
    qtext, itext = get_texts(
        match, location=location, query_string=query_string, idx=idx)

    # each section is: blank line, label, text
    for label, text in (('Matched qtext:', qtext), ('Matched itext:', itext)):
        print()
        print(label)
        print(text)
def test_match_in_binary_lkms_1(self):
    """
    Check that `positions/ath_pci.ko` yields one match for a rule with the
    `bsd-new` and `gpl-2.0` licenses, with the expected matched texts.
    """
    qloc = self.get_test_loc('positions/ath_pci.ko')
    idx = cache.get_index()

    found = idx.match(location=qloc)
    assert len(found) == 1
    match = found[0]
    assert match.rule.licenses == ['bsd-new', 'gpl-2.0']

    qtext, itext = get_texts(match, location=qloc, idx=idx)
    assert qtext == 'license Dual BSD GPL'
    assert itext == 'license Dual BSD GPL'
def test_match_in_binary_lkms_2(self):
    """
    Check that `positions/eeepc_acpi.ko` yields one match for a rule with
    the `gpl-1.0-plus` license, spanning rule positions 0 to 1.
    """
    qloc = self.get_test_loc('positions/eeepc_acpi.ko')
    idx = cache.get_index()

    found = idx.match(location=qloc)
    assert len(found) == 1
    match = found[0]
    assert match.rule.licenses == ['gpl-1.0-plus']
    assert Span(0, 1) == match.ispan

    qtext, itext = get_texts(match, location=qloc, idx=idx)
    # the two sides differ by the case of the first word
    assert qtext == 'license GPL'
    assert itext == 'License GPL'
def test_match_can_match_with_plain_rule_simple2(self):
    """
    Index the X11 / X Consortium license text as one plain rule and check
    that `detect/simple_detection/x11-xconsortium_text.txt` yields a single
    match whose matched query text holds every word of the license.
    """
    rule_text = u'''X11 License Copyright (C) 1996 X Consortium Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the name of the X Consortium shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from the X Consortium. X Window System is a trademark of X Consortium, Inc. 
'''
    x11_rule = Rule(licenses=['x-consortium'], _text=rule_text)
    idx = index.LicenseIndex([x11_rule])

    query_loc = self.get_test_loc(
        'detect/simple_detection/x11-xconsortium_text.txt')
    found = idx.match(location=query_loc)
    assert len(found) == 1

    expected_qtext = u''' X11 License Copyright C 1996 X Consortium Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in the Software without restriction including without limitation the rights to use copy modify merge publish distribute sublicense and or sell copies of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software THE SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE Except as contained in this notice the name of the X Consortium shall not be used in advertising or otherwise to promote the sale use or other dealings in this Software without prior written authorization from the X Consortium X Window System is a trademark of X Consortium Inc '''.split()

    # only the query side of the matched texts is checked
    qtext, _itext = get_texts(found[0], location=query_loc, idx=idx)
    assert qtext.split() == expected_qtext
def test_match_in_binary_lkms_2(self):
    """
    Check that `positions/eeepc_acpi.ko` yields one match for a rule with
    the `gpl` license, spanning rule positions 0 to 1.
    """
    qloc = self.get_test_loc('positions/eeepc_acpi.ko')
    idx = cache.get_index()

    found = idx.match(location=qloc)
    assert len(found) == 1
    match = found[0]
    assert match.rule.licenses == ['gpl']
    assert Span(0, 1) == match.ispan

    qtext, itext = get_texts(match, location=qloc, idx=idx)
    # the two sides differ by the case of the first word
    assert qtext == 'license GPL'
    assert itext == 'License GPL'
def test_match_return_correct_positions_with_short_index_and_queries(self):
    """
    Match several short queries against an index holding the two-word rule
    `MIT License` and check the matched texts and spans of every match.
    """
    idx = index.LicenseIndex([Rule(_text='MIT License', licenses=['mit'])])

    def check(match, query, expected_qtext, expected_qspan):
        # in every case below the rule side is the whole `MIT License` rule
        qtext, itext = get_texts(match, query_string=query, idx=idx)
        assert qtext == expected_qtext
        assert itext == 'MIT License'
        assert match.qspan == expected_qspan
        assert match.ispan == Span(0, 1)

    # query identical to the rule
    matches = idx.match(query_string='MIT License')
    assert len(matches) == 1
    assert idx.to_dict() == {'_tst_11_0': {'mit': [0]}}
    check(matches[0], 'MIT License', 'MIT License', Span(0, 1))

    # query with a repeated first word
    matches = idx.match(query_string='MIT MIT License')
    assert len(matches) == 1
    check(matches[0], 'MIT MIT License', 'MIT License', Span(1, 2))

    # query with two occurrences of the rule text
    query_doc1 = 'do you think I am a mit license MIT License, yes, I think so'
    # matched query positions: 0 1 2 3
    matches = idx.match(query_string=query_doc1)
    assert len(matches) == 2
    check(matches[0], query_doc1, 'mit license', Span(0, 1))
    check(matches[1], query_doc1, 'MIT License', Span(2, 3))

    # same two occurrences, from a triple-quoted query
    query_doc2 = '''do you think I am a mit license MIT License yes, I think so'''
    matches = idx.match(query_string=query_doc2)
    assert len(matches) == 2
    check(matches[0], query_doc2, 'mit license', Span(0, 1))
    check(matches[1], query_doc2, 'MIT License', Span(2, 3))
def test_match_can_match_with_plain_rule_simple2(self):
    """
    Build an index from the X11 / X Consortium license text as one plain
    rule, match `detect/simple_detection/x11-xconsortium_text.txt` against
    it and check the words of the matched query text.
    """
    rule_text = u'''X11 License Copyright (C) 1996 X Consortium Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the name of the X Consortium shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from the X Consortium. X Window System is a trademark of X Consortium, Inc. 
'''
    idx = index.LicenseIndex([Rule(_text=rule_text, licenses=['x-consortium'])])

    query_loc = self.get_test_loc('detect/simple_detection/x11-xconsortium_text.txt')
    results = idx.match(location=query_loc)
    assert len(results) == 1

    expected_qtext = u''' X11 License Copyright C 1996 X Consortium Permission is hereby granted free of charge to any person obtaining a copy of this software and associated documentation files the Software to deal in the Software without restriction including without limitation the rights to use copy modify merge publish distribute sublicense and or sell copies of the Software and to permit persons to whom the Software is furnished to do so subject to the following conditions The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software THE SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE Except as contained in this notice the name of the X Consortium shall not be used in advertising or otherwise to promote the sale use or other dealings in this Software without prior written authorization from the X Consortium X Window System is a trademark of X Consortium Inc '''.split()

    match = results[0]
    # the rule side of the texts is not checked here
    qtext, _itext = get_texts(match, location=query_loc, idx=idx)
    assert qtext.split() == expected_qtext
def test_match_in_binary_lkms_3(self):
    """
    Check that `positions/wlan_xauth.ko` yields one match for a rule with
    the `bsd-new` and `gpl-2.0` licenses, with a coverage of 100, a score
    of 20 and a rule span of positions 0 to 3.
    """
    qloc = self.get_test_loc('positions/wlan_xauth.ko')
    idx = cache.get_index()

    found = idx.match(location=qloc)
    assert len(found) == 1
    match = found[0]
    assert match.rule.licenses == ['bsd-new', 'gpl-2.0']
    assert match.coverage() == 100
    assert match.score() == 20

    qtext, itext = get_texts(match, location=qloc, idx=idx)
    assert qtext == 'license Dual BSD GPL'
    assert itext == 'license Dual BSD GPL'
    assert match.ispan == Span(0, 3)
def test_overlap_detection1(self):
    """
    Index four rules whose texts contain each other and check that a query
    made of the shortest text plus one extra word is matched to the
    shortest rule only.
    """
    # test this containment relationship between test and index licenses:
    # * Index licenses:
    #   +-license 2 --------+
    #   |  +-license 1 --+  |
    #   +-------------------+
    #
    # * License texts to detect:
    #   +- license 3 -----------+
    #   | +-license 2 --------+ |
    #   | |  +-license 1 --+  | |
    #   | +-------------------+ |
    #   +-----------------------+
    #
    #   +-license 4 --------+
    #   |  +-license 1 --+  |
    #   +-------------------+

    # setup index
    license1 = '''Redistribution and use permitted.'''
    license2 = '''Redistributions of source must retain copyright. Redistribution and use permitted. Redistributions in binary form is permitted.'''
    license3 = ''' this license source Redistributions of source must retain copyright. Redistribution and use permitted. Redistributions in binary form is permitted. has a permitted license'''
    license4 = '''My Redistributions is permitted. Redistribution and use permitted. Use is permitted too.'''

    rule1 = Rule(licenses=['overlap'], _text=license1)
    rule2 = Rule(licenses=['overlap'], _text=license2)
    rule3 = Rule(licenses=['overlap'], _text=license3)
    rule4 = Rule(licenses=['overlap'], _text=license4)
    idx = index.LicenseIndex([rule1, rule2, rule3, rule4])

    querys = 'Redistribution and use bla permitted.'

    # test : license1 is in the index and contains no other rule. should return rule1 at exact coverage.
    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]
    assert match.qspan == Span(0, 3)
    assert match.rule == rule1

    qtext, _itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'Redistribution and use [bla] permitted'
def test_match_exact_with_junk_in_between_good_tokens(self):
    """
    Check that a query with extra words between the rule words still yields
    one match, and that the matched query text shows the extra words in
    brackets.
    """
    rule_text = u'licensed under the GPL, licensed under the GPL'
    tst_rule = models.Rule(_text=rule_text, licenses=['tst'])
    idx = index.LicenseIndex([tst_rule])

    querys = u'Hi licensed that under is the that GPL, licensed or under not the GPL by yes.'
    found = idx.match(query_string=querys)
    assert len(found) == 1

    qtext, itext = get_texts(found[0], query_string=querys, idx=idx)
    assert qtext == u'licensed [that] under [is] the [that] GPL licensed [or] under [not] the GPL'
    assert itext == u'licensed under the GPL licensed under the GPL'
def test_match_exact_from_file(self):
    """
    Check that the `index/queryperfect-mini` file yields one match against
    the `index/mini` test rules, covering positions 0 to 13 on both sides.
    """
    mini_rules = self.get_test_rules('index/mini')
    idx = index.LicenseIndex(mini_rules)

    query_loc = self.get_test_loc('index/queryperfect-mini')
    found = idx.match(location=query_loc)
    assert len(found) == 1
    match = found[0]

    qtext, itext = get_texts(match, location=query_loc, idx=idx)
    assert qtext == 'Redistribution and use in source and binary forms with or without modification are permitted'
    assert itext == 'Redistribution and use in source and binary forms with or without modification are permitted'

    assert match.qspan == Span(0, 13)
    assert match.ispan == Span(0, 13)
def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(
        self):
    """
    Check that a rule with several `{{...}}` template regions, including
    one right after its first word, yields a single match from the
    sequence matcher (`MATCH_SEQ`) with a coverage of at least 99.
    """
    # was failing when a gapped token (from a template) starts at a
    # beginning of an index doc. We may still skip that, but capture a large match anyway.
    rule_text = u''' Copyright {{some copyright}} THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE '''
    rule = Rule(
        _text=rule_text,
        licenses=['test'],
    )
    idx = index.LicenseIndex([rule])

    querys = u''' Copyright 2003 (C) James. All Rights Reserved. THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. '''
    result = idx.match(query_string=querys)
    assert 1 == len(result)
    match = result[0]
    assert match_seq.MATCH_SEQ == match.matcher

    exp_qtext = u""" Copyright [2003] [C] [James] [All] [Rights] [Reserved] THIS IS FROM <THE> [CODEHAUS] AND CONTRIBUTORS IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE """.split()
    exp_itext = u""" Copyright THIS IS FROM AND CONTRIBUTORS IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE DAMAGE """.split()

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    # each side is checked once; the query side used to be asserted twice
    assert exp_qtext == qtext.split()
    assert exp_itext == itext.split()
    assert 99 <= match.coverage()
def test_match_return_one_match_with_correct_offsets(self):
    """
    Check that a query with leading extra words yields one match whose
    texts keep the casing of each side and whose spans cover positions 0
    to 6 on both sides.
    """
    abc_rule = Rule(licenses=['abc'], _text='A one. a license two. A three.')
    idx = index.LicenseIndex([abc_rule])

    querys = u'some junk. A one. A license two. A three.'
    # query word positions: 0 1 2 3 4 5 6 7 8
    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    # the query has `A license` where the rule has `a license`
    assert qtext == 'A one A license two A three'
    assert itext == 'A one a license two A three'

    assert match.qspan == Span(0, 6)
    assert match.ispan == Span(0, 6)
def test_match_works_for_apache_rule(self):
    """
    Check that a query holding the Apache license title and URL yields one
    match against the cached index: rule `apache-2.0_8.RULE`, matcher
    `MATCH_AHO_EXACT`, with the expected matched query text and lines.
    """
    idx = cache.get_index()
    # NOTE(review): the `(3, 4)` line assertion at the end implies that this
    # literal spans several lines; its line breaks are not visible in the
    # listing under review. Confirm the layout against the original file.
    querys = u'''I am not a license. The Apache Software License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt '''
    matches = idx.match(query_string=querys)
    assert 1 == len(matches)
    match = matches[0]
    assert 'apache-2.0_8.RULE' == match.rule.identifier
    assert match_aho.MATCH_AHO_EXACT == match.matcher
    # only the query side of the matched texts is checked
    qtext, _itext = get_texts(match, query_string=querys, idx=idx)
    assert u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt' == qtext
    assert (3, 4) == match.lines()
def closure_test_function(*args, **kwargs):
    """
    Match `test_file` against the cached index and compare the detected
    license keys with `expected_licenses`. On failure, re-assert against a
    longer list that carries the test name, the test file and (when
    `trace_text` is set) the matched texts, so the failure message is
    self-explanatory.

    `test_file`, `min_score`, `detect_negative`, `expected_licenses`,
    `trace_text`, `test_data_file` and `test_name` are read from the
    enclosing scope, which is not part of this block.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm the indentation against the original file.
    """
    idx = cache.get_index()
    matches = idx.match(location=test_file, min_score=min_score,
                        # if negative, do not detect negative rules when testing negative rules
                        detect_negative=detect_negative)

    if not matches:
        matches = []

    # TODO: we should expect matches properly, not with a grab bag of flat license keys
    # flattened list of all detected license keys across all matches.
    detected_licenses = functional.flatten(map(unicode, match.rule.licenses) for match in matches)

    try:
        if not detect_negative:
            # we skipped negative detection for a negative rule
            # we just want to ensure that the rule was matched proper
            assert matches and not expected_licenses and not detected_licenses
        else:
            assert expected_licenses == detected_licenses
    except AssertionError:
        # Only assertion failures are handled here: a bare except would also
        # intercept KeyboardInterrupt and unrelated errors.
        # On failure, we compare against more result data to get additional
        # failure details, including the test_file and full match details
        match_failure_trace = []

        if trace_text:
            for match in matches:
                qtext, itext = get_texts(match, location=test_file, idx=idx)
                rule_text_file = match.rule.text_file
                rule_data_file = match.rule.data_file
                match_failure_trace.extend(['', '',
                    '======= MATCH ====', match,
                    '======= Matched Query Text for:',
                    'file://{test_file}'.format(test_file=test_file),
                ])
                if test_data_file:
                    match_failure_trace.append('file://{test_data_file}'.format(test_data_file=test_data_file))
                match_failure_trace.append(qtext.splitlines())
                match_failure_trace.extend(['',
                    # the header is its own item, as for the query text above
                    '======= Matched Rule Text for:',
                    'file://{rule_text_file}'.format(rule_text_file=rule_text_file),
                    'file://{rule_data_file}'.format(rule_data_file=rule_data_file),
                    itext.splitlines(),
                ])

        # this assert will always fail and provide a detailed failure trace
        assert expected_licenses == detected_licenses + [test_name, 'test file: file://' + test_file] + match_failure_trace
def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(self):
    """
    Check that a rule with several `{{...}}` template regions, including
    one right after its first word, yields a single match from the
    sequence matcher (`MATCH_SEQ`) with a coverage of at least 99.
    """
    # was failing when a gapped token (from a template) starts at a
    # beginning of an index doc. We may still skip that, but capture a large match anyway.
    rule_text = u''' Copyright {{some copyright}} THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE '''
    rule = Rule(_text=rule_text, licenses=['test'],)
    idx = index.LicenseIndex([rule])

    querys = u''' Copyright 2003 (C) James. All Rights Reserved. THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. '''
    result = idx.match(query_string=querys)
    assert 1 == len(result)
    match = result[0]
    assert match_seq.MATCH_SEQ == match.matcher

    exp_qtext = u""" Copyright [2003] [C] [James] [All] [Rights] [Reserved] THIS IS FROM <THE> [CODEHAUS] AND CONTRIBUTORS IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE """.split()
    exp_itext = u""" Copyright THIS IS FROM AND CONTRIBUTORS IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE DAMAGE """.split()

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    # each side is checked once; the query side used to be asserted twice
    assert exp_qtext == qtext.split()
    assert exp_itext == itext.split()
    assert 99 <= match.coverage()
def debug_matches(self, matches, message, location=None, query_string=None, with_text=False, query=None):
    """
    Log `matches` for debugging when the module-level TRACE flags are set.

    Logs `message` with the number of matches and, when `query` is given,
    sets the match lines from `query.line_by_pos`. Depending on the flags,
    also logs each match and, when `with_text` is true, prints the matched
    query and rule texts of each match.

    NOTE(review): the nesting of the conditions was reconstructed from a
    whitespace-collapsed listing; confirm it against the original file.
    """
    if TRACE or TRACE_NEGATIVE:
        logger_debug(message + ':', len(matches))
        if query:
            # set line early to ease debugging
            match.set_lines(matches, query.line_by_pos)

        if TRACE_MATCHES or TRACE_NEGATIVE:
            # explicit loop: a lazy map() would never call logger_debug
            for m in matches:
                logger_debug(m)

        if (TRACE_MATCHES_TEXT or TRACE_NEGATIVE) and with_text:
            logger_debug(message + ' MATCHED TEXTS')
            for m in matches:
                logger_debug(m)
                qt, it = match.get_texts(m, location, query_string, self)
                print(' MATCHED QUERY TEXT:', qt)
                print(' MATCHED RULE TEXT:', it)
                print()
def test_match_exact_from_string_once(self):
    """
    Check that a query holding the rule text between extra words yields one
    match covering positions 0 to 13 on both sides.
    """
    rule_text = 'Redistribution and use in source and binary forms, with or without modification, are permitted'
    bsd_rule = models.Rule(licenses=['bsd'], _text=rule_text)
    idx = index.LicenseIndex([bsd_rule])

    querys = ''' The Redistribution and use in source and binary forms, with or without modification, are permitted. Always'''
    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'Redistribution and use in source and binary forms with or without modification are permitted'
    assert itext == 'Redistribution and use in source and binary forms with or without modification are permitted'

    assert match.qspan == Span(0, 13)
    assert match.ispan == Span(0, 13)
def test_match_return_correct_offsets(self):
    """
    Check that a query with leading extra words yields one match covering
    positions 0 to 5 on both the query side and the rule side.
    """
    rule_text = u'A GPL. A MIT. A LGPL.'
    # rule token positions: 0 1 2 3 4 5
    test_rule = models.Rule(_text=rule_text, licenses=['test'])
    idx = index.LicenseIndex([test_rule])

    querys = u'some junk. A GPL. A MIT. A LGPL.'
    # query word positions: 0 1 2 3 4 5 6 7
    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]

    qtext, itext = get_texts(match, query_string=querys, idx=idx)
    assert qtext == 'A GPL A MIT A LGPL'
    assert itext == 'A GPL A MIT A LGPL'

    assert match.qspan == Span(0, 5)
    assert match.ispan == Span(0, 5)
def test_match_can_match_with_rule_template_for_public_domain(self):
    """
    Check that a public domain rule with two `{{...}}` template regions
    matches a query that fills these regions, and check the texts,
    coverage, score and spans of the single match.
    """
    test_text = ''' I hereby abandon any property rights to {{SAX 2.0 (the Simple API for XML)}}, and release all of {{the SAX 2.0 }} source code, compiled code, and documentation contained in this distribution into the Public Domain. '''
    pd_rule = Rule(licenses=['public-domain'], _text=test_text)
    idx = index.LicenseIndex([pd_rule])

    querys = ''' SAX2 is Free! I hereby abandon any property rights to SAX 2.0 (the Simple API for XML), and release all of the SAX 2.0 source code, compiled code, and documentation contained in this distribution into the Public Domain. SAX comes with NO WARRANTY or guarantee of fitness for any purpose. SAX2 is Free! '''

    found = idx.match(query_string=querys)
    assert len(found) == 1
    match = found[0]
    qtext, itext = get_texts(match, query_string=querys, idx=idx)

    # query side of the matched texts
    expected_qtext = u''' I hereby abandon any property rights to [SAX] [2] [0] <the> [Simple] [API] [for] [XML] <and> <release> <all> <of> <the> [SAX] [2] [0] source code compiled code and documentation contained in this distribution into the Public Domain '''.split()
    assert qtext.split() == expected_qtext

    # rule side of the matched texts
    expected_itext = u''' I hereby abandon any property rights to <and> <release> <all> <of> source code compiled code and documentation contained in this distribution into the Public Domain '''.split()
    assert itext.split() == expected_itext

    assert match.coverage() == 84
    assert match.score() == 70
    assert match.qspan == Span(0, 6) | Span(13, 26)
    assert match.ispan == Span(0, 6) | Span(11, 24)
def test_fulltext_detection_works_with_partial_overlap_from_location(self):
    """
    Index the license3.txt test file as the only rule, match the
    license4.txt test file against it and check spans, coverage, score and
    the matched query text.
    """
    rule_loc = self.get_test_loc('detect/templates/license3.txt')
    only_rule = Rule(text_file=rule_loc, licenses=['mylicense'])
    idx = index.LicenseIndex([only_rule])

    query_loc = self.get_test_loc('detect/templates/license4.txt')
    found = idx.match(query_loc)
    assert len(found) == 1

    match = found[0]
    expected_qspan = Span(0, 41)
    expected_ispan = Span(0, 41)
    assert expected_qspan == match.qspan
    assert expected_ispan == match.ispan
    assert match.coverage() == 100
    assert match.score() == 100

    matched_qtext, _itext = get_texts(match, location=query_loc, idx=idx)
    expected_tokens = '''
    is free software you can redistribute it and or modify it under the
    terms of the GNU Lesser General Public License as published by the Free
    Software Foundation either version 2 1 of the License or at your option
    any later version
    '''.split()
    assert matched_qtext.split() == expected_tokens
def debug_matches(self, matches, message, location=None, query_string=None, with_text=False):
    """
    Emit debug traces for a list of `matches`, prefixed with `message`.

    Nothing is emitted unless one of the module-level TRACE or
    TRACE_NEGATIVE flags is set. When enabled, log the number of matches,
    then each match (if TRACE_MATCHES or TRACE_NEGATIVE is set), then the
    matched query and rule texts obtained from `get_texts` using
    `location` or `query_string` (if TRACE_MATCHES_TEXT or TRACE_NEGATIVE
    is set and `with_text` is True).
    """
    # NOTE(review): the nesting of the two inner conditions under the outer
    # flag check was reconstructed from whitespace-collapsed source; confirm.
    if TRACE or TRACE_NEGATIVE:
        logger_debug(message + ':', len(matches))

        if TRACE_MATCHES or TRACE_NEGATIVE:
            # An explicit loop is required: on Python 3 `map()` is lazy and
            # would never call logger_debug unless its result is consumed.
            for m in matches:
                logger_debug(m)

        if (TRACE_MATCHES_TEXT or TRACE_NEGATIVE) and with_text:
            logger_debug(message + ' MATCHED TEXTS')
            for m in matches:
                logger_debug(m)
                qt, it = get_texts(m, location, query_string, self)
                print(' MATCHED QUERY TEXT')
                print(qt)
                print(' MATCHED RULE TEXT')
                print(it)
                print()
def test_match_with_surrounding_junk_should_return_an_exact_match(self):
    """
    Index the mit.c test file as the only rule, match the mit4.c test file
    against it and check the matched texts, the spans and the score.
    """
    rule_loc = self.get_test_loc('detect/mit/mit.c')
    mit_rule = Rule(text_file=rule_loc, licenses=['mit'])
    idx = index.LicenseIndex([mit_rule])

    query_loc = self.get_test_loc('detect/mit/mit4.c')
    found = idx.match(query_loc)
    assert 1 == len(found)

    match = found[0]
    matched_qtext, matched_itext = get_texts(
        match, location=query_loc, idx=idx)

    # Query side: the extra "[add] [text]" tokens show up in brackets.
    expected_qtokens = u'''
    Permission [add] [text] is hereby granted free of charge to any person
    obtaining a copy of this software and associated documentation files
    the Software to deal in the Software without restriction including
    without limitation the rights to use copy modify merge publish
    distribute sublicense and or sell copies of the Software and to permit
    persons to whom the Software is furnished to do so subject to the
    following conditions The above copyright [add] [text] notice and this
    permission notice shall be included in all copies or substantial
    portions of the Software
    '''.split()
    assert matched_qtext.split() == expected_qtokens

    expected_itokens = u'''
    Permission is hereby granted free of charge to any person obtaining a
    copy of this software and associated documentation files the Software
    to deal in the Software without restriction including without
    limitation the rights to use copy modify merge publish distribute
    sublicense and or sell copies of the Software and to permit persons to
    whom the Software is furnished to do so subject to the following
    conditions The above copyright notice and this permission notice shall
    be included in all copies or substantial portions of the Software
    '''.split()
    assert matched_itext.split() == expected_itokens

    expected_qspan = Span(0, 86)
    expected_ispan = Span(0, 86)
    assert expected_qspan == match.qspan
    assert expected_ispan == match.ispan
    assert match.score() == 95.6
def test_match_seq_are_correct_on_apache(self):
    """
    Index the rules under match_seq/rules, match the match_seq/query test
    file and check that the single match comes from the sequence matcher
    and has the expected matched query text.
    """
    rules_loc = self.get_test_loc('match_seq/rules')
    idx = index.LicenseIndex(load_rules(rules_loc))

    query_loc = self.get_test_loc('match_seq/query')
    found = idx.match(location=query_loc)
    assert len(found) == 1

    match = found[0]
    assert match.matcher == match_seq.MATCH_SEQ

    matched_qtext, _itext = get_texts(match, location=query_loc, idx=idx)
    expected_tokens = u'''
    The OpenSymphony Group All rights reserved Redistribution and use in
    source and binary forms with or without modification are permitted
    provided that the following conditions are met 1 Redistributions of
    source code must retain the above copyright notice this list of
    conditions and the following disclaimer 2 Redistributions in binary
    form must reproduce the above copyright notice this list of conditions
    and the following disclaimer in the documentation and or other
    materials provided with the distribution 3 The end user documentation
    included with the redistribution if any must include the following
    acknowledgment <4> <This> <product> <includes> <software> <developed>
    <by> <the> <OpenSymphony> <Group> <http> <www> <opensymphony> <com> <5>
    Alternately this acknowledgment may appear in the software itself if
    and wherever such third party acknowledgments normally appear The names
    OpenSymphony and The OpenSymphony Group must not be used to endorse or
    promote products derived from this software without prior written
    permission For written permission please contact license opensymphony
    com Products derived from this software may not be called OpenSymphony
    or [OsCore] nor may OpenSymphony or [OsCore] appear in their name
    without prior written permission of the OpenSymphony Group THIS
    SOFTWARE IS PROVIDED AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES
    INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL
    THE APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE
    '''.split()
    assert expected_tokens == matched_qtext.split()
def test_match_has_correct_line_positions_for_query_with_repeats(self):
    """
    Match the positions/license1.txt test file against the cached index and
    check the licenses, line positions and matched query text of every
    returned match against the expected list.
    """
    apache_qtext = (
        u'The Apache Software License Version 2 0 '
        u'http www apache org licenses LICENSE 2 0 txt'
    )
    expected = [
        # licenses, match.lines(), qtext,
        ([u'apache-2.0'], (1, 2), apache_qtext),
        ([u'apache-2.0'], (3, 4), apache_qtext),
        ([u'apache-2.0'], (5, 6), apache_qtext),
        ([u'apache-2.0'], (7, 8), apache_qtext),
        ([u'apache-2.0'], (9, 10), apache_qtext),
    ]

    test_location = self.get_test_loc('positions/license1.txt')
    idx = cache.get_index()
    matches = idx.match(test_location)

    # Without this check the loop below would pass vacuously whenever fewer
    # matches (or none at all) are returned than expected.
    assert len(expected) == len(matches)

    for expected_item, match in zip(expected, matches):
        qtext, _itext = get_texts(match, location=test_location, idx=idx)
        # Compare the whole tuple so a failure reports all three values.
        assert expected_item == (match.rule.licenses, match.lines(), qtext)
def match_sequence(idx, candidate, query_run, start_offset=0):
    """
    Return a list of LicenseMatch by matching the `query_run` tokens sequence
    against the `idx` index for the `candidate` rule tuple (rid, rule,
    intersection).

    Return an empty list if `candidate` is empty or None. Matching starts
    at `query_run.start + start_offset` and is repeated while matching
    blocks are found, the query run still has matchable tokens and the
    start position has not moved past `query_run.end`.
    """
    if not candidate:
        return []

    rid, rule, _intersection = candidate
    high_postings = idx.high_postings_by_rid[rid]
    itokens = idx.tids_by_rid[rid]

    len_junk = idx.len_junk

    qbegin = query_run.start + start_offset
    qfinish = query_run.end
    qtokens = query_run.query.tokens
    query = query_run.query

    matches = []
    qstart = qbegin
    qlen = len(query_run)

    # match as long as we find alignments and have high matchable tokens:
    # this allows to find repeated instances of the same rule in the query run
    query_run_matchables = query_run.matchables

    while qstart <= qfinish:
        if not query_run_matchables:
            break

        block_matches = match_blocks(
            qtokens, itokens, qstart, qlen, high_postings, len_junk,
            query_run_matchables)
        if not block_matches:
            break

        if TRACE2:
            logger_debug('block_matches:')
            for m in block_matches:
                i, j, k = m
                print(m)
                print('qtokens:', ' '.join(idx.tokens_by_tid[t] for t in qtokens[i:i + k]))
                print('itokens:', ' '.join(idx.tokens_by_tid[t] for t in itokens[j:j + k]))

        # create one match for each matching block: this is not entirely
        # correct but this will be sorted out at LicenseMatch merging and
        # filtering time
        for qpos, ipos, mlen in block_matches:
            qspan = Span(range(qpos, qpos + mlen))
            iposses = range(ipos, ipos + mlen)
            # rule positions whose token id is not below len_junk
            hispan = Span(p for p in iposses if itokens[p] >= len_junk)
            ispan = Span(iposses)
            match = LicenseMatch(
                rule, qspan, ispan, hispan, qbegin,
                matcher=MATCH_SEQ, query=query)

            if TRACE2:
                qt, it = get_texts(
                    match, location=query.location,
                    query_string=query.query_string, idx=idx)
                print('###########################')
                print(match)
                print('###########################')
                print(qt)
                print('###########################')
                print(it)
                print('###########################')

            matches.append(match)
            # resume the next round after the furthest matched query position
            qstart = max(qstart, qspan.end + 1)

    if TRACE:
        # An explicit loop is required: on Python 3 `map()` is lazy and would
        # never call logger_debug unless its result is consumed.
        for match in matches:
            logger_debug(match)

    return matches
def test_match_with_template_and_multiple_rules(self):
    """
    Index the rules under index/bsd_templates, match a query string that
    has extra text before and after a license text, and check the matcher,
    matched texts, spans and coverage of the single match.
    """
    bsd_rules = self.get_test_rules('index/bsd_templates',)
    idx = index.LicenseIndex(bsd_rules)

    querys = u'''
    Hello, what about this
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:
    * Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
    * Neither the name of nexB Inc. nor the names of its contributors may be
    used to endorse or promote products derived from this software without
    specific prior written permission.
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    Goodbye
    No part of match '''
    found = idx.match(query_string=querys)
    assert len(found) == 1

    match = found[0]
    assert match.matcher == match_seq.MATCH_SEQ

    expected_qtokens = u"""
    Redistribution and use in source and binary forms with or without
    modification are permitted provided that the following conditions are
    met Redistributions of source code must retain the above copyright
    notice this list of conditions and the following disclaimer
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution
    Neither the name of [nexB] <Inc> nor the names of its contributors may
    be used to endorse or promote products derived from this software
    without specific prior written permission THIS SOFTWARE IS PROVIDED BY
    THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED
    WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN
    NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE
    """.split()

    expected_itokens = u"""
    Redistribution and use in source and binary forms with or without
    modification are permitted provided that the following conditions are
    met Redistributions of source code must retain the above copyright
    notice this list of conditions and the following disclaimer
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution
    Neither the name of nor the names of its contributors may be used to
    endorse or promote products derived from this software without
    specific prior written permission THIS SOFTWARE IS PROVIDED BY THE
    COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED
    WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN
    NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE
    """.split()

    matched_qtext, matched_itext = get_texts(
        match, query_string=querys, idx=idx)
    assert matched_qtext.split() == expected_qtokens
    assert matched_itext.split() == expected_itokens

    expected_qspan = Span(Span(1, 72) | Span(74, 212))
    expected_ispan = Span(0, 210)
    assert expected_qspan == match.qspan
    assert expected_ispan == match.ispan
    assert match.coverage() == 100
def test_match_to_indexed_template_with_few_tokens_around_gaps(self):
    """
    Index the index/templates/idx.txt test file as the only rule, match the
    index/templates/query.txt test file and check the matched texts, the
    coverage and the matcher of the single match.
    """
    # Was failing when a gap in a template starts very close to the start of
    # a rule tokens seq. We may still skip that, but we capture a large
    # match anyway.
    rule_loc = self.get_test_loc('index/templates/idx.txt')
    template_rule = models.Rule(text_file=rule_loc, licenses=['test'],)
    idx = index.LicenseIndex([template_rule])

    query_loc = self.get_test_loc('index/templates/query.txt')
    found = idx.match(location=query_loc)
    assert len(found) == 1
    match = found[0]

    expected_qtokens = u"""
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name [groovy] must not be used to endorse or promote products
    derived from this Software without prior written permission of <The>
    [Codehaus] For written permission please contact [info] [codehaus]
    [org] 4 Products derived from this Software may not be called [groovy]
    nor may [groovy] appear in their names without prior written permission
    of <The> [Codehaus] [groovy] is a registered trademark of <The>
    [Codehaus] 5 Due credit should be given to <The> [Codehaus] [http]
    [groovy] [codehaus] [org] <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE>
    [CODEHAUS] AND CONTRIBUTORS AS IS AND ANY EXPRESSED OR IMPLIED
    WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN
    NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE [POSSIBILITY]
    <OF> [SUCH] DAMAGE
    """.split()

    expected_itokens = u"""
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name must not be used to endorse or promote products derived from
    this Software without prior written permission of For written
    permission please contact 4 Products derived from this Software may not
    be called nor may appear in their names without prior written
    permission of is a registered trademark of 5 Due credit should be given
    to <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> AND CONTRIBUTORS AS IS AND
    ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    PURPOSE ARE DISCLAIMED IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL
    DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE DAMAGE
    """.split()

    matched_qtext, matched_itext = get_texts(
        match, location=query_loc, idx=idx)
    assert matched_qtext.split() == expected_qtokens
    assert matched_itext.split() == expected_itokens
    assert match.coverage() > 97
    assert match.matcher == match_seq.MATCH_SEQ
def test_match_can_match_with_rule_template_with_gap_near_start_with_few_tokens_before(self):
    """
    Index the license7.txt test file as the only rule, match the
    license8.txt test file and check the matched texts, coverage, score
    and spans of the single match.
    """
    # failed when a gapped token starts at a beginning of rule with few
    # tokens before
    rule_loc = self.get_test_loc('detect/templates/license7.txt')
    template_rule = Rule(text_file=rule_loc, licenses=['lic'])
    idx = index.LicenseIndex([template_rule])

    qloc = self.get_test_loc('detect/templates/license8.txt')
    found = idx.match(qloc)
    assert len(found) == 1
    match = found[0]

    expected_qtokens = u"""
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name [groovy] must not be used to endorse or promote products
    derived from this Software without prior written permission of <The>
    [Codehaus] For written permission please contact [info] [codehaus]
    [org] 4 Products derived from this Software may not be called [groovy]
    nor may [groovy] appear in their names without prior written permission
    of <The> [Codehaus] [groovy] is a registered trademark of <The>
    [Codehaus] 5 Due credit should be given to <The> [Codehaus] [http]
    [groovy] [codehaus] [org] <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE>
    [CODEHAUS] <AND> <CONTRIBUTORS> AS IS AND ANY EXPRESSED OR IMPLIED
    WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN
    NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE
    """.split()

    expected_itokens = u'''
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name must not be used to endorse or promote products derived from
    this Software without prior written permission of For written
    permission please contact 4 Products derived from this Software may not
    be called nor may appear in their names without prior written
    permission of is a registered trademark of 5 Due credit should be given
    to <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> AS IS AND ANY EXPRESSED OR
    IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED
    IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT
    INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT
    LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA
    OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
    NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
    '''.split()

    matched_qtext, matched_itext = get_texts(match, location=qloc, idx=idx)
    assert matched_qtext.split() == expected_qtokens
    assert matched_itext.split() == expected_itokens

    assert match.coverage() == 97.55
    assert match.score() == 92.64

    expected_qspan = (
        Span(2, 98) | Span(100, 125) | Span(127, 131)
        | Span(133, 139) | Span(149, 178) | Span(180, 253)
    )
    assert expected_qspan == match.qspan

    expected_ispan = Span(1, 135) | Span(141, 244)
    assert expected_ispan == match.ispan
def test_match_can_match_with_rule_template_with_gap_near_start_with_few_tokens_before(self):
    """
    Build an index from the license7.txt test file alone, run detection on
    the license8.txt test file and verify the matched query and rule texts,
    the coverage, the score and both spans of the one match returned.
    """
    # failed when a gapped token starts at a beginning of rule with few
    # tokens before
    lic_rule = Rule(
        text_file=self.get_test_loc('detect/templates/license7.txt'),
        licenses=['lic'])
    idx = index.LicenseIndex([lic_rule])

    qloc = self.get_test_loc('detect/templates/license8.txt')
    detected = idx.match(qloc)
    assert len(detected) == 1
    match = detected[0]

    want_qtokens = u"""
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name [groovy] must not be used to endorse or promote products
    derived from this Software without prior written permission of <The>
    [Codehaus] For written permission please contact [info] [codehaus]
    [org] 4 Products derived from this Software may not be called [groovy]
    nor may [groovy] appear in their names without prior written permission
    of <The> [Codehaus] [groovy] is a registered trademark of <The>
    [Codehaus] 5 Due credit should be given to <The> [Codehaus] [http]
    [groovy] [codehaus] [org] <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE>
    [CODEHAUS] <AND> <CONTRIBUTORS> AS IS AND ANY EXPRESSED OR IMPLIED
    WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN
    NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
    INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT
    LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE
    """.split()

    want_itokens = u'''
    All Rights Reserved Redistribution and use of this software and
    associated documentation Software with or without modification are
    permitted provided that the following conditions are met 1
    Redistributions of source code must retain copyright statements and
    notices Redistributions must also contain a copy of this document 2
    Redistributions in binary form must reproduce the above copyright
    notice this list of conditions and the following disclaimer in the
    documentation and or other materials provided with the distribution 3
    The name must not be used to endorse or promote products derived from
    this Software without prior written permission of For written
    permission please contact 4 Products derived from this Software may not
    be called nor may appear in their names without prior written
    permission of is a registered trademark of 5 Due credit should be given
    to <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> AS IS AND ANY EXPRESSED OR
    IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED
    IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT
    INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT
    LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA
    OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
    NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
    '''.split()

    got_qtext, got_itext = get_texts(match, location=qloc, idx=idx)
    assert got_qtext.split() == want_qtokens
    assert got_itext.split() == want_itokens

    assert match.coverage() == 97.55
    assert match.score() == 92.64

    # Query-side span: six contiguous runs of matched positions.
    query_runs = [
        Span(2, 98), Span(100, 125), Span(127, 131),
        Span(133, 139), Span(149, 178), Span(180, 253),
    ]
    want_qspan = query_runs[0]
    for run in query_runs[1:]:
        want_qspan = want_qspan | run
    assert want_qspan == match.qspan

    want_ispan = Span(1, 135) | Span(141, 244)
    assert want_ispan == match.ispan