示例#1
0
    def test_match_exact_from_string_twice_with_repeated_text(self):
        """
        Match the same query string twice against a one-rule index whose text
        repeats itself, and check the matched texts and spans on both runs.
        """
        rule_text = u'licensed under the GPL, licensed under the GPL'
        #             0    1   2   3         4      5   6   7
        rule = models.Rule(license_expression='tst', stored_text=rule_text)
        idx = index.LicenseIndex([rule])

        querys = u'Hi licensed under the GPL, licensed under the GPL yes.'
        #          0        1   2   3     4       5     6    7   8   9

        # first run
        first_run = idx.match(query_string=querys)
        assert len(first_run) == 1
        first = first_run[0]
        qtext, itext = get_texts(first, query_string=querys, idx=idx)
        assert qtext == 'licensed under the GPL licensed under the GPL'
        assert itext == 'licensed under the gpl licensed under the gpl'

        assert first.qspan == Span(0, 7)
        assert first.ispan == Span(0, 7)

        # second run: matching again must not be affected by state left over
        # from the first run
        second_run = idx.match(query_string=querys)
        assert len(second_run) == 1
        second = second_run[0]
        assert second.qspan == Span(0, 7)
        assert second.ispan == Span(0, 7)

        qtext, itext = get_texts(second, query_string=querys, idx=idx)
        assert qtext == u'licensed under the GPL licensed under the GPL'
        assert itext == u'licensed under the gpl licensed under the gpl'
    def test_match_exact_from_string_twice_with_repeated_text(self):
        """
        Match the same query string twice against a one-rule mini index whose
        text repeats itself, and check the matched texts and spans each time.
        """
        rule_text = u'licensed under the GPL, licensed under the GPL'
        #             0    1   2   3         4      5   6   7
        rule = models.Rule(license_expression='tst', stored_text=rule_text)
        idx = MiniLicenseIndex([rule])

        querys = u'Hi licensed under the GPL, licensed under the GPL yes.'
        #          0        1   2   3     4       5     6    7   8   9

        # first run
        first_run = idx.match(query_string=querys)
        assert len(first_run) == 1
        first = first_run[0]
        first_qtext, first_itext = get_texts(first)
        assert first_qtext == 'licensed under the GPL, licensed under the GPL'
        assert first_itext == 'licensed under the gpl licensed under the gpl'

        assert first.qspan == Span(0, 7)
        assert first.ispan == Span(0, 7)

        # second run: matching again must not be affected by state left over
        # from the first run
        second_run = idx.match(query_string=querys)
        assert len(second_run) == 1
        second = second_run[0]
        assert second.qspan == Span(0, 7)
        assert second.ispan == Span(0, 7)

        second_qtext, second_itext = get_texts(second)
        assert second_qtext == u'licensed under the GPL, licensed under the GPL'
        assert second_itext == u'licensed under the gpl licensed under the gpl'
    def test_match_with_templates_with_redundant_tokens_yield_single_exact_match(self):
        """
        Check the query tokens (with unknowns) and the single match returned
        for a query that repeats the text of a one-rule index.
        """
        rule_text = u'copyright reserved mit is license, {{}} copyright reserved mit is license'
        #              0        1  2   3       4               5        6   7  8       9
        rule = models.Rule(license_expression='tst', stored_text=rule_text)
        idx = MiniLicenseIndex([rule])

        querys = u'Hi my copyright reserved mit is license is the copyright reserved mit is license yes.'
        #           0  1         2        3   4  5       6  7   8         9       10  11 12      13  14
        qry = Query(query_string=querys, idx=idx)

        def tokens_as_strings(tids):
            # convert each token id to its token string, keeping None as-is
            return [None if tid is None else idx.tokens_by_tid[tid] for tid in tids]

        expected_tokens = [None, None, u'copyright', u'reserved', u'mit', u'is', u'license', u'is', None, u'copyright', u'reserved', u'mit', u'is', u'license', None]
        #                     0     1            2            3       4      5           6      7      8            9           10      11     12          13     14
        assert tokens_as_strings(qry.tokens_with_unknowns()) == expected_tokens

        matches = idx.match(query_string=querys)
        assert len(matches) == 1

        found = matches[0]
        assert found.qspan == Span(0, 4) | Span(6, 10)
        assert found.ispan == Span(0, 9)
        assert found.coverage() == 100
        qtext, itext = get_texts(found)
        assert qtext == 'copyright reserved mit is license [is] [the] copyright reserved mit is license'
        assert itext == 'copyright reserved mit is license copyright reserved mit is license'
    def test_match_can_match_with_simple_rule_template2(self):
        """
        Match a query that has two extra words ("Y CORP") compared to the rule
        text and check the matched query and rule texts token by token.
        """
        rule_text = u'''
        IN NO EVENT SHALL THE
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        '''
        idx = index.LicenseIndex(
            [Rule(stored_text=rule_text, license_expression='x-consortium')]
        )

        query_string = u'''
        IN NO EVENT SHALL THE Y CORP
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        '''

        matches = idx.match(query_string=query_string)
        assert len(matches) == 1
        qtext, itext = get_texts(matches[0], query_string=query_string, idx=idx)

        # the words of the query that are not in the rule show up in brackets
        expected_qtokens = u'''
        IN NO EVENT SHALL THE [Y] [CORP] BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER
        LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT
        OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE
        '''.split()
        assert qtext.split() == expected_qtokens

        expected_itokens = u'''
        IN NO EVENT SHALL THE BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY
        WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN
        CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
        '''.lower().split()
        assert itext.split() == expected_itokens
示例#5
0
    def test_match_with_templates_with_redundant_tokens_yield_single_exact_match(self):
        """
        Check the query tokens (with unknowns) and the single match returned
        for a query that repeats the text of a one-rule mini index.
        """
        from licensedcode_test_utils import query_tokens_with_unknowns  # NOQA

        rule_text = 'copyright reserved mit is license, copyright reserved mit is license'
        #            0         1        2   3  4        5         6        7   8  9
        rule = models.Rule(license_expression='tst', stored_text=rule_text)
        idx = MiniLicenseIndex([rule])

        querys = u'Hi my copyright reserved mit is license is the copyright reserved mit is license yes.'
        #           0  1         2        3   4  5       6  7   8         9       10  11 12      13  14
        qry = Query(query_string=querys, idx=idx)

        def tokens_as_strings(tids):
            # convert each token id to its token string, keeping None as-is
            return [None if tid is None else idx.tokens_by_tid[tid] for tid in tids]

        expected_tokens = [None, None, u'copyright', u'reserved', u'mit', u'is', u'license', u'is', None, u'copyright', u'reserved', u'mit', u'is', u'license', None]
        #                     0     1            2            3       4      5           6      7      8            9           10      11     12          13     14
        assert tokens_as_strings(query_tokens_with_unknowns(qry)) == expected_tokens

        matches = idx.match(query_string=querys)
        assert len(matches) == 1

        found = matches[0]
        assert found.qspan == Span(0, 4) | Span(6, 10)
        assert found.ispan == Span(0, 9)
        assert found.coverage() == 100
        matched_qtext, matched_itext = get_texts(found)
        assert matched_qtext == 'copyright reserved mit is license [is] [the] copyright reserved mit is license'
        assert matched_itext == 'copyright reserved mit is license copyright reserved mit is license'
示例#6
0
    def debug_matches(self,
                      matches,
                      message,
                      location=None,
                      query_string=None,
                      with_text=False,
                      qry=None):
        """
        Log debug-level data for a list of `matches`, prefixed by `message`.

        When `qry` is provided, the matched lines are set on the matches first.
        When `with_text` is true, the matched query and rule texts are logged
        after each match. `location` and `query_string` are not used here.
        """
        logger_debug(message + ':', len(matches))

        # set line early to ease debugging
        if qry:
            match.set_matched_lines(matches, qry.line_by_pos)

        if with_text:
            logger_debug(message + ' MATCHED TEXTS')

            from licensedcode.tracing import get_texts

            for matched in matches:
                logger_debug(matched)
                query_text, rule_text = get_texts(matched)
                logger_debug('  MATCHED QUERY TEXT:', query_text)
                logger_debug('  MATCHED RULE TEXT:', rule_text)
        else:
            for matched in matches:
                logger_debug(matched)
示例#7
0
文件: index.py 项目: vsurge/barista
    def debug_matches(self,
                      matches,
                      message,
                      location=None,
                      query_string=None,
                      with_text=False,
                      query=None):
        """
        Log debug data for a list of `matches`, prefixed by `message`.

        Nothing happens unless the module-level TRACE or TRACE_NEGATIVE flag is
        set. When `query` is provided, lines are set on the matches first. Each
        match is logged when TRACE_MATCHES or TRACE_NEGATIVE is set. When
        `with_text` is true and TRACE_MATCHES_TEXT or TRACE_NEGATIVE is set, the
        matched query and rule texts are printed too: `location` and
        `query_string` are passed to `get_texts` together with this index.
        """
        if not (TRACE or TRACE_NEGATIVE):
            return

        logger_debug(message + ':', len(matches))
        if query:
            # set line early to ease debugging
            match.set_lines(matches, query.line_by_pos)

        if TRACE_MATCHES or TRACE_NEGATIVE:
            # use an explicit loop: map() returns a lazy iterator on Python 3
            # and a bare map(logger_debug, matches) would never log anything
            for m in matches:
                logger_debug(m)

        if (TRACE_MATCHES_TEXT or TRACE_NEGATIVE) and with_text:
            logger_debug(message + ' MATCHED TEXTS')

            from licensedcode.tracing import get_texts

            for m in matches:
                logger_debug(m)
                qt, it = get_texts(m, location, query_string, self)
                print('  MATCHED QUERY TEXT:', qt)
                print('  MATCHED RULE TEXT:', it)
                print()
    def test_match_matches_correctly_simple_exact_query_across_query_runs(self):
        """
        Match the mit3.c test file against an index built from the mit.c test
        file and check the matched query and rule texts token by token.
        """
        rule_loc = self.get_test_loc('detect/mit/mit.c')
        idx = index.LicenseIndex([Rule(text_file=rule_loc, license_expression='mit')])

        query_doc = self.get_test_loc('detect/mit/mit3.c')
        matches = idx.match(query_doc)
        assert len(matches) == 1

        qtext, itext = get_texts(matches[0], location=query_doc, idx=idx)

        expected_qtext = u'''
            Permission is hereby granted free of charge to any person obtaining
            copy of this software and associated documentation files the Software to
            deal in THE SOFTWARE WITHOUT RESTRICTION INCLUDING WITHOUT LIMITATION THE
            RIGHTS TO USE COPY MODIFY MERGE PUBLISH DISTRIBUTE SUBLICENSE AND OR SELL
            COPIES of the Software and to permit persons to whom the Software is
            furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split()
        assert qtext.split() == expected_qtext

        expected_itext = u'''
            Permission is hereby granted free of charge to any person obtaining
            copy of this software and associated documentation files the Software to
            deal in the Software without restriction including without limitation
            the rights to use copy modify merge publish distribute sublicense and or
            sell copies of the Software and to permit persons to whom the Software
            is furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.lower().split()
        assert itext.split() == expected_itext
    def test_overlap_detection5(self):
        """
        Check that a query containing only the text of the inner rule is
        matched to that rule and not to the larger rule that contains it.
        """
        #  test this containment relationship between test and index licenses:
        #   * Index licenses:
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+
        #
        #   +-license 4 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+

        # setup index
        inner_text = '''Redistribution and use permitted for MIT license.'''

        outer_text = '''Redistributions of source must retain copyright.
        Redistribution and use permitted for MIT license.
        Redistributions in binary form is permitted.'''

        inner_rule = Rule(stored_text=inner_text, license_expression='overlap')
        outer_rule = Rule(stored_text=outer_text, license_expression='overlap')
        idx = index.LicenseIndex([inner_rule, outer_rule])

        querys = '''My source.
        Redistribution and use permitted for MIT license.
        My code.'''

        # test : querys contains license1: return license1 as exact coverage
        matches = idx.match(query_string=querys)
        assert len(matches) == 1

        found = matches[0]
        assert found.rule == inner_rule
        qtext, _itext = get_texts(found, query_string=querys, idx=idx)
        assert qtext == 'Redistribution and use permitted for MIT license'
    def test_match_seq_are_correct_on_apache(self):
        """
        Match the match_seq test query against the match_seq test rules and
        check that there is a single sequence match with the expected text.
        """
        extra_legalese = set([
            'redistributions', 'written', 'registered', 'derived',
            'damage', 'due', 'alternately', 'nor'
        ])
        legalese = mini_legalese | extra_legalese

        rule_dir = self.get_test_loc('match_seq/rules')
        idx = index.LicenseIndex(load_rules(rule_dir), _legalese=legalese)

        query_loc = self.get_test_loc('match_seq/query')
        matches = idx.match(location=query_loc)
        assert len(matches) == 1

        found = matches[0]
        assert found.matcher == match_seq.MATCH_SEQ

        matched_qtext, _itext = get_texts(found)
        expected = u'''
            The OpenSymphony Group. All rights reserved.

            Redistribution and use in source and binary forms, with or without modification,
            are permitted provided that the following conditions are met:

            1. Redistributions of source code must retain the above copyright notice, this
            list of conditions and the following disclaimer.

            2. Redistributions in binary form must reproduce the above copyright notice,
            this list of conditions and the following disclaimer in the documentation and/or
            other materials provided with the distribution.

            3. The end-user documentation included with the redistribution, if any, must
            include the following acknowledgment:

            [4]. "[This] [product] [includes] [software] [developed] [by] [the] [OpenSymphony] [Group]
            ([http]://[www].[opensymphony].[com]/)."

            [5]. Alternately, this acknowledgment may appear in the software itself, if and
            wherever such third-party acknowledgments normally appear.

            The names "OpenSymphony" and "The OpenSymphony Group" must not be used to
            endorse or promote products derived from this software without prior written
            permission. For written permission, please contact [email protected] .

            Products derived from this software may not be called "OpenSymphony" or
            "[OsCore]", nor may "OpenSymphony" or "[OsCore]" appear in their name, without prior
            written permission of the OpenSymphony Group.

            THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
            INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
            FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE
            SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
            INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
            LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
            PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
            LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
            OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
            ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        # compare token by token so that whitespace differences do not matter
        assert matched_qtext.split() == expected.split()
    def closure_test_function(*args, **kwargs):
        """
        Run license detection on `test_file` with the cached index and check
        that the detected license expressions are the expected ones.

        When `regen` is set, nothing is asserted: the test data is dumped back,
        using the detected expressions as the new expected ones unless
        `expected_failure` is set.

        On a mismatch, a second assertion that always fails is used to report
        the expected and detected expressions together with the matched texts
        and related file paths of each match.
        """
        idx = cache.get_index()
        matches = idx.match(location=test_file, min_score=0)
        if not matches:
            matches = []

        detected_expressions = [match.rule.license_expression for match in matches]

        # use detection as expected and dump test back
        if regen:
            if not expected_failure:
                license_test.license_expressions = detected_expressions
            license_test.dump()
            return

        try:
            assert expected_expressions == detected_expressions
        except AssertionError:
            # Only a failed comparison is handled here: anything else (such as
            # a KeyboardInterrupt) propagates as-is.
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            results = expected_expressions + ['======================', '']
            failure_trace = detected_expressions[:] + ['======================', '']
            for match in matches:
                qtext, itext = get_texts(match)
                rule_text_file = match.rule.text_file
                if match.rule.is_license:
                    rule_data_file = rule_text_file.replace('LICENSE', 'yml')
                else:
                    rule_data_file = match.rule.data_file
                # NOTE: the format(**locals()) calls below rely on the names of
                # the local and enclosing variables: do not rename them.
                failure_trace.extend(['',
                    '======= MATCH ====', repr(match),
                    '======= Matched Query Text for:',
                    'file://{test_file}'.format(**locals())
                ])
                if test_data_file:
                    failure_trace.append('file://{test_data_file}'.format(**locals()))

                failure_trace.append('')
                failure_trace.append(qtext)
                failure_trace.extend(['',
                    '======= Matched Rule Text for:',
                    'file://{rule_text_file}'.format(**locals()),
                    'file://{rule_data_file}'.format(**locals()),
                    '',
                    itext,
                ])
            if not matches:
                failure_trace.extend(['',
                    '======= NO MATCH ====',
                    '======= Not Matched Query Text for:',
                    'file://{test_file}'.format(**locals())
                ])
                if test_data_file:
                    failure_trace.append('file://{test_data_file}'.format(**locals()))

            # this assert will always fail and provide a detailed failure trace
            assert '\n'.join(results) == '\n'.join(failure_trace)
    def test_match_in_binary_lkms_1(self):
        """
        Check the single match found in the ath_pci.ko test file using the
        cached index: license keys and matched query and rule texts.
        """
        qloc = self.get_test_loc('positions/ath_pci.ko')
        idx = cache.get_index()
        found_matches = idx.match(location=qloc)
        assert len(found_matches) == 1

        found = found_matches[0]
        assert found.rule.license_keys() == ['bsd-new', 'gpl-2.0']

        query_text, rule_text = get_texts(found, location=qloc, idx=idx)
        assert query_text == 'license Dual BSD GPL'
        assert rule_text == 'license dual bsd gpl'
    def test_match_in_binary_lkms_2(self):
        """
        Check the single match found in the eeepc_acpi.ko test file using the
        cached index: license keys, rule span and matched texts.
        """
        qloc = self.get_test_loc('positions/eeepc_acpi.ko')
        idx = cache.get_index()
        found_matches = idx.match(location=qloc)
        assert len(found_matches) == 1

        found = found_matches[0]
        assert found.rule.license_keys() == ['gpl-1.0-plus']
        assert found.ispan == Span(0, 1)

        query_text, rule_text = get_texts(found, location=qloc, idx=idx)
        assert query_text == 'license GPL'
        assert rule_text == 'license gpl'
    def test_match_can_match_with_plain_rule_simple2(self):
        """
        Match the x11-xconsortium_text.txt test file against a one-rule index
        and check the matched query text token by token.
        """
        rule_text = u'''X11 License
        Copyright (C) 1996 X Consortium
        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions: The above copyright
        notice and this permission notice shall be included in all copies or
        substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS",
        WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
        TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
        NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM,
        DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
        OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the
        name of the X Consortium shall not be used in advertising or otherwise to
        promote the sale, use or other dealings in this Software without prior
        written authorization from the X Consortium. X Window System is a trademark
        of X Consortium, Inc.
        '''
        idx = index.LicenseIndex(
            [Rule(stored_text=rule_text, license_expression='x-consortium')]
        )

        query_loc = self.get_test_loc('detect/simple_detection/x11-xconsortium_text.txt')
        matches = idx.match(location=query_loc)
        assert len(matches) == 1

        expected_qtext = u'''
        X11 License Copyright C 1996 X Consortium Permission is hereby granted free
        of charge to any person obtaining copy of this software and associated
        documentation files the Software to deal in the Software without restriction
        including without limitation the rights to use copy modify merge publish
        distribute sublicense and or sell copies of the Software and to permit
        persons to whom the Software is furnished to do so subject to the following
        conditions The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software THE SOFTWARE
        IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED INCLUDING
        BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR PARTICULAR
        PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR
        ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR
        OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE Except as contained in this notice the name
        of the X Consortium shall not be used in advertising or otherwise to promote
        the sale use or other dealings in this Software without prior written
        authorization from the X Consortium X Window System is trademark of X
        Consortium Inc
        '''.split()
        matched_qtext, _itext = get_texts(matches[0], location=query_loc, idx=idx)
        assert matched_qtext.split() == expected_qtext
 def test_match_in_binary_lkms_3(self):
     """
     Check the single match found in the wlan_xauth.ko test file using the
     cached index: license keys, coverage, score, texts and rule span.
     """
     qloc = self.get_test_loc('positions/wlan_xauth.ko')
     idx = cache.get_index()
     found_matches = idx.match(location=qloc)
     assert len(found_matches) == 1

     found = found_matches[0]
     assert found.rule.license_keys() == ['bsd-new', 'gpl-2.0']
     assert found.coverage() == 100
     assert found.score() == 100

     query_text, rule_text = get_texts(found, location=qloc, idx=idx)
     assert query_text == 'license Dual BSD GPL'
     assert rule_text == 'license dual bsd gpl'
     assert found.ispan == Span(0, 3)
    def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(
            self):
        """
        Check that a query that lacks a few of the rule words and has a few
        extra ones is returned as a single sequence match with at least 70%
        coverage, and check the matched query and rule texts token by token.
        """
        # was failing when a gapped token (from a template) starts at a
        # beginning of an index doc. We may still skip that, but capture a large match anyway.

        rule_text = u'''
            Copyright
            THIS IS FROM [[THE OLD CODEHAUS]] AND CONTRIBUTORS
            IN NO EVENT SHALL [[THE OLD CODEHAUS]] OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE [[POSSIBILITY OF NEW SUCH]] DAMAGE
        '''

        rule = Rule(stored_text=rule_text, license_expression='test')

        legalese = (mini_legalese
                    | set([
                        'copyright', 'reserved', 'advised', 'liable', 'damage',
                        'contributors', 'alternately', 'possibility'
                    ]))

        idx = index.LicenseIndex([rule], _legalese=legalese)

        querys = u'''
            Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        result = idx.match(query_string=querys)
        assert len(result) == 1
        match = result[0]
        assert match.matcher == match_seq.MATCH_SEQ

        # query words in [brackets] are the ones that are not matched
        exp_qtext = u"""
            Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
            [THIS] [IS] [FROM] [THE] CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        """.split()

        # rule words in <angle brackets> are the ones that are not matched
        exp_itext = u"""
            Copyright
            <THIS> <IS> <FROM> <THE> <OLD> CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE <OLD> CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF <NEW> SUCH DAMAGE
        """.lower().split()
        qtext, itext = get_texts(match)
        # each text is checked once: the query text check used to be duplicated
        assert qtext.split() == exp_qtext
        assert itext.split() == exp_itext
        assert match.coverage() >= 70
    def test_overlap_detection1(self):
        """
        Check that a query made of the smallest rule text plus one extra word
        is matched to that smallest rule, with the extra word left unmatched.
        """
        #  test this containment relationship between test and index licenses:
        #   * Index licenses:
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+
        #
        #   * License texts to detect:
        #   +- license 3 -----------+
        #   | +-license 2 --------+ |
        #   | |  +-license 1 --+  | |
        #   | +-------------------+ |
        #   +-----------------------+
        #
        #   +-license 4 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+

        # setup index
        license1 = '''Redistribution and use permitted.'''

        license2 = '''Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.'''

        license3 = '''
        this license source
        Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.
        has a permitted license'''

        license4 = '''My Redistributions is permitted.
        Redistribution and use permitted.
        Use is permitted too.'''

        # build the rules in the same order as the texts above
        rules = [
            Rule(stored_text=text, license_expression='overlap')
            for text in (license1, license2, license3, license4)
        ]
        smallest_rule = rules[0]
        idx = index.LicenseIndex(rules)

        querys = 'Redistribution and use bla permitted.'
        # test : license1 is in the index and contains no other rule. should return rule1 at exact coverage.
        matches = idx.match(query_string=querys)
        assert len(matches) == 1
        found = matches[0]
        assert found.qspan == Span(0, 3)
        assert found.rule == smallest_rule
        qtext, _itext = get_texts(found, query_string=querys, idx=idx)
        assert qtext == 'Redistribution and use [bla] permitted'
    def test_match_return_correct_positions_with_short_index_and_queries(self):
        """
        Check the matched texts and the query and rule spans returned for
        several short queries matched against a two-word rule.
        """
        mit_rule = Rule(stored_text='MIT License', license_expression='mit')
        idx = index.LicenseIndex([mit_rule])
        assert idx.to_dict(True) == {'_tst_11_0': {'license': [1]}}

        # query is exactly the rule text
        exact_query = 'MIT License'
        matches = idx.match(query_string=exact_query)
        assert len(matches) == 1

        qtext, itext = get_texts(matches[0], query_string=exact_query, idx=idx)
        assert qtext == 'MIT License'
        assert itext == 'mit license'
        assert matches[0].qspan == Span(0, 1)
        assert matches[0].ispan == Span(0, 1)

        # query starts with a repeated first word
        repeated_query = 'MIT MIT License'
        matches = idx.match(query_string=repeated_query)
        assert len(matches) == 1

        qtext, itext = get_texts(matches[0], query_string=repeated_query, idx=idx)
        assert qtext == 'MIT License'
        assert itext == 'mit license'
        assert matches[0].qspan == Span(1, 2)
        assert matches[0].ispan == Span(0, 1)

        # query contains the rule text twice on a single line
        query_doc1 = 'do you think I am a mit license MIT License, yes, I think so'
        # #                                  0       1   2       3
        matches = idx.match(query_string=query_doc1)
        assert len(matches) == 2
        first, second = matches[0], matches[1]

        qtext, itext = get_texts(first, query_string=query_doc1, idx=idx)
        assert qtext == 'mit license'
        assert itext == 'mit license'
        assert first.qspan == Span(0, 1)
        assert first.ispan == Span(0, 1)

        qtext, itext = get_texts(second, query_string=query_doc1, idx=idx)
        assert qtext == 'MIT License'
        assert itext == 'mit license'
        assert second.qspan == Span(2, 3)
        assert second.ispan == Span(0, 1)

        # query contains the rule text twice, on two different lines
        query_doc2 = '''do you think I am a mit license
                        MIT License
                        yes, I think so'''
        matches = idx.match(query_string=query_doc2)
        assert len(matches) == 2
        first, second = matches[0], matches[1]

        qtext, itext = get_texts(first, query_string=query_doc2, idx=idx)
        assert qtext == 'mit license'
        assert itext == 'mit license'
        assert first.qspan == Span(0, 1)
        assert first.ispan == Span(0, 1)

        qtext, itext = get_texts(second, query_string=query_doc2, idx=idx)
        assert qtext == 'MIT License'
        assert itext == 'mit license'
        assert second.qspan == Span(2, 3)
        assert second.ispan == Span(0, 1)
示例#19
0
    def test_match_exact_from_file(self):
        """
        Match the queryperfect-mini test file against an index built from the
        index/mini test rules and check the matched texts and spans.
        """
        mini_rules = self.get_test_rules('index/mini')
        idx = index.LicenseIndex(mini_rules)
        query_loc = self.get_test_loc('index/queryperfect-mini')

        matches = idx.match(location=query_loc)
        assert len(matches) == 1
        found = matches[0]

        matched_qtext, matched_itext = get_texts(found, location=query_loc, idx=idx)
        assert matched_qtext == 'Redistribution and use in source and binary forms with or without modification are permitted'
        assert matched_itext == 'redistribution and use in source and binary forms with or without modification are permitted'

        assert found.qspan == Span(0, 13)
        assert found.ispan == Span(0, 13)
    def test_match_exact_from_file(self):
        """
        Match the queryperfect-mini test file against a mini index built from
        the index/mini test rules and check the matched texts and spans.
        """
        mini_rules = self.get_test_rules('index/mini')
        idx = MiniLicenseIndex(mini_rules)
        query_loc = self.get_test_loc('index/queryperfect-mini')

        matches = idx.match(location=query_loc)
        assert len(matches) == 1
        found = matches[0]

        matched_qtext, matched_itext = get_texts(found)
        assert matched_qtext == 'Redistribution and use in source and binary forms, with or without modification,\nare permitted.'
        assert matched_itext == 'redistribution and use in source and binary forms with or without modification\nare permitted'

        assert found.qspan == Span(0, 13)
        assert found.ispan == Span(0, 13)
    def test_match_exact_with_junk_in_between_good_tokens(self):
        """
        Match a query that has extra words in between the rule words and check
        that these extra words show up in brackets in the matched query text.
        """
        rule_text = u'licensed under the GPL, licensed under the GPL'
        rule = models.Rule(license_expression='tst', stored_text=rule_text)
        idx = MiniLicenseIndex([rule])

        querys = u'Hi licensed that under is the that GPL, licensed or under not the GPL by yes.'

        matches = idx.match(query_string=querys)
        assert len(matches) == 1

        matched_qtext, matched_itext = get_texts(matches[0])
        assert matched_qtext == u'licensed [that] under [is] the [that] GPL, licensed [or] under [not] the GPL'
        assert matched_itext == u'licensed under the gpl licensed under the gpl'
def check_rule_or_license_can_be_self_detected_exactly(rule):
    """
    Check that matching the text file of ``rule`` against the cached index
    returns exactly this rule identifier with a coverage of 100.

    The match is run on ``rule.text_file`` with ``_skip_hash_match=True``
    and ``deadline=10``. When the detected (identifier, coverage) values
    differ from the expected ones, raise an AssertionError whose message
    carries a detailed trace: the results, ``file://`` paths to the rule
    data and text files, and for each match its repr, its rule files and
    the matched query and rule texts.
    """
    idx = cache.get_index()
    matches = idx.match(
        location=rule.text_file,
        _skip_hash_match=True,
        deadline=10,
    )
    expected = [rule.identifier, '100']
    results = flatten(
        (m.rule.identifier, str(int(m.coverage()))) for m in matches)

    try:
        assert results == expected
    except AssertionError:
        # Only a failed comparison is handled here: any other exception is
        # a real error and must propagate unchanged.
        from licensedcode.tracing import get_texts
        data_file = rule.data_file
        if not data_file:
            data_file = rule.text_file.replace('.LICENSE', '.yml')
        text_file = rule.text_file
        # On failure, we compare again to get additional failure details
        # such as a clickable text_file path
        failure_trace = ['======= TEST ====']
        failure_trace.extend(results)
        failure_trace.extend([
            '',
            f'file://{data_file}',
            f'file://{text_file}',
            '======================',
        ])

        for i, match in enumerate(matches):
            qtext, itext = get_texts(match)
            m_text_file = match.rule.text_file

            if match.rule.is_from_license:
                # Replace the whole '.LICENSE' extension (dot included), as
                # done above for the tested rule, to avoid a '..yml' path.
                m_data_file = m_text_file.replace('.LICENSE', '.yml')
            else:
                m_data_file = match.rule.data_file

            failure_trace.extend([
                '',
                f'======= MATCH {i} ====',
                repr(match),
                f'file://{m_data_file}',
                f'file://{m_text_file}',
                '======= Matched Query Text:',
                '',
                qtext,
                '',
                '======= Matched Rule Text:',
                '',
                itext,
            ])

        # this assert will always fail and provide a detailed failure trace
        assert '\n'.join(failure_trace) == '\n'.join(expected)
示例#23
0
 def test_spdx_match_contains_spdx_prefix(self):
     """
     Match an SPDX license identifier line against an index built from the
     'spdx/rules-overlap' test rules and licenses, and check that the
     matched query text is the whole line, 'SPDX-license-identifier:'
     prefix included.
     """
     from licensedcode import index
     from licensedcode import tracing

     rules_location = self.get_test_loc('spdx/rules-overlap/rules')
     licenses_location = self.get_test_loc('spdx/rules-overlap/licenses')
     rules = models.get_rules(licenses_location, rules_location)
     idx = index.LicenseIndex(rules)

     query = 'SPDX-license-identifier: BSD-3-Clause-No-Nuclear-Warranty'
     matches = idx.match(query_string=query)
     assert len(matches) == 1

     matched_qtext, matched_itext = tracing.get_texts(matches[0])
     # the matched query text is expected to be the complete query line
     assert matched_qtext == 'SPDX-license-identifier: BSD-3-Clause-No-Nuclear-Warranty'
     assert matched_itext == 'spdx license identifier bsd 3 clause no nuclear warranty'
    def closure_test_function(*args, **kwargs):
        """
        Run license detection on ``test_file`` with the cached index and
        compare the detected license expressions with
        ``expected_expressions``.

        When ``regen`` is true, the test data is dumped back instead of
        being checked, using the detected expressions as the expected ones
        unless ``expected_failure`` is set. On mismatch, raise an
        AssertionError that carries a trace with the test file and the
        details of each match.

        NOTE(review): ``test_file``, ``test_data_file``, ``test_name``,
        ``regen``, ``expected_failure``, ``expected_expressions`` and
        ``license_test`` are not defined in this function; they are
        presumably provided by the enclosing scope, which is not visible
        here.
        """
        idx = cache.get_index()
        # normalize an empty or missing result to an empty list
        matches = idx.match(location=test_file, min_score=0) or []

        detected_expressions = [
            match.rule.license_expression for match in matches
        ]

        # use detection as expected and dump test back
        if regen:
            if not expected_failure:
                license_test.license_expressions = detected_expressions
            license_test.dump()
            return

        try:
            assert expected_expressions == detected_expressions
        except AssertionError:
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details.
            # Only the failed comparison is handled: other errors propagate.
            failure_trace = detected_expressions[:]
            failure_trace.extend([test_name, 'test file: file://' + test_file])

            for match in matches:
                qtext, itext = get_texts(match, location=test_file, idx=idx)
                rule_text_file = match.rule.text_file
                rule_data_file = match.rule.data_file
                failure_trace.extend([
                    '', '', '======= MATCH ====', match,
                    '======= Matched Query Text for:',
                    'file://{}'.format(test_file),
                ])
                if test_data_file:
                    failure_trace.append('file://{}'.format(test_data_file))

                failure_trace.append(qtext.splitlines())
                failure_trace.extend([
                    '',
                    # header and file paths are separate trace entries
                    '======= Matched Rule Text for:',
                    'file://{}'.format(rule_text_file),
                    'file://{}'.format(rule_data_file),
                    itext.splitlines(),
                ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_expressions == failure_trace
    def test_match_return_one_match_with_correct_offsets(self):
        """
        Match a query that starts with extra words before the rule text
        against a one-rule index and check that a single match is returned
        with the expected matched texts and spans.
        """
        rule = Rule(
            stored_text='A one. a license two. A three.',
            license_expression='abc',
        )
        idx = index.LicenseIndex([rule])

        # query word positions:
        # some=0 junk=1 A=2 one=3 A=4 license=5 two=6 A=7 three=8
        query = u'some junk. A one. A license two. A three.'

        found = idx.match(query_string=query)
        assert len(found) == 1
        match = found[0]

        matched_qtext, matched_itext = get_texts(
            match, query_string=query, idx=idx)
        assert matched_qtext == 'one license two three'
        assert matched_itext == 'one license two three'

        assert match.qspan == Span(0, 3)
        assert match.ispan == Span(0, 3)
    def test_match_works_for_apache_rule(self):
        """
        Match a short Apache notice against the cached index and check the
        detected rule identifier, the matcher, the matched query text and
        the matched lines.
        """
        query = u'''I am not a license.

            The Apache Software License, Version 2.0
            http://www.apache.org/licenses/LICENSE-2.0.txt
            '''
        idx = cache.get_index()

        found = idx.match(query_string=query)
        assert len(found) == 1

        match = found[0]
        assert match.rule.identifier == 'apache-2.0_212.RULE'
        assert match.matcher == match_aho.MATCH_AHO_EXACT

        # only the query side text is checked here
        matched_qtext, _ = get_texts(match, query_string=query, idx=idx)
        expected_qtext = u'license The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'
        assert matched_qtext == expected_qtext
        assert match.lines() == (1, 4)
示例#27
0
    def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(
            self):
        """
        Match a query against a single rule whose text contains ``{{...}}``
        parts and check that a single sequence match is returned with the
        expected matched texts and a coverage of at least 90.
        """
        # was failing when a gapped token (from a template) starts at a
        # beginning of an index doc. We may still skip that, but capture a large match anyway.

        rule_text = u'''
            Copyright
            THIS IS FROM {{THE OLD CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE OLD CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF NEW SUCH}} DAMAGE
        '''

        rule = Rule(stored_text=rule_text, license_expression='test')
        idx = index.LicenseIndex([rule])

        querys = u'''
            Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        result = idx.match(query_string=querys)
        assert 1 == len(result)
        match = result[0]
        assert match_seq.MATCH_SEQ == match.matcher

        # expected texts are compared word by word, ignoring whitespace
        exp_qtext = u"""
            Copyright [2003] [C] [James] [All] [Rights] [Reserved]
            THIS IS FROM THE CODEHAUS
            AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
        """.split()

        exp_itext = u"""
            Copyright
            THIS IS FROM THE <OLD> CODEHAUS
            AND CONTRIBUTORS
            IN NO EVENT SHALL THE <OLD> CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF <NEW> SUCH DAMAGE
        """.lower().split()
        qtext, itext = get_texts(match, query_string=querys, idx=idx)
        assert exp_qtext == qtext.split()
        assert exp_itext == itext.split()
        assert 90 <= match.coverage()
    def test_match_exact_from_string_once(self):
        """
        Match a query string that contains the rule text surrounded by
        extra words and check that a single match is returned with the
        expected matched texts and spans.
        """
        stored_text = 'Redistribution and use in source and binary forms, with or without modification, are permitted'
        rule = models.Rule(stored_text=stored_text, license_expression='bsd')
        idx = MiniLicenseIndex([rule])

        query = '''
            The
            Redistribution and use in source and binary forms, with or without modification, are permitted.

            Always'''

        found = idx.match(query_string=query)
        assert len(found) == 1
        match = found[0]

        matched_qtext, matched_itext = get_texts(match)
        expected_qtext = 'Redistribution and use in source and binary forms, with or without modification,\nare permitted.'
        expected_itext = 'redistribution and use in source and binary forms with or without modification\nare permitted'
        assert matched_qtext == expected_qtext
        assert matched_itext == expected_itext

        # both the query side and the rule side span positions 0 to 13
        assert match.qspan == Span(0, 13)
        assert match.ispan == Span(0, 13)
示例#29
0
    def test_match_return_correct_offsets(self):
        """
        Match a query that starts with extra words before the rule text
        against a one-rule index and check that a single match is returned
        with the expected matched texts and spans.
        """
        # rule word positions: A=0 GPL=1 A=2 MIT=3 A=4 LGPL=5
        rule = models.Rule(
            license_expression='tst',
            stored_text=u'A GPL. A MIT. A LGPL.',
        )
        idx = index.LicenseIndex([rule])

        # query word positions:
        # some=0 junk=1 A=2 GPL=3 A=4 MIT=5 A=6 LGPL=7
        query = u'some junk. A GPL. A MIT. A LGPL.'

        found = idx.match(query_string=query)
        assert len(found) == 1
        match = found[0]

        matched_qtext, matched_itext = get_texts(
            match, query_string=query, idx=idx)
        assert matched_qtext == 'A GPL A MIT A LGPL'
        assert matched_itext == 'A GPL A MIT A LGPL'

        assert match.qspan == Span(0, 5)
        assert match.ispan == Span(0, 5)
    def test_match_can_match_with_sax_rule_for_public_domain(self):
        """
        Match a query holding a public domain dedication with extra words
        against a one-rule index built from a shorter version of that text,
        and check the single match returned: its matched texts, coverage,
        score and spans.
        """
        # rule text: the dedication without the product-specific words
        test_text = '''
        I hereby abandon any property rights to , and release all of  source
        code, compiled code, and documentation contained in this distribution
        into the Public Domain.
        '''
        rule = Rule(stored_text=test_text, license_expression='public-domain')
        idx = index.LicenseIndex([rule])
        # query text: the same dedication with extra words in the middle and
        # extra sentences before and after
        querys = '''
        SAX2 is Free!
        I hereby abandon any property rights to SAX 2.0 (the Simple API for
        XML), and release all of the SAX 2.0 source code, compiled code, and
        documentation contained in this distribution into the Public Domain. SAX
        comes with NO WARRANTY or guarantee of fitness for any purpose.
        SAX2 is Free!
        '''
        matches = idx.match(query_string=querys)

        assert 1 == len(matches)
        match = matches[0]

        qtext, itext = get_texts(match, query_string=querys, idx=idx)
        # NOTE(review): the [..] and <..> markers presumably flag words that
        # are not part of the match; confirm against get_texts.
        # Texts are compared word by word, ignoring whitespace.
        expected_qtext = u'''
        I hereby abandon any property rights to [SAX] [2] [0] <the> [Simple] [API] [for] [XML]
        <and> <release> <all> <of> <the> [SAX] [2] [0]
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.split()
        assert expected_qtext == qtext.split()

        expected_itext = u'''
        I hereby abandon any property rights to
        <and> <release> <all> <of>
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.lower().split()
        assert expected_itext == itext.split()

        assert 84 == match.coverage()
        assert 84 == match.score()
        # each expected span is two Span objects combined with ``|``
        assert Span(0, 6) | Span(13, 26) == match.qspan
        assert Span(0, 6) | Span(11, 24) == match.ispan