示例#1
0
    def test_dump_rules(self):
        """
        Dump every rule loaded from a copy of the 'models/rules' fixture,
        reload the rules, and compare them with the expected JSON file.
        """
        # copy=True is passed because dump() is called on the loaded rules
        # below (presumably writing back to the fixture location -- confirm).
        rules_dir = self.get_test_loc('models/rules', copy=True)
        for rule in list(models.load_rules(rules_dir)):
            rule.dump()

        # Load a second time so the comparison runs on freshly loaded rules.
        reloaded = list(models.load_rules(rules_dir))
        actual = as_sorted_mapping_seq(reloaded)
        expected_loc = self.get_test_loc('models/rules.expected.json')
        check_json(expected_loc, actual)
示例#2
0
 def test_load_rules(self):
     """
     Load the 'models/rules' fixture, check that every item is a
     models.Rule, and compare the rules with the expected JSON file.
     """
     rules_dir = self.get_test_loc('models/rules')
     loaded = list(models.load_rules(rules_dir))
     for rule in loaded:
         assert isinstance(rule, models.Rule)
     actual = as_sorted_mapping_seq(loaded)
     expected_loc = self.get_test_loc('models/rules.expected.json')
     check_json(expected_loc, actual)
示例#3
0
 def test_query_run_has_correct_offset(self):
     """
     Check the start, end and tokens of every query run built for the
     'query/runs' fixture with a line threshold of 4.
     """
     rules_loc = self.get_test_loc('query/runs/rules')
     loaded_rules = list(models.load_rules(rules_loc))
     license_index = index.LicenseIndex(loaded_rules)
     query_loc = self.get_test_loc('query/runs/query.txt')
     qry = Query(location=query_loc, idx=license_index, line_threshold=4)
     runs = [query_run.to_dict() for query_run in qry.query_runs]

     # Tokens expected for the second query run.
     notice_tokens = (
         u'this library is free software you can redistribute it and or modify '
         u'it under the terms of the gnu library general public license as '
         u'published by the free software foundation either version 2 of the '
         u'license or at your option any later version this library is '
         u'distributed in the hope that it will be useful but without any '
         u'warranty without even the implied warranty of merchantability or '
         u'fitness for a particular purpose see the gnu library general public '
         u'license for more details you should have received a copy of the gnu '
         u'library general public license along with this library see the file '
         u'copying lib if not write to the free software foundation inc 51 '
         u'franklin street fifth floor boston ma 02110 1301 usa'
     )
     expected = [
         dict(start=0, end=0, tokens=u'inc'),
         dict(start=1, end=123, tokens=notice_tokens),
     ]
     assert runs == expected
 def test_index_fails_on_duplicated_rules(self):
     """
     Check that building a MiniLicenseIndex from the
     'index/no_duplicated_rule' fixture raises an AssertionError whose
     message contains 'Duplicate rules'.
     """
     rule_dir = self.get_test_loc('index/no_duplicated_rule')
     try:
         MiniLicenseIndex(models.load_rules(rule_dir))
     except AssertionError as e:
         assert u'Duplicate rules' in str(e)
     else:
         # Fail outside of the try body: self.fail() itself raises an
         # AssertionError by default, which the handler above would catch
         # and replace with an uninformative assertion failure.
         self.fail('Exception on dupes not raised')
示例#5
0
    def test_match_license_performance_profiling_on_index_with_single_license(self):
        """
        Time 100 approximate-match passes of one query against an index built
        from the 'perf/idx/rules' fixture, print the duration, then raise an
        Exception carrying the same values.
        """
        from time import time
        from licensedcode import query

        # pre-index : we are profiling only the detection, not the indexing
        rule_dir = self.get_test_loc('perf/idx/rules')
        rules = models.load_rules(rule_dir)
        idx = index.LicenseIndex(rules)
        location = self.get_test_loc('perf/idx/query.txt')
        # Read through a context manager so the file handle is closed right
        # away instead of whenever the file object happens to be collected.
        with open(location, 'rb') as query_file:
            querys = query_file.read()

        qry = query.build_query(query_string=querys, idx=idx)

        def mini_seq_match(idx):
            # Exhaust the matches so that all the matching work is timed.
            list(idx.get_approximate_matches(qry, [], []))

        start = time()
        for _ in range(100):
            mini_seq_match(idx)
        duration = time() - start
        values = ('ScanCode diff:', duration)
        print(*values)
        # NOTE(review): this unconditional raise means the test can never
        # pass; presumably deliberate so the timing is always reported --
        # confirm before relying on this test in a regular run.
        raise Exception(values)
    def test_query_run_and_tokenizing_breaking_works__with_plus_as_expected(self):
        """
        Check that the 'query/run_breaking' query yields a single query run
        from 0 to 121 with the expected tokens, and that the tokens of the
        last query run equal the tokens of the indexed rule with rid 0.
        """
        rules_loc = self.get_test_loc('query/run_breaking/rules')
        loaded_rules = list(models.load_rules(rules_loc))
        license_index = index.LicenseIndex(loaded_rules)
        query_loc = self.get_test_loc('query/run_breaking/query.txt')
        qry = Query(query_loc, idx=license_index)
        runs = [query_run.to_dict() for query_run in qry.query_runs]

        run_tokens = (
            'this library is free software you can redistribute it '
            'and or modify it under the terms of the gnu library '
            'general public license as published by the free software '
            'foundation either version 2 of the license or at your '
            'option any later version this library is distributed in '
            'the hope that it will be useful but without any warranty '
            'without even the implied warranty of merchantability or '
            'fitness for a particular purpose see the gnu library '
            'general public license for more details you should have '
            'received a copy of the gnu library general public '
            'license along with this library see the file copying lib '
            'if not write to the free software foundation 51 franklin '
            'street fifth floor boston ma 02110 1301 usa'
        )
        expected = [dict(start=0, end=121, tokens=run_tokens)]
        assert runs == expected

        # Bare attribute access kept as in the original test.
        qry.tokens

        # The rule tokens must be exactly the tokens of the last query run.
        tokens_by_tid = license_index.tokens_by_tid
        last_run_words = [tokens_by_tid[tid] for tid in qry.query_runs[-1].tokens]
        rule_words = [tokens_by_tid[tid] for tid in license_index.tids_by_rid[0]]
        assert rule_words == last_run_words
示例#7
0
 def test_query_run_has_correct_offset(self):
     """
     Build the query runs of 'query/runs/query.txt' (line threshold 4)
     against the 'query/runs/rules' index and check their offsets and tokens.
     """
     rules_loc = self.get_test_loc('query/runs/rules')
     loaded_rules = list(models.load_rules(rules_loc))
     license_index = index.LicenseIndex(loaded_rules)
     query_loc = self.get_test_loc('query/runs/query.txt')
     qry = Query(location=query_loc, idx=license_index, line_threshold=4)

     actual = []
     for query_run in qry.query_runs:
         actual.append(query_run.to_dict())

     first_run = {'start': 0, 'end': 0, 'tokens': u'inc'}
     second_run = {
         'start': 1,
         'end': 123,
         'tokens': (
             u'this library is free software you can redistribute it and or modify '
             u'it under the terms of the gnu library general public license as '
             u'published by the free software foundation either version 2 of the '
             u'license or at your option any later version this library is '
             u'distributed in the hope that it will be useful but without any '
             u'warranty without even the implied warranty of merchantability or '
             u'fitness for a particular purpose see the gnu library general public '
             u'license for more details you should have received a copy of the gnu '
             u'library general public license along with this library see the file '
             u'copying lib if not write to the free software foundation inc 51 '
             u'franklin street fifth floor boston ma 02110 1301 usa'
         ),
     }
     assert actual == [first_run, second_run]
示例#8
0
 def test_index_fails_on_duplicated_rules(self):
     """
     Check that building a LicenseIndex from the
     'index/no_duplicated_rule' fixture raises an AssertionError whose
     message contains 'Duplicate rules'.
     """
     rule_dir = self.get_test_loc('index/no_duplicated_rule')
     try:
         index.LicenseIndex(models.load_rules(rule_dir))
     except AssertionError as e:
         assert u'Duplicate rules' in str(e)
     else:
         # Fail outside of the try body: self.fail() itself raises an
         # AssertionError by default, which the handler above would catch
         # and replace with an uninformative assertion failure.
         self.fail('Exception on dupes not raised')
    def test_match_seq_are_correct_on_apache(self):
        """
        Check that 'match_seq/query' yields a single sequence match against
        an index built with an extended legalese set, and that the matched
        query text equals the expected text once split on whitespace.
        """
        rules_loc = self.get_test_loc('match_seq/rules')

        # Extend the shared mini_legalese with extra words for this index.
        legalese = mini_legalese | {
            'redistributions', 'written', 'registered', 'derived',
            'damage', 'due', 'alternately', 'nor',
        }

        license_index = index.LicenseIndex(load_rules(rules_loc), _legalese=legalese)

        query_loc = self.get_test_loc('match_seq/query')
        found = license_index.match(location=query_loc)
        assert 1 == len(found)
        only_match = found[0]
        assert match_seq.MATCH_SEQ == only_match.matcher
        matched_text, _ = get_texts(only_match)
        expected = u'''
            The OpenSymphony Group. All rights reserved.

            Redistribution and use in source and binary forms, with or without modification,
            are permitted provided that the following conditions are met:

            1. Redistributions of source code must retain the above copyright notice, this
            list of conditions and the following disclaimer.

            2. Redistributions in binary form must reproduce the above copyright notice,
            this list of conditions and the following disclaimer in the documentation and/or
            other materials provided with the distribution.

            3. The end-user documentation included with the redistribution, if any, must
            include the following acknowledgment:

            [4]. "[This] [product] [includes] [software] [developed] [by] [the] [OpenSymphony] [Group]
            ([http]://[www].[opensymphony].[com]/)."

            [5]. Alternately, this acknowledgment may appear in the software itself, if and
            wherever such third-party acknowledgments normally appear.

            The names "OpenSymphony" and "The OpenSymphony Group" must not be used to
            endorse or promote products derived from this software without prior written
            permission. For written permission, please contact [email protected] .

            Products derived from this software may not be called "OpenSymphony" or
            "[OsCore]", nor may "OpenSymphony" or "[OsCore]" appear in their name, without prior
            written permission of the OpenSymphony Group.

            THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
            INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
            FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE
            SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
            INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
            LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
            PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
            LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
            OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
            ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        # Compare word by word so layout differences do not matter.
        matched_words = matched_text.split()
        expected_words = expected.split()
        assert matched_words == expected_words
示例#10
0
 def test_load_rules(self):
     """
     Check that three Rule objects are loaded from the 'models/rules'
     fixture and that their licenses fields have the expected values.
     """
     test_dir = self.get_test_loc('models/rules')
     # Materialize the rules: len() and the two passes below need a real
     # sequence, and other callers in this file wrap load_rules() in list().
     rules = list(models.load_rules(test_dir))
     # one license is obsolete and not loaded
     assert 3 == len(rules)
     assert all(isinstance(r, models.Rule) for r in rules)
     # test a sample of a licenses field
     expected = [[u'lzma-sdk-original'], [u'gpl-2.0'], [u'oclc-2.0']]
     assert sorted(expected) == sorted(r.licenses for r in rules)
示例#11
0
 def test_load_rules(self):
     """
     Load the 'models/rules' fixture and check the number of rules, their
     type, and the values of their licenses fields.
     """
     test_dir = self.get_test_loc('models/rules')
     # Wrap in list() so that len() and repeated iteration work even if
     # load_rules() hands back a one-shot iterable, as the list() wrapping
     # used by other callers in this file suggests it may.
     rules = list(models.load_rules(test_dir))
     # one license is obsolete and not loaded
     assert 3 == len(rules)
     assert all(isinstance(r, models.Rule) for r in rules)
     # test a sample of a licenses field
     expected = [[u'lzma-sdk-original'], [u'gpl-2.0'], [u'oclc-2.0']]
     assert sorted(expected) == sorted(r.licenses for r in rules)
示例#12
0
    def test_match_license_performance_profiling_on_limited_index(self):
        """
        Run self.profile_match() for the rule_template query against an index
        built from the rule_template rules.
        """
        # pre-index : we are profiling only the detection, not the indexing
        rules_loc = self.get_test_loc('detect/rule_template/rules')
        license_index = index.LicenseIndex(models.load_rules(rules_loc))

        log_name = 'license_match_limited_index_profile_log.txt'
        query_loc = self.get_test_loc('detect/rule_template/query.txt')
        self.profile_match(license_index, [query_loc], log_name)
    def test_match_license_performance_profiling_on_limited_index(self):
        """
        Profile license matching of 'detect/rule_template/query.txt' with an
        index limited to the 'detect/rule_template/rules' fixture.
        """
        # The index is built up front: only detection is profiled, not indexing.
        loaded_rules = models.load_rules(self.get_test_loc('detect/rule_template/rules'))
        limited_index = index.LicenseIndex(loaded_rules)

        profile_log = 'license_match_limited_index_profile_log.txt'
        query_locations = [self.get_test_loc('detect/rule_template/query.txt')]
        self.profile_match(limited_index, query_locations, profile_log)
示例#14
0
    def test_template_detection_works_for_sun_bcl(self):
        """
        Check that matching the rule_template query against an index of the
        rule_template rules returns exactly one match.
        """
        # setup
        rules_loc = self.get_test_loc('detect/rule_template/rules')
        license_index = detect.get_license_index(models.load_rules(rules_loc))

        # test
        query_loc = self.get_test_loc('detect/rule_template/query.txt')
        found = license_index.match(query_loc)
        assert len(found) == 1
示例#15
0
    def test_template_detection_works_for_sun_bcl(self):
        """
        Build a license index from 'detect/rule_template/rules' and check
        that 'detect/rule_template/query.txt' produces a single match.
        """
        # setup: index the template rules
        loaded_rules = models.load_rules(self.get_test_loc('detect/rule_template/rules'))
        template_index = detect.get_license_index(loaded_rules)

        # test: exactly one match is expected
        query_doc = self.get_test_loc('detect/rule_template/query.txt')
        match_count = len(template_index.match(query_doc))
        assert match_count == 1
示例#16
0
def get_rules(source, replacement):
    """
    Yield (rule, new text) tuples for each existing Rule that is not a false
    positive and whose text contains `source`. The new text is the rule text
    with `source` replaced by `replacement`.
    """
    candidates = (r for r in models.load_rules() if not r.is_false_positive)
    for rule in candidates:
        rule_text = rule.text()
        if source not in rule_text:
            continue
        yield rule, rule_text.replace(source, replacement)
示例#17
0
    def test_match_can_match_with_index_built_from_rule_directory_with_sun_bcls(self):
        """
        Check that the rule_template query yields one sequence match whose
        query span is Span(0, 957) | Span(959, 1756).
        """
        rules_loc = self.get_test_loc('detect/rule_template/rules')
        license_index = index.LicenseIndex(load_rules(rules_loc))

        # at line 151 the query has an extra "Software" word inserted to avoid hash matching
        query_loc = self.get_test_loc('detect/rule_template/query.txt')
        found = license_index.match(location=query_loc)
        assert len(found) == 1
        only_match = found[0]
        expected_qspan = Span(0, 957) | Span(959, 1756)
        assert expected_qspan == only_match.qspan
        assert only_match.matcher == match_seq.MATCH_SEQ
示例#18
0
    def test_match_can_match_with_index_built_from_rule_directory_with_sun_bcls(self):
        """
        Match 'detect/rule_template/query.txt' against an index built from
        the 'detect/rule_template/rules' directory and check the single
        resulting match: its query span and its matcher.
        """
        idx = index.LicenseIndex(load_rules(self.get_test_loc('detect/rule_template/rules')))

        # The query has an extra "Software" word inserted at line 151 so that
        # hash matching is avoided.
        query_loc = self.get_test_loc('detect/rule_template/query.txt')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        result = results[0]
        joined_span = Span(0, 957) | Span(959, 1756)
        assert joined_span == result.qspan
        assert result.matcher == match_seq.MATCH_SEQ
示例#19
0
    def test_filter_matches_handles_interlaced_matches_with_overlap_and_same_license(self):
        """
        Check that matching 'match_filter/query' returns only the '2-aho'
        match on rule2.RULE.
        """
        rules_loc = self.get_test_loc('match_filter/rules')
        idx = index.LicenseIndex(load_rules(rules_loc))
        rules_by_identifier = dict((rule.identifier, rule) for rule in idx.rules_by_rid)
        query_loc = self.get_test_loc('match_filter/query')
        matches = idx.match(location=query_loc)

        # Noted as filtered in the original test and therefore not expected:
        # the '3-seq' match on rule1.RULE with
        # qspan=Span(4, 47) | Span(50, 59) and ispan=Span(1, 53).
        expected = [
            LicenseMatch(
                matcher='2-aho',
                rule=rules_by_identifier['rule2.RULE'],
                qspan=Span(24, 86),
                ispan=Span(0, 62),
            ),
        ]

        assert expected == matches
示例#20
0
    def test_rules_types_has_only_boolean_values(self):
        """
        Check that the four is_license_* flags of every rule loaded from
        rules_data_dir are bool values. Offending rules are collected as
        (data_file, text_file) tuples so a failure lists all of them.
        """
        rules = list(models.load_rules(rules_data_dir))
        rule_consistency_errors = []

        for r in rules:
            list_rule_types = [r.is_license_text, r.is_license_notice,
                               r.is_license_tag, r.is_license_reference]

            # isinstance() is the idiomatic type check; bool cannot be
            # subclassed, so this accepts exactly True and False.
            if not all(isinstance(rule_type, bool) for rule_type in list_rule_types):
                rule_consistency_errors.append((r.data_file, r.text_file))

        assert rule_consistency_errors == []
示例#21
0
    def test_rules_have_only_one_rule_type(self):
        """
        Check that no rule loaded from rules_data_dir has more than one of
        its four is_license_* flags set. The data files of offending rules
        are collected so a failure lists all of them.
        """
        loaded = list(models.load_rules(rules_data_dir))

        def flags_of(rule):
            return [rule.is_license_text, rule.is_license_notice,
                    rule.is_license_tag, rule.is_license_reference]

        offenders = [rule.data_file for rule in loaded if sum(flags_of(rule)) > 1]

        assert offenders == []
    def test_match_freertos(self):
        """
        Check that querying a rule file from the 'mach_aho/rtos_exact/'
        directory against an index of that directory gives one exact Aho match.
        """
        rules_loc = self.get_test_loc('mach_aho/rtos_exact/')
        license_index = index.LicenseIndex(models.load_rules(rules_loc))

        query_loc = self.get_test_loc('mach_aho/rtos_exact/gpl-2.0-freertos.RULE')

        whole_run = query.build_query(location=query_loc, idx=license_index).whole_query_run()

        found = match_aho.exact_match(license_index, whole_run, license_index.rules_automaton)
        assert len(found) == 1
        assert found[0].matcher == match_aho.MATCH_AHO_EXACT
 def test_match_hash_returns_correct_offset(self):
     """
     Check that matching 'hash/query.txt' yields a single hash match with
     100 coverage on the first loaded rule, with Span(0, 119) on both the
     query and the rule side.
     """
     rules_loc = self.get_test_loc('hash/rules')
     loaded_rules = list(models.load_rules(rules_loc))
     license_index = index.LicenseIndex(loaded_rules)
     query_loc = self.get_test_loc('hash/query.txt')
     found = license_index.match(query_loc)
     assert 1 == len(found)
     hash_match = found[0]
     assert match_hash.MATCH_HASH == hash_match.matcher
     assert 100 == hash_match.coverage()
     assert hash_match.rule == loaded_rules[0]
     assert hash_match.qspan == Span(0, 119)
     assert hash_match.ispan == Span(0, 119)
示例#24
0
 def test_Rule__validate_with_invalid_language(self):
     """
     Validate every rule of the 'models/rule_validate_lang' fixture in
     sorted order and check the collected validation messages.
     """
     rules_loc = self.get_test_loc('models/rule_validate_lang')
     messages = [
         message
         for rule in sorted(models.load_rules(rules_loc))
         for message in rule.validate()
     ]
     only_one_flag = 'Invalid rule is_license_* flags. Only one allowed.'
     missing_flag = 'At least one is_license_* flag is needed.'
     expected = [
         'Unknown language: foobar',
         only_one_flag,
         missing_flag,
         only_one_flag,
         missing_flag,
     ]
     assert messages == expected
 def test_match_hash_returns_correct_offset(self):
     """
     Check that matching 'hash/query.txt' yields a single hash match with
     100 coverage on the first loaded rule, with Span(0, 121) on both the
     query and the rule side.
     """
     rules_loc = self.get_test_loc('hash/rules')
     loaded_rules = list(models.load_rules(rules_loc))
     license_index = index.LicenseIndex(loaded_rules)
     query_loc = self.get_test_loc('hash/query.txt')
     found = license_index.match(query_loc)
     assert len(found) == 1
     hash_match = found[0]
     assert hash_match.matcher == match_hash.MATCH_HASH
     assert hash_match.coverage() == 100
     assert loaded_rules[0] == hash_match.rule
     assert Span(0, 121) == hash_match.qspan
     assert Span(0, 121) == hash_match.ispan
    def test_match_hash_can_match_exactly(self):
        """
        Check that matching a rule file from 'hash/rules' against an index of
        that directory yields a single hash match with 100 coverage on the
        first loaded rule, with Span(0, 119) on both sides.
        """
        rules_loc = self.get_test_loc('hash/rules')
        loaded_rules = list(models.load_rules(rules_loc))
        license_index = index.LicenseIndex(loaded_rules)
        query_loc = self.get_test_loc('hash/rules/lgpl-2.0-plus_23.RULE')

        found = license_index.match(query_loc)
        assert 1 == len(found)
        exact_match = found[0]
        assert 100 == exact_match.coverage()
        assert match_hash.MATCH_HASH == exact_match.matcher
        assert exact_match.rule == loaded_rules[0]
        assert exact_match.qspan == Span(0, 119)
        assert exact_match.ispan == Span(0, 119)
示例#27
0
def cli(path=(), update=True):
    """
    Update licenses and rules with ignorable copyrights, holders, authors URLs
    and emails.
    """
    # NOTE(review): `update` is not read anywhere in this body -- confirm
    # whether it is consumed elsewhere.
    # Gather all licenses first, then all rules, in one list.
    licensish = list(cache.get_licenses_db().values())
    licensish.extend(models.load_rules())

    if path:
        def is_selected(item):
            # Keep items whose text or data file name ends with `path`.
            return item.text_file.endswith(path) or item.data_file.endswith(path)

        licensish = [item for item in licensish if is_selected(item)]
    refresh_ignorables(licensish)
    def test_match_hash_can_match_exactly(self):
        """
        Check that matching a rule file from 'hash/rules' against an index of
        that directory yields a single hash match with 100 coverage on the
        first loaded rule, with Span(0, 121) on both sides.
        """
        rules_loc = self.get_test_loc('hash/rules')
        loaded_rules = list(models.load_rules(rules_loc))
        license_index = index.LicenseIndex(loaded_rules)
        query_loc = self.get_test_loc('hash/rules/lgpl-2.0-plus_23.RULE')

        found = license_index.match(query_loc)
        assert len(found) == 1
        exact_match = found[0]
        assert exact_match.coverage() == 100
        assert exact_match.matcher == match_hash.MATCH_HASH
        assert loaded_rules[0] == exact_match.rule
        assert Span(0, 121) == exact_match.qspan
        assert Span(0, 121) == exact_match.ispan
示例#29
0
    def test_match_freertos(self):
        """
        Build an index from 'mach_aho/rtos_exact/', query it with a rule file
        from the same directory, and check for a single exact Aho match.
        """
        idx = index.LicenseIndex(models.load_rules(self.get_test_loc('mach_aho/rtos_exact/')))

        query_loc = self.get_test_loc('mach_aho/rtos_exact/gpl-2.0-freertos.RULE')

        qry = query.build_query(location=query_loc, idx=idx)
        whole_run = qry.whole_query_run()

        results = match_aho.exact_match(idx, whole_run, idx.rules_automaton)
        assert 1 == len(results)
        first_result = results[0]
        assert match_aho.MATCH_AHO_EXACT == first_result.matcher
示例#30
0
    def test_query_and_index_tokens_are_identical_for_same_text(self):
        """
        Check that the tokens of the indexed rule with rid 0 are the same as
        the tokens of the whole query run built from the same rule file.
        """
        rules_loc = self.get_test_loc('query/rtos_exact/')
        from licensedcode.models import load_rules
        idx = index.LicenseIndex(load_rules(rules_loc))
        query_loc = self.get_test_loc('query/rtos_exact/gpl-2.0-freertos.RULE')

        def as_words(token_ids):
            # Map token ids back to their token strings.
            return [idx.tokens_by_tid[tid] for tid in token_ids]

        index_words = as_words(idx.tids_by_rid[0])

        whole_run = Query(location=query_loc, idx=idx, line_threshold=4).whole_query_run()

        query_words = as_words(whole_run.tokens)

        assert index_words == query_words
        assert u' '.join(index_words) == u' '.join(query_words)
示例#31
0
    def test_query_and_index_tokens_are_identical_for_same_text(self):
        """
        Index the 'query/rtos_exact/' rules, query with a rule file from the
        same directory, and check that the index-side and query-side tokens
        are equal both as lists and as space-joined text.
        """
        rules_loc = self.get_test_loc('query/rtos_exact/')
        from licensedcode.models import load_rules
        license_index = index.LicenseIndex(load_rules(rules_loc))
        query_loc = self.get_test_loc('query/rtos_exact/gpl-2.0-freertos.RULE')

        # Tokens of the indexed rule with rid 0.
        index_side = []
        for tid in license_index.tids_by_rid[0]:
            index_side.append(license_index.tokens_by_tid[tid])

        qry = Query(location=query_loc, idx=license_index, line_threshold=4)
        whole_run = qry.whole_query_run()

        # Tokens of the whole query run.
        query_side = []
        for tid in whole_run.tokens:
            query_side.append(license_index.tokens_by_tid[tid])

        assert query_side == index_side
        assert u' '.join(query_side) == u' '.join(index_side)
示例#32
0
    def test_match_seq_are_correct_on_apache(self):
        """
        Check that 'match_seq/query' yields a single sequence match and that
        the matched query text equals the expected text once both are split
        on whitespace.
        """
        rules_loc = self.get_test_loc('match_seq/rules')
        license_index = index.LicenseIndex(load_rules(rules_loc))

        query_loc = self.get_test_loc('match_seq/query')
        found = license_index.match(location=query_loc)
        assert len(found) == 1
        only_match = found[0]
        assert only_match.matcher == match_seq.MATCH_SEQ
        matched_text, _ = get_texts(only_match, location=query_loc, idx=license_index)
        expected = u'''
        Redistribution and use in source and
        binary forms with or without modification are permitted provided that the following
        conditions are met 
        <1> Redistributions of source code must retain the above copyright
        notice this of conditions and the following disclaimer 
        <2> Redistributions in
        binary form must reproduce the above copyright notice this  of conditions and the
        following disclaimer in the documentation and or other materials provided with the
        distribution 
        <3> The end user documentation included with the redistribution if any
        must include the following acknowledgment <4> <This> <product> <includes> <software>
        <developed> <by> <the> <OpenSymphony> <Group> <http> <www> <opensymphony> <com> <5>
        Alternately this acknowledgment may appear in the software itself if and wherever
        such third party acknowledgments normally appear The names OpenSymphony and The
        OpenSymphony Group must not be used to endorse or promote products derived from this
        software without prior written permission For written permission please contact
        license opensymphony com Products derived from this software may not be called
        OpenSymphony or OsCore nor may OpenSymphony or OsCore appear in their name
        without prior written permission of the OpenSymphony Group THIS SOFTWARE IS PROVIDED
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR PARTICULAR PURPOSE ARE
        DISCLAIMED IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE
        LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE
        DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
        LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
        OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
        POSSIBILITY OF SUCH DAMAGE
        '''
        # Compare word by word so layout differences do not matter.
        expected_words = expected.split()
        matched_words = matched_text.split()
        assert expected_words == matched_words
    def test_match_seq_are_correct_on_apache(self):
        """
        Match 'match_seq/query' against an index of 'match_seq/rules' and
        check that the one resulting match is a sequence match whose query
        text equals the expected text after whitespace splitting.
        """
        idx = index.LicenseIndex(load_rules(self.get_test_loc('match_seq/rules')))

        query_loc = self.get_test_loc('match_seq/query')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        result = results[0]
        assert result.matcher == match_seq.MATCH_SEQ
        query_text, _rule_text = get_texts(result, location=query_loc, idx=idx)
        expected = u'''
        The OpenSymphony Group All rights reserved Redistribution and use in source and
        binary forms with or without modification are permitted provided that the following
        conditions are met 1 Redistributions of source code must retain the above copyright
        notice this list of conditions and the following disclaimer 2 Redistributions in
        binary form must reproduce the above copyright notice this list of conditions and the
        following disclaimer in the documentation and or other materials provided with the
        distribution 3 The end user documentation included with the redistribution if any
        must include the following acknowledgment <4> <This> <product> <includes> <software>
        <developed> <by> <the> <OpenSymphony> <Group> <http> <www> <opensymphony> <com> <5>
        Alternately this acknowledgment may appear in the software itself if and wherever
        such third party acknowledgments normally appear The names OpenSymphony and The
        OpenSymphony Group must not be used to endorse or promote products derived from this
        software without prior written permission For written permission please contact
        license opensymphony com Products derived from this software may not be called
        OpenSymphony or [OsCore] nor may OpenSymphony or [OsCore] appear in their name
        without prior written permission of the OpenSymphony Group THIS SOFTWARE IS PROVIDED
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE
        LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE
        DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
        LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
        OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
        POSSIBILITY OF SUCH DAMAGE
        '''
        # Only the sequence of words is compared, not the layout.
        wanted = expected.split()
        got = query_text.split()
        assert wanted == got
示例#34
0
 def test_template_rule_is_loaded_correctly(self):
     """Check that exactly one rule is loaded from the 'models/rule_template' fixture."""
     template_dir = self.get_test_loc('models/rule_template')
     loaded = list(models.load_rules(template_dir))
     rule_count = len(loaded)
     assert rule_count == 1
示例#35
0
 def test_template_rule_is_loaded_correctly(self):
     """
     Check that exactly one rule is loaded from the 'models/rule_template'
     fixture and that its `template` attribute is truthy.
     """
     test_dir = self.get_test_loc('models/rule_template')
     # Materialize the rules: len() and indexing below need a real sequence,
     # and other callers in this file wrap load_rules() in list().
     rules = list(models.load_rules(test_dir))
     assert 1 == len(rules)
     rule = rules[0]
     assert rule.template
示例#36
0
 def test_load_rules_loads_file_content_at_path_and_not_path_as_string(self):
     """
     Check that no rule loaded from 'models/similar_names' has
     'rules proprietary 10 rule' as its last four tokens (presumably what
     tokenizing a file path instead of the file content would produce).
     """
     rules_loc = self.get_test_loc('models/similar_names')
     loaded = list(models.load_rules(rules_loc))

     # Collect the space-joined last four tokens of every rule first.
     tails = []
     for rule in loaded:
         last_four = list(rule.tokens())[-4:]
         tails.append(' '.join(last_four))

     assert all(tail != 'rules proprietary 10 rule' for tail in tails)
示例#37
0
 def test_Rule__validate_with_false_positive_rule(self):
     """
     Check that validating the first rule loaded from 'models/rule_validate'
     reports no validation message.
     """
     rules_loc = self.get_test_loc('models/rule_validate')
     loaded = list(models.load_rules(rules_loc))
     first_rule = loaded[0]
     messages = list(first_rule.validate())
     assert messages == []
示例#38
0
 def test_template_rule_is_loaded_correctly(self):
     """Load the 'models/rule_template' fixture and check that it holds a single rule."""
     loaded = list(models.load_rules(self.get_test_loc('models/rule_template')))
     assert 1 == len(loaded)
示例#39
0
 def test_template_rule_is_loaded_correctly(self):
     """
     Load the 'models/rule_template' fixture and check that it holds a
     single rule whose `template` attribute is truthy.
     """
     test_dir = self.get_test_loc('models/rule_template')
     # Wrap in list() so that len() and rules[0] work even if load_rules()
     # hands back a one-shot iterable, as the list() wrapping used by other
     # callers in this file suggests it may.
     rules = list(models.load_rules(test_dir))
     assert 1 == len(rules)
     rule = rules[0]
     assert rule.template

def build_rule_validation_tests(rules, cls):
    """
    Build one test method for each rule of `rules` that is not negative and
    attach it to the `cls` test class, under a name derived from the rule
    identifier.
    """
    name_prefix = 'test_validate_self_detection_of_rule_for_'
    for rule in rules:
        if not rule.negative:
            test_name = name_prefix + text.python_safe_name(rule.identifier)
            test_method = make_license_test_function(
                rule.licenses, rule.text_file, rule.data_file, test_name,
                detect_negative=not rule.negative, trace_text=True)
            setattr(cls, test_name, test_method)


class TestValidateLicenseRuleSelfDetection(unittest.TestCase):
    # Intentionally empty: test methods are attached to this class at import
    # time by the build_rule_validation_tests() call below.
    pass


# Runs at import time: attach the generated tests for the rules returned by
# models.load_rules() to the class above.
build_rule_validation_tests(models.load_rules(),
                            TestValidateLicenseRuleSelfDetection)
class TestValidateLicenseTextDetection(unittest.TestCase):
    # Intentionally empty: test methods are attached to this class at import
    # time by the build_license_validation_tests() call below.
    pass


# Runs at import time: pass the licenses returned by cache.get_licenses_db()
# and the class above to build_license_validation_tests().
build_license_validation_tests(cache.get_licenses_db(), TestValidateLicenseTextDetection)


def build_rule_validation_tests(rules, cls):
    """
    For each rule in `rules` that is not negative, build a test method with
    make_license_test_function() and set it on the `cls` test class under a
    name derived from the rule identifier.
    """
    for rule in rules:
        if rule.negative:
            # Negative rules get no generated test.
            continue
        safe_identifier = text.python_safe_name(rule.identifier)
        test_name = 'test_validate_self_detection_of_rule_for_' + safe_identifier
        setattr(
            cls,
            test_name,
            make_license_test_function(
                rule.licenses,
                rule.text_file,
                rule.data_file,
                test_name,
                detect_negative=not rule.negative,
                trace_text=True,
            ),
        )


class TestValidateLicenseRuleSelfDetection(unittest.TestCase):
    # Intentionally empty: test methods are attached to this class at import
    # time by the build_rule_validation_tests() call below.
    pass


# Runs at import time: attach the generated tests for the rules returned by
# models.load_rules() to the class above.
build_rule_validation_tests(models.load_rules(), TestValidateLicenseRuleSelfDetection)
def cli(licenses, rules, category, license_key, with_text):
    """
    Write Licenses/Rules from scancode into a CSV file with all details.
    Output can be optionally filtered by category/license-key.
    """
    # `licenses` and `rules` are the CSV outputs handed to write_data_to_csv();
    # each section below runs only when its output is truthy.
    licenses_output = []
    rules_output = []

    # Always loaded: the rules section also uses it to look up categories.
    licenses_data = load_licenses()

    if licenses:
        for lic in licenses_data.values():
            license_data = lic.to_dict()
            # Read the text once and reuse it for the excerpt and the count.
            license_text = lic.text
            if with_text:
                # Only the first 200 characters are exported.
                license_data["text"] = license_text[:200]
            license_data["is_unknown"] = lic.is_unknown
            # NOTE(review): despite its name this is a character count, not
            # a word count -- confirm which one consumers of the CSV expect.
            license_data["words_count"] = len(license_text)
            license_data["reference_url"] = SCANCODE_LICENSEDB_URL.format(lic.key)
            licenses_output.append(license_data)

        if category:
            licenses_output = filter_by_attribute(
                data=licenses_output, attribute="category", required_key=category
            )

        if license_key:
            licenses_output = filter_by_attribute(
                data=licenses_output,
                attribute="key",
                required_key=license_key,
            )

        licenses_output = flatten_output(data=licenses_output)
        write_data_to_csv(data=licenses_output, output_csv=licenses, fieldnames=LICENSES_FIELDNAMES)

    if rules:
        for rule in load_rules():
            rule_data = rule.to_dict()
            rule_data["identifier"] = rule.identifier
            rule_data["referenced_filenames"] = rule.referenced_filenames
            # Call rule.text() once per rule instead of once for the excerpt
            # and once more for the count.
            rule_text = rule.text()
            if with_text:
                # Only the first 200 characters are exported.
                rule_data["text"] = rule_text[:200]
            rule_data["has_unknown"] = rule.has_unknown
            # NOTE(review): character count, not word count -- see above.
            rule_data["words_count"] = len(rule_text)
            try:
                rule_data["category"] = licenses_data[rule_data["license_expression"]].category
            except KeyError:
                # Best effort: no category is set when the lookup by license
                # expression has no entry.
                pass
            rules_output.append(rule_data)

        if category:
            rules_output = filter_by_attribute(
                data=rules_output,
                attribute="category",
                required_key=category,
            )

        if license_key:
            rules_output = filter_by_attribute(
                data=rules_output,
                attribute="license_expression",
                required_key=license_key,
            )

        rules_output = flatten_output(rules_output)
        write_data_to_csv(data=rules_output, output_csv=rules, fieldnames=RULES_FIELDNAMES)