Exemplo n.º 1
0
    def fetch_licenses(self, scancode_licenses, from_repo=SPDX_DEFAULT_REPO):
        """
        Yield License objects fetched from the latest SPDX license list.

        `from_repo` is the GitHub repository path interpolated into both the
        tags API URL and the archive URL. The name of the first tag returned
        by the tags endpoint selects which archive is downloaded.

        Every JSON file of the archive found under a `/json/details/` or
        `/json/exceptions/` path is loaded and passed, together with
        `scancode_licenses`, to `self.build_license()`. Only truthy results
        are yielded. Files whose name ends with `+.json` are skipped.
        """
        # get latest tag: the first entry returned by the tags endpoint is used
        tags_url = 'https://api.github.com/repos/{from_repo}/tags'.format(
            from_repo=from_repo)
        tags = get_response(tags_url, headers={}, params={})
        tag = tags[0]['name']

        # fetch licenses and exceptions
        # note that exceptions data have -- weirdly enough -- a different schema
        zip_url = 'https://github.com/{from_repo}/archive/{tag}.zip'.format(
            from_repo=from_repo, tag=tag)
        if TRACE_FETCH:
            print('Fetching SPDX license data version:', tag, 'from:', zip_url)
        licenses_zip = fetch.download_url(zip_url, timeout=120)
        if TRACE_FETCH:
            print('Fetched SPDX licenses to:', licenses_zip)
        with zipfile.ZipFile(licenses_zip) as archive:
            for path in archive.namelist():
                # only JSON files from the details and exceptions directories
                if not (path.endswith('.json') and
                        ('/json/details/' in path
                         or '/json/exceptions/' in path)):
                    continue
                if TRACE_FETCH:
                    print('Loading license:', path)
                if path.endswith('+.json'):
                    # Skip the old plus licenses. We use them in
                    # ScanCode, but they are deprecated in SPDX.
                    continue
                details = json.loads(archive.read(path))
                lic = self.build_license(details, scancode_licenses)
                if lic:
                    yield lic
Exemplo n.º 2
0
    def fetch_licenses(self):
        """
        Yield License objects built from the latest SPDX license list data.

        The first tag listed for the spdx/license-list-data GitHub repository
        is used to download the matching zip archive. Each JSON file under a
        `/json/details/` or `/json/exceptions/` path is parsed and handed to
        `self._build_license()`; truthy results are yielded.

        NOTE(review): the previous docstring stated that texts are stored in
        the license_dir. Nothing in this method does that directly --
        presumably `_build_license` takes care of it; confirm.
        """
        # Look up the most recent tag: the first entry of the listing is used.
        listing = get_response(
            'https://api.github.com/repos/spdx/license-list-data/tags',
            headers={},
            params={},
        )
        latest_tag = listing[0]['name']

        # Licenses and exceptions ship in the same archive.
        # Note that exceptions data use a different schema than licenses.
        archive_url = (
            'https://github.com/spdx/license-list-data/archive/%(tag)s.zip'
            % {'tag': latest_tag}
        )
        if TRACE_FETCH:
            print('Fetching SPDX license data from:', archive_url)
        downloaded = fetch.download_url(archive_url, timeout=120)

        with zipfile.ZipFile(downloaded) as spdx_zip:
            for member in spdx_zip.namelist():
                if not member.endswith('.json'):
                    continue
                if '/json/details/' not in member and '/json/exceptions/' not in member:
                    continue

                if TRACE_FETCH:
                    print('Loading license:', member)

                # The old "plus" licenses are deprecated in SPDX (though still
                # used in ScanCode), so they are not loaded.
                if member.endswith('+.json'):
                    continue

                built = self._build_license(json.loads(spdx_zip.read(member)))
                if built:
                    yield built
Exemplo n.º 3
0
    def fetch_licenses(self):
        """
        Yield the License objects of the latest SPDX license list.

        Steps: read the tags of the spdx/license-list-data GitHub repository,
        take the name of the first one, download the zip archive for that tag
        and pass every parsed details/exceptions JSON document to
        `self._build_license()`. Falsy results are dropped.

        NOTE(review): an earlier docstring mentioned storing texts in the
        license_dir; this method does not do so itself -- verify whether
        `_build_license` does.
        """
        wanted_dirs = ('/json/details/', '/json/exceptions/')

        # get the latest tag (first item of the tags listing)
        tags_endpoint = 'https://api.github.com/repos/spdx/license-list-data/tags'
        all_tags = get_response(tags_endpoint, headers={}, params={})
        newest = all_tags[0]['name']

        # fetch licenses and exceptions in one archive
        # (exceptions data have a different schema than licenses)
        url_template = 'https://github.com/spdx/license-list-data/archive/%(tag)s.zip'
        zip_location = url_template % dict(tag=newest)
        if TRACE_FETCH:
            print('Fetching SPDX license data from:', zip_location)
        local_zip = fetch.download_url(zip_location, timeout=120)

        with zipfile.ZipFile(local_zip) as zipped:
            for entry in zipped.namelist():
                is_json = entry.endswith('.json')
                in_wanted_dir = any(marker in entry for marker in wanted_dirs)
                if not (is_json and in_wanted_dir):
                    continue

                if TRACE_FETCH:
                    print('Loading license:', entry)

                if entry.endswith('+.json'):
                    # Old plus licenses: used in ScanCode but deprecated in
                    # SPDX, hence skipped.
                    continue

                raw_json = zipped.read(entry)
                license_object = self._build_license(json.loads(raw_json))
                if license_object:
                    yield license_object
Exemplo n.º 4
0
def fetch_text(url):
    """
    Download the content at `url` and return the result of
    `fetch.download_url` (described by the original author as a temp file).

    A URL that does not contain 'raw', but contains both 'github.com' and
    '/blob/', is rewritten to its '/raw/' form before downloading. Any other
    URL is fetched unchanged.
    """
    # NOTE(review): 'raw' is matched anywhere in the URL, not only as a path
    # segment or host name -- confirm that this is intended.
    needs_raw_form = (
        'raw' not in url
        and 'github.com' in url
        and '/blob/' in url
    )
    if needs_raw_form:
        target = url.replace('/blob/', '/raw/')
    else:
        target = url

    print('  Fetching:', target)
    return fetch.download_url(target, timeout=120)
Exemplo n.º 5
0
def fetch_text(url):
    """
    Fetch the content at `url` and return what `fetch.download_url` returns
    (a temp file, according to the original description).

    GitHub "/blob/" URLs are converted to their "/raw/" equivalent first,
    unless the URL already contains "raw". Other URLs are used as-is.
    """
    # NOTE(review): the "raw" test is a plain substring check on the whole
    # URL -- verify that this is the intended behavior.
    is_github_blob = "github.com" in url and "/blob/" in url
    already_raw = "raw" in url
    download_from = (
        url.replace("/blob/", "/raw/")
        if is_github_blob and not already_raw
        else url
    )
    print("  Fetching:", download_from)
    return fetch.download_url(download_from, timeout=120)
Exemplo n.º 6
0
    def fetch_licenses(
        self,
        scancode_licenses=None,
        commitish=None,
        skip_oddities=True,
        from_repo=SPDX_DEFAULT_REPO,
    ):
        """
        Yield License objects fetched from the SPDX license list data of the
        `from_repo` GitHub repository.

        When `commitish` is truthy it selects the archive to download.
        Otherwise the name of the first tag returned by the repository tags
        endpoint is used.

        Each JSON file of the archive under a `/json/details/` or
        `/json/exceptions/` path is parsed and passed to
        `self.build_license()` along with `scancode_licenses` and
        `skip_oddities`; truthy results are yielded.

        If `skip_oddities` is True, files whose name ends with `+.json` are
        skipped here. Per the original documentation, other oddities such as
        foreign language licenses are skipped or handled specially as well --
        presumably inside `build_license`, which receives the flag.
        """
        if commitish:
            version = commitish
        else:
            # No explicit version requested: use the latest tag, taken as the
            # first entry of the tags listing.
            listing_url = 'https://api.github.com/repos/{from_repo}/tags'.format(
                from_repo=from_repo)
            listing = get_response(listing_url, headers={}, params={})
            version = listing[0]['name']

        # Licenses and exceptions come from the same archive.
        # Note that exceptions data have a different schema than licenses.
        archive_url = 'https://github.com/{from_repo}/archive/{tag}.zip'.format(
            from_repo=from_repo, tag=version)
        if TRACE_FETCH:
            print('Fetching SPDX license data version:', version, 'from:', archive_url)

        downloaded = fetch.download_url(archive_url, timeout=120)
        if TRACE_FETCH:
            print('Fetched SPDX licenses to:', downloaded)

        with zipfile.ZipFile(downloaded) as spdx_zip:
            for member in spdx_zip.namelist():
                if not member.endswith('.json'):
                    continue
                if not ('/json/details/' in member or '/json/exceptions/' in member):
                    continue

                if TRACE_FETCH:
                    print('Loading license:', member)

                # The old plus licenses are still used in ScanCode but are
                # deprecated in SPDX, so they count as oddities.
                is_plus_license = member.endswith('+.json')
                if skip_oddities and is_plus_license:
                    continue

                built = self.build_license(
                    mapping=json.loads(spdx_zip.read(member)),
                    scancode_licenses=scancode_licenses,
                    skip_oddities=skip_oddities,
                )
                if built:
                    yield built