def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
        assert sorted(expected) == sorted(file_result)
    finally:
        fileutils.delete(test_src_dir)
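
The `.replace(scancode_root_abs, '').strip('\\/')` dance above just turns an absolute temp path into a path relative to the directory the tests run from. A standard-library-only sketch of the same idea, with hypothetical directory names rather than scancode-toolkit paths:

import os
import tempfile

root = os.path.abspath('.')                      # the dir the tests run from
tmp_root = os.path.join(root, 'tmp')             # a tmp dir inside that root
os.makedirs(tmp_root, exist_ok=True)
work_dir = tempfile.mkdtemp(dir=tmp_root)        # absolute path to a fresh dir
rel_work_dir = os.path.relpath(work_dir, root)   # e.g. 'tmp/tmpab12cd'
print(rel_work_dir)
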
Example #2
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error,  the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception, e:
            if TRACE:
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
            errors = [str(e).strip(' \'"')]
        finally:
            # assumed completion (following the ExtractEvent pattern above):
            # report the collected warnings and errors in a final event
            yield ExtractEvent(location, target, done=True,
                               warnings=warnings, errors=errors)
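
The key idea in `extract_file` is to extract into a temporary directory first so that a failed extraction never leaves a half-populated target. A minimal standard-library sketch of that pattern, for zip files only and with hypothetical names, not the scancode implementation:

import shutil
import tempfile
import zipfile

def safe_extract_zip(archive_path, target_dir):
    """Extract a zip into target_dir only if the whole extraction succeeds."""
    tmp_dir = tempfile.mkdtemp(prefix='extract-')
    try:
        with zipfile.ZipFile(archive_path) as zf:
            zf.extractall(tmp_dir)
        # publish the results only after extraction completed without error
        shutil.copytree(tmp_dir, target_dir, dirs_exist_ok=True)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
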
def test_extractcode_command_works_with_relative_paths_verbose(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, ['--verbose', test_src_file])
        assert result.exit_code == 0
        # extract the path from the second line of the output
        # check that the path is relative and not absolute
        lines = result.output.splitlines(False)
        line = lines[1]
        line_path = line.split(':', 1)[-1].strip()
        if on_windows:
            drive = test_file[:2]
            assert not line_path.startswith(drive)
        else:
            assert not line_path.startswith('/')
    finally:
        fileutils.delete(test_src_dir)
Example #4
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract the archive at `location` to `target_dir`, trying the `extractor1`
    function first. If extraction fails, attempt extraction again with the
    `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors.

    Note: there are a few cases where the primary extractor for a type may fail and
    a secondary extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = unicode(fileutils.get_temp_dir('extract1'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except:
        try:
            temp_target2 = unicode(fileutils.get_temp_dir('extract2'))
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
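
The shape of `extract_with_fallback` is: try the primary extractor into its own temp dir, and only on failure retry with the secondary one, always cleaning up the temp dirs. A generic sketch of that shape using only the standard library; `primary` and `fallback` are placeholder callables with the same `(location, target)` signature as above:

import shutil
import tempfile

def extract_with_fallback_sketch(location, target_dir, primary, fallback):
    """Try primary(location, tmp); if it raises, retry with fallback."""
    tmp1 = tempfile.mkdtemp(prefix='extract1-')
    try:
        warnings = primary(location, tmp1)
        shutil.copytree(tmp1, target_dir, dirs_exist_ok=True)
    except Exception:
        tmp2 = tempfile.mkdtemp(prefix='extract2-')
        try:
            warnings = fallback(location, tmp2)
            shutil.copytree(tmp2, target_dir, dirs_exist_ok=True)
        finally:
            shutil.rmtree(tmp2, ignore_errors=True)
    finally:
        shutil.rmtree(tmp1, ignore_errors=True)
    return warnings
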
Example #5
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        assets_dir = join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(assets_dir, target_dir)

        # write json data
        import json
        root_path, assets_dir = get_html_app_files_dirs(output_file)
        with open(join(root_path, assets_dir, 'data.json'), 'w') as f:
            f.write('data=' + json.dumps(results))

        # create help file
        with open(join(root_path, assets_dir, 'help.html'), 'w') as f:
            f.write(get_html_app_help(basename(output_file.name)))
    except HtmlAppAssetCopyWarning, w:
        raise w
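
`create_html_app_assets` pairs an output file such as scan.html with a sibling 'scan_files' directory that is wiped, re-created, filled with assets, and given a 'data=' prefixed JSON payload. A stdlib-only sketch of that convention; the function name, parameters, and paths here are illustrative, not the scancode API:

import json
import os
import shutil

def write_html_app_data(output_path, results, assets_src):
    """Create '<output>_files' next to output_path and write data.json there."""
    root = os.path.dirname(output_path)
    base = os.path.splitext(os.path.basename(output_path))[0]
    assets_dir = os.path.join(root, base + '_files')
    if os.path.exists(assets_dir):
        shutil.rmtree(assets_dir)            # the target dir is deleted if it exists
    shutil.copytree(assets_src, assets_dir)
    with open(os.path.join(assets_dir, 'data.json'), 'w') as f:
        f.write('data=' + json.dumps(results))
    return assets_dir
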
Example #6
    def test_tree_checksum_is_different_when_file_is_removed(self):
        test_dir = self.get_test_loc('cache/tree', copy=True)

        new_file = os.path.join(test_dir, 'some.py')
        with open(new_file, 'wb') as py:
            py.write(' ')
        before = cache.tree_checksum(test_dir)

        fileutils.delete(new_file)
        after = cache.tree_checksum(test_dir)
        assert before != after
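
The test above relies on `cache.tree_checksum` changing whenever a file is added or removed. The real implementation is more involved, but the idea can be sketched with hashlib: hash the sorted relative paths and sizes of every file under the tree, so deleting a file yields a different digest:

import hashlib
import os

def tree_checksum_sketch(base_dir):
    """Hash the relative path and size of every file under base_dir."""
    sha1 = hashlib.sha1()
    for top, dirs, files in os.walk(base_dir):
        dirs.sort()                      # walk in a stable order
        for name in sorted(files):
            path = os.path.join(top, name)
            sha1.update(os.path.relpath(path, base_dir).encode('utf-8'))
            sha1.update(str(os.path.getsize(path)).encode('utf-8'))
    return sha1.hexdigest()
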
Example #7
def uncompress(location, target_dir, decompressor, suffix=EXTRACT_SUFFIX):
    """
    Uncompress a compressed file at location in the target_dir using the
    `decompressor` object. The uncompressed file is named after the original
    archive with a `suffix` added.
    Return a list of warning messages. Raise Exceptions on errors.
    """
    # FIXME: do not create a sub-directory and instead strip the "compression"
    # extension such as gz, etc., or introspect the archive header to get the file
    # name when present.
    if DEBUG:
        logger.debug('uncompress: ' + location)
    tmp_loc, warnings = uncompress_file(location, decompressor)
    target_location = os.path.join(target_dir, os.path.basename(location) + suffix)
    if os.path.exists(target_location):
        fileutils.delete(target_location)
    os.rename(tmp_loc, target_location)
    return warnings
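
For a single-file compression format, `uncompress` names the result after the original archive plus a suffix and swaps it into place. A gzip-only sketch of the same flow with the standard library; the '-extract' suffix and helper name are illustrative:

import gzip
import os
import shutil
import tempfile

def gunzip_to(location, target_dir, suffix='-extract'):
    """Decompress a .gz file into target_dir as '<basename><suffix>'."""
    target = os.path.join(target_dir, os.path.basename(location) + suffix)
    fd, tmp = tempfile.mkstemp(prefix='uncompress-')
    os.close(fd)
    with gzip.open(location, 'rb') as src, open(tmp, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    if os.path.exists(target):
        os.remove(target)                # mirror the delete-then-rename above
    shutil.move(tmp, target)
    return target
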
Example #8
def uncompress(location, target_dir, decompressor, suffix=EXTRACT_SUFFIX):
    """
    Uncompress a compressed file at location in the target_dir using the
    `decompressor` object. The uncompressed file is named after the original
    archive with a `suffix` added.
    Return a list of warning messages. Raise Exceptions on errors.
    """
    # FIXME: do not create a sub-directory and instead strip the "compression"
    # extension such as gz, etc., or introspect the archive header to get the file
    # name when present.
    if DEBUG:
        logger.debug('uncompress: ' + location)
    tmp_loc, warnings = uncompress_file(location, decompressor)
    target_location = os.path.join(target_dir,
                                   os.path.basename(location) + suffix)
    if os.path.exists(target_location):
        fileutils.delete(target_location)
    os.rename(tmp_loc, target_location)
    return warnings
Example #9
    def test_delete_unwritable_directory_and_files(self):
        base_dir = self.get_test_loc('fileutils/readwrite', copy=True)
        test_dir = join(base_dir, 'sub')
        test_file = join(test_dir, 'file')

        try:
            # note: there are no unreadable/unwritable dirs on Windows
            make_non_readable(test_file)
            make_non_executable(test_file)
            make_non_writable(test_file)

            make_non_readable(test_dir)
            make_non_executable(test_dir)
            make_non_writable(test_dir)

            fileutils.delete(test_dir)
            assert not os.path.exists(test_dir)
        finally:
            fileutils.chmod(base_dir, fileutils.RW, recurse=True)
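
The test above exercises the hardest part of a robust delete: removing entries that were made read-only. With the standard library alone, the usual trick is an rmtree error handler that chmods the offending entry and retries; this is a sketch of that idea, not the commoncode implementation:

import os
import shutil
import stat

def force_delete(path):
    """Delete a directory tree even when some entries are not writable."""
    def on_error(func, target, _exc_info):
        # make the offending entry writable again, then retry the failed call
        os.chmod(target, stat.S_IRWXU)
        func(target)
    shutil.rmtree(path, onerror=on_error)
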
Example #10
def create_html_app_assets(output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        assets_dir = join(get_template_dir('html-app'), 'assets')
        tgt_dirs = get_html_app_files_dirs(output_file)
        if not tgt_dirs:
            raise HtmlAppAssetCopyWarning()
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)
    except HtmlAppAssetCopyWarning, w:
        raise w
Example #11
def create_html_app_assets(output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        assets_dir = join(get_template_dir("html-app"), "assets")
        tgt_dirs = get_html_app_files_dirs(output_file)
        if not tgt_dirs:
            raise HtmlAppAssetCopyWarning()
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)
    except HtmlAppAssetCopyWarning, w:
        raise w
Example #12
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory, then the `extractor2`
    function on the payload extracted by `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux:
        location = path_to_bytes(location)
        target_dir = path_to_bytes(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # extract first the intermediate payload to a temp dir
    temp_target = unicode(fileutils.get_temp_dir('extract'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # extract this intermediate payload to the final target_dir
    try:
        inner_archives = list(fileutils.file_iter(temp_target))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
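
A concrete instance of the two-step extraction that `extract_twice` generalizes is a compressed tarball: first decompress the outer gzip payload to a temp location, then untar that payload into the final target. A stdlib sketch under those assumptions:

import gzip
import os
import shutil
import tarfile
import tempfile

def extract_gz_then_tar(location, target_dir):
    """Two-step extraction: gunzip to a temp file, then untar the payload."""
    tmp_dir = tempfile.mkdtemp(prefix='extract-')
    try:
        inner = os.path.join(tmp_dir, 'payload.tar')
        with gzip.open(location, 'rb') as src, open(inner, 'wb') as dst:
            shutil.copyfileobj(src, dst)
        with tarfile.open(inner) as tar:
            tar.extractall(target_dir)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
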
Example #13
def get_gem_metadata(location):
    """
    Return the string content of the metadata of a .gem archive file at
    `location` or None
    """
    extract_loc = None
    try:
        # Extract first level of tar archive
        extract_loc = fileutils.get_temp_dir(prefix='scancode-extract-')
        abs_location = abspath(expanduser(location))
        warnings = archive.extract_tar(abs_location, extract_loc) or []
        if warnings:
            raise Exception('Failed to extract RubyGem .gem file.\n' +
                            '\n'.join(warnings))

        # The gzipped metadata is the second level of archive.
        metadata = os.path.join(extract_loc, 'metadata')
        # or it can be a plain, non-gzipped file
        metadata_gz = metadata + '.gz'

        if os.path.exists(metadata):
            with open(metadata, 'rb') as met:
                content = met.read()

        elif os.path.exists(metadata_gz):
            content, warnings = get_gz_compressed_file_content(metadata_gz)
            if warnings:
                raise Exception(
                    'Failed to extract RubyGem .gem/metadata.gz file.\n' +
                    '\n'.join(warnings))

        else:
            raise Exception('No gem metadata found in RubyGem .gem file.')

        return content

    finally:
        if extract_loc:
            fileutils.delete(extract_loc)
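
Once the outer .gem tar has been extracted, the metadata lookup above is just: prefer a plain 'metadata' file, fall back to 'metadata.gz'. A small stdlib sketch of that second step, assuming the archive was already extracted to a directory:

import gzip
import os

def read_gem_metadata(extracted_dir):
    """Return the raw metadata bytes from an extracted .gem directory."""
    plain = os.path.join(extracted_dir, 'metadata')
    gzipped = plain + '.gz'
    if os.path.exists(plain):
        with open(plain, 'rb') as f:
            return f.read()
    if os.path.exists(gzipped):
        with gzip.open(gzipped, 'rb') as f:
            return f.read()
    raise ValueError('No gem metadata found in %r' % extracted_dir)
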
Example #14
def try_to_extract(location, target_dir, extractor):
    """
    Extract the archive at `location` to `target_dir` using the `extractor` function.
    If extraction fails, just return, without reporting warnings or raising exceptions.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = unicode(fileutils.get_temp_dir('extract1'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except:
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
Example #15
    def _collect_and_parse_tags(self):
        ctags_args = ['--fields=K', '--c-kinds=fp', '-f', '-', self.sourcefile]
        ctags_temp_dir = fileutils.get_temp_dir(base_dir='ctags')
        envt = {'TMPDIR': ctags_temp_dir}
        try:
            # pass the command location positionally too, since a positional
            # argument cannot follow a keyword argument
            rc, stdo, err = command.execute2(self.cmd_loc,
                                             ctags_args,
                                             env=envt,
                                             lib_dir=self.lib_loc,
                                             to_files=True)

            if rc != 0:
                raise Exception(open(err).read())

            with open(stdo, 'rb') as lines:
                for line in lines:
                    if 'cannot open temporary file' in line:
                        raise Exception('ctags: cannot open temporary file '
                                        ': Permission denied')

                    if line.startswith('!'):
                        continue

                    line = line.strip()
                    if not line:
                        continue

                    splitted = line.split('\t')

                    if (line.endswith('function\tfile:')
                            or line.endswith('prototype\tfile:')):
                        self.local_functions.append(splitted[0])

                    elif (line.endswith('function')
                          or line.endswith('prototype')):
                        self.global_functions.append(splitted[0])
        finally:
            fileutils.delete(ctags_temp_dir)
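
The parsing loop above classifies ctags '--fields=K' output by its trailing kind field: entries ending in 'file:' are file-local. That classification can be exercised on its own with in-memory lines; the sample lines below are made up but follow the same tab-separated shape:

def classify_ctags_lines(lines):
    """Split ctags '--fields=K' output into (local, global) function names."""
    local_funcs, global_funcs = [], []
    for line in lines:
        line = line.strip()
        if not line or line.startswith('!'):   # skip blanks and pseudo-tags
            continue
        name = line.split('\t')[0]
        if line.endswith(('function\tfile:', 'prototype\tfile:')):
            local_funcs.append(name)           # file-static definitions
        elif line.endswith(('function', 'prototype')):
            global_funcs.append(name)
    return local_funcs, global_funcs

sample = [
    '!_TAG_PROGRAM_NAME\tExuberant Ctags',
    'main\tmain.c\t/^int main/;"\tfunction',
    'helper\tmain.c\t/^static int helper/;"\tfunction\tfile:',
]
print(classify_ctags_lines(sample))            # (['helper'], ['main'])
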
Example #16
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract archive at `location` to `target_dir` trying first the primary
    `extractor1` function. If extract fails with this function, attempt
    extraction again with the fallback `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors.

    Note: there are a few cases where the primary extractor for a type may fail
    and a fallback extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug(
                'extract_with_fallback: temp_target1: %(temp_target1)r' %
                locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except:
        try:
            temp_target2 = compat.unicode(
                fileutils.get_temp_dir(prefix='extractcode-extract2-'))
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug(
                    'extract_with_fallback: temp_target2: %(temp_target2)r' %
                    locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files`
    directory and copy the assets to this directory. The target
    directory is deleted if it exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        assets_dir = os.path.join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = os.path.join(*tgt_dirs)
        if os.path.exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(assets_dir, target_dir)

        # write json data
        root_path, assets_dir = get_html_app_files_dirs(output_file)
        with codecs.open(os.path.join(root_path, assets_dir, 'data.json'),
                         'wb',
                         encoding='utf-8') as f:
            f.write('data=')
            json.dump(results, f, iterable_as_array=True)

        # create help file
        with codecs.open(os.path.join(root_path, assets_dir, 'help.html'),
                         'wb',
                         encoding='utf-8') as f:
            f.write(get_html_app_help(os.path.basename(output_file.name)))
    except HtmlAppAssetCopyWarning, w:
        raise w
def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(
            scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root,
                            test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [
            as_posixpath(f.replace(test_tgt_dir, ''))
            for f in fileutils.resource_iter(test_tgt_dir, with_dirs=False)
        ]
        assert sorted(expected) == sorted(file_result)
    finally:
        fileutils.delete(test_src_dir)
def test_extractcode_command_works_with_relative_paths_verbose(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(
            scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode,
                               ['--verbose', test_src_file])
        assert result.exit_code == 0
        # extract the path from the second line of the output
        # check that the path is relative and not absolute
        lines = result.output.splitlines(False)
        line = lines[1]
        line_path = line.split(':', 1)[-1].strip()
        if on_windows:
            drive = test_file[:2]
            assert not line_path.startswith(drive)
        else:
            assert not line_path.startswith('/')
    finally:
        fileutils.delete(test_src_dir)
Example #20
def cli(license_dir, source, trace, clean, match_text=False, match_approx=False):
    """
    Synchronize ScanCode licenses with an external license source.

    DIR is the directory to store (or load) external licenses.

    When using the dejacode source you need to set the
    'DEJACODE_API_URL' and 'DEJACODE_API_KEY' environment variables with
    your credentials.
    """
    global TRACE
    TRACE = trace

    if clean:
        fileutils.delete(license_dir)
        fileutils.delete(license_dir.rstrip('/\\') + '-new')
        fileutils.delete(license_dir.rstrip('/\\') + '-update')
        fileutils.delete(license_dir.rstrip('/\\') + '-del')

    source_cls = SOURCES[source]
    source = source_cls(license_dir, match_text, match_approx)
    synchronize_licenses(source)
    print()
Example #21
    def clear(self, *args):
        """
        Purge the cache by deleting the corresponding cached data files.
        """
        fileutils.delete(self.cache_base_dir)
Example #22
def create_html_app(output_file, results, version, scanned_path):  # NOQA
    """
    Given an html-app output_file, generate that file, create the data.js data
    file from the results and create the corresponding `_files` directory and
    copy the data and assets to this directory. The target directory is deleted
    if it exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()

        source_assets_dir = join(TEMPLATES_DIR, 'html-app', 'assets')

        # Return a tuple of (parent_dir, dir_name) where dir_name is named
        # after the `output_location` file base name (stripped of its
        # extension) with a `_files` suffix. Return empty strings if output
        # is to stdout.
        output_location = output_file.name
        tgt_root_path = dirname(output_location)
        tgt_assets_dir = file_base_name(output_location) + '_files'

        # delete old assets
        target_assets_dir = join(tgt_root_path, tgt_assets_dir)
        if exists(target_assets_dir):
            delete(target_assets_dir)

        # copy assets
        copytree(source_assets_dir, target_assets_dir)

        template = get_template(join(TEMPLATES_DIR, 'html-app', 'template.html'))
        rendered_html = template.render(
            assets_dir=target_assets_dir,
            scanned_path=scanned_path,
            version=version
        )
        output_file.write(rendered_html)

        # create help file
        help_template = get_template(join(TEMPLATES_DIR, 'html-app', 'help_template.html'))
        rendered_help = help_template.render(main_app=output_location)
        with io.open(join(target_assets_dir, 'help.html'), 'w', encoding='utf-8') as f:
            f.write(rendered_help)

        # write json data
        # FIXME: this should be a regular JSON scan format
        if py2:
            mode = 'wb'
            prefix = b'data='
        if py3:
            mode = 'w'
            prefix = u'data='
        with io.open(join(target_assets_dir, 'data.js'), mode) as f:
            f.write(prefix)
            simplejson.dump(results, f, iterable_as_array=True)

    except HtmlAppAssetCopyWarning as w:
        raise w

    except Exception as e:  # NOQA
        import traceback
        msg = 'ERROR: cannot create HTML application.\n' + traceback.format_exc()
        raise HtmlAppAssetCopyError(msg)
Example #23
def rebuild_rootfs(img, target_dir):
    """
    Extract and merge or "squash" all layers of the `image` Image in a single
    rootfs in `target_dir`. Extraction is done in sequence from the bottom (root
    or initial) layer to the top (or latest) layer and the "whiteouts"
    unionfs/overlayfs procedure is applied at each step as per the OCI spec:
    https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts

    Return a list of deleted "whiteout" files.
    Raise an Exception on errors.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - find whiteouts in that layer temp dir
     - remove files/directories corresponding to these whiteouts in the target directory
     - remove whiteout special marker files or dirs in the temp directory
     - move layer to the target directory, overwriting existing files

    See also some related implementations and links:
    https://github.com/moby/moby/blob/d1f470946/pkg/archive/whiteouts.go
    https://github.com/virt-manager/virt-bootstrap/blob/8a7e752d/src/virtBootstrap/whiteout.py
    https://github.com/goldmann/docker-squash

    https://github.com/moby/moby/blob/master/image/spec/v1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.2.md
    """

    assert os.path.isdir(target_dir)

    # log  deletions
    deletions = []

    for layer_num, layer in enumerate(img.layers):
        if TRACE:
            logger.debug(f'Extracting layer {layer_num} - {layer.layer_id} '
                         f'tarball: {layer.archive_location}')

        # 1. extract a layer to temp.
        # Note that we are not preserving any special file and any file permission
        extracted_loc = tempfile.mkdtemp('container_inspector-docker')
        layer.extract(extracted_location=extracted_loc)
        if TRACE: logger.debug(f'  Extracted layer to: {extracted_loc}')

        # 2. find whiteouts in that layer.
        whiteouts = list(find_whiteouts(extracted_loc))
        if TRACE:
            logger.debug(
                '  Merging extracted layers and applying unionfs whiteouts')
        if TRACE:
            logger.debug('  Whiteouts:\n' +
                         '     \n'.join(map(repr, whiteouts)))

        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        for whiteout_marker_loc, whiteable_path in whiteouts:
            if TRACE:
                logger.debug(
                    f'    Deleting dir or file with whiteout marker: {whiteout_marker_loc}'
                )
            whiteable_loc = os.path.join(target_dir, whiteable_path)
            delete(whiteable_loc)
            # also delete the whiteout marker file
            delete(whiteout_marker_loc)
            deletions.append(whiteable_loc)

        # 4. finally copy/overwrite the extracted layer over the WIP rootfs
        if TRACE:
            logger.debug(
                f'  Moving extracted layer from: {extracted_loc} to: {target_dir}'
            )
        copytree(extracted_loc, target_dir)
        if TRACE: logger.debug(f'  Moved layer to: {target_dir}')
        delete(extracted_loc)

    return deletions
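
Step 2 above depends on spotting whiteout markers in the freshly extracted layer: a file named '.wh.<name>' means '<name>' must be removed from the rootfs assembled so far. A simplified sketch of such a scan (ignoring opaque-directory markers), not the find_whiteouts used above:

import os

WHITEOUT_PREFIX = '.wh.'

def find_whiteouts_sketch(layer_dir):
    """Yield (marker_path, path_to_delete_relative_to_rootfs) pairs."""
    for top, _dirs, files in os.walk(layer_dir):
        for name in files:
            if name.startswith(WHITEOUT_PREFIX):
                marker = os.path.join(top, name)
                hidden = os.path.join(top, name[len(WHITEOUT_PREFIX):])
                yield marker, os.path.relpath(hidden, layer_dir)
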
Example #25
    def test_get_or_build_index_through_cache(self):
        # note: this is a rather complex test because caching involves some globals
        license_index_cache_dir = self.get_temp_dir('index_cache')
        _index_lock_file = os.path.join(license_index_cache_dir, 'lockfile')
        _tree_checksum_file = os.path.join(license_index_cache_dir,
                                           'tree_checksums')
        _index_cache_file = os.path.join(license_index_cache_dir,
                                         'index_cache')

        _tree_base_dir = self.get_temp_dir('src_dir')

        _licenses_dir = self.get_test_loc('cache/data', copy=True)
        _licenses_data_dir = os.path.join(_licenses_dir, 'licenses')
        _rules_data_dir = os.path.join(_licenses_dir, 'rules')

        _timeout = 10

        assert not os.path.exists(_tree_checksum_file)
        assert not os.path.exists(_index_cache_file)
        assert not os.path.exists(_index_lock_file)

        check_consistency = True
        return_index = False

        # when a new index is built, new index files are created
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)

        assert os.path.exists(_tree_checksum_file)
        assert os.path.exists(_index_cache_file)
        assert not os.path.exists(_index_lock_file)

        # when nothing changed, a new index file is not created
        tree_before = open(_tree_checksum_file).read()
        idx_checksum_before = hash.sha1(_index_cache_file)
        idx_date_before = date.get_file_mtime(_index_cache_file)
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)
        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # now add some file in the source tree
        new_file = os.path.join(_tree_base_dir, 'some file')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        # when check_consistency is False, the index is not rebuilt when
        # new files are added
        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)
        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # when check_consistency is True, the index is rebuilt when new
        # files are added
        check_consistency = True
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)
        assert tree_before != open(_tree_checksum_file).read()
        assert idx_date_before != date.get_file_mtime(_index_cache_file)

        # now add some ignored file in the source tree
        tree_before = open(_tree_checksum_file).read()
        idx_checksum_before = hash.sha1(_index_cache_file)
        idx_date_before = date.get_file_mtime(_index_cache_file)
        new_file = os.path.join(_tree_base_dir, 'some file.pyc')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        check_consistency = True
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # if the treechecksum file dies the index is rebuilt
        fileutils.delete(_tree_checksum_file)
        idx_checksum_before = hash.sha1(_index_cache_file)

        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert idx_date_before != date.get_file_mtime(_index_cache_file)

        # if the index cache file dies the index is rebuilt
        fileutils.delete(_index_cache_file)

        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency, return_index, _tree_base_dir,
            _tree_checksum_file, _index_lock_file, _index_cache_file,
            _licenses_data_dir, _rules_data_dir, _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert os.path.exists(_index_cache_file)
Example #26
    def test_build_index(self):
        # note: this is a rather complex test because caching involves some globals
        cache_dir = self.get_temp_dir('index_cache')
        lock_file, checksum_file, cache_file = get_license_cache_paths(cache_dir=cache_dir)
        tree_base_dir = self.get_temp_dir('src_dir')
        licenses_data_dir = self.get_test_loc('cache/data/licenses', copy=True)
        rules_data_dir = self.get_test_loc('cache/data/rules', copy=True)

        # now add some file in the mock source tree
        new_file = os.path.join(tree_base_dir, 'some.py')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        timeout = 10

        assert not os.path.exists(checksum_file)
        assert not os.path.exists(cache_file)
        assert not os.path.exists(lock_file)

        # when a new index is built, new index files are created
        check_consistency = True
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)

        assert os.path.exists(checksum_file)
        assert os.path.exists(cache_file)
        assert not os.path.exists(lock_file)

        # when nothing changed, a new index file is not created
        tree_before = open(checksum_file).read()
        idx_checksum_before = hash.sha1(cache_file)
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)
        assert tree_before == open(checksum_file).read()
        assert idx_checksum_before == hash.sha1(cache_file)

        # now add some file in the source tree
        new_file = os.path.join(tree_base_dir, 'some file')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        # when check_consistency is False, the index is not rebuilt when
        # new files are added
        check_consistency = False
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)
        assert tree_before == open(checksum_file).read()
        assert idx_checksum_before == hash.sha1(cache_file)

        # when check_consistency is True, the index is rebuilt when new
        # files are added
        check_consistency = True
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)
        assert tree_before != open(checksum_file).read()

        # now add some ignored file in the source tree
        tree_before = open(checksum_file).read()
        idx_checksum_before = hash.sha1(cache_file)
        new_file = os.path.join(tree_base_dir, 'some file.pyc')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        # when check_consistency is True, the index is not rebuilt when new
        # files are added that are ignored
        check_consistency = True
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)

        assert tree_before == open(checksum_file).read()
        assert idx_checksum_before == hash.sha1(cache_file)

        # if the treechecksum file dies, the index is rebuilt
        fileutils.delete(checksum_file)
        idx_checksum_before = hash.sha1(cache_file)

        check_consistency = False
        cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)

        assert tree_before == open(checksum_file).read()

        # if the index cache file dies the index is rebuilt
        fileutils.delete(cache_file)

        check_consistency = False
        idx1 = cache.get_cached_index(cache_dir, check_consistency, timeout,
                               tree_base_dir, licenses_data_dir, rules_data_dir)

        # load index, forced from file
        idx2 = cache.load_index(cache_file)
        assert idx1.to_dict(True) == idx2.to_dict(True)

        # reset global caches
        cache._LICENSE_SYMBOLS_BY_SPDX_KEY = {}
        cache._LICENSES_BY_KEY_INDEX = None
        cache._UNKNOWN_SPDX_SYMBOL = None
        cache._LICENSES_BY_KEY = None
Example #27
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers to target_dir. Extraction is done in
    sequence from bottom (root) to top (latest layer).

    Return a mapping of errors and a list of whiteouts/deleted files.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - move layer to the target directory, overwriting existing files
     - if any, remove AUFS special files/dirs in the target directory
     - if any, remove whiteouts file/directory pairs in the target directory
    """

    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # log whiteouts deletions
    whiteouts = []

    for layer_id, layer in image.layers.items():
        layer_tarball = join(image.repo_dir, layer_id[:layerid_len],
                             LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        for x in xevents:
            if x.warnings or x.errors:
                extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finally copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r'
            % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug(
            'Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r' %
                                 locals())
                    fileutils.delete(whiteable_dir)

                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r' %
                               locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir:  %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug(
                        'Deleting AUFS special dir:  %(whiteable_dir)r' %
                        locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r' %
                                 locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]

                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file:  %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug(
                        'Deleting AUFS special file:  %(whiteable_file)r' %
                        locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts
Example #28
    def test_get_or_build_index_through_cache(self):
        # note: this is a rather complex test because caching involves some globals
        license_index_cache_dir = self.get_temp_dir('index_cache')
        _index_lock_file = os.path.join(license_index_cache_dir, 'lockfile')
        _tree_checksum_file = os.path.join(license_index_cache_dir, 'tree_checksums')
        _index_cache_file = os.path.join(license_index_cache_dir, 'index_cache')

        _tree_base_dir = self.get_temp_dir('src_dir')

        _licenses_dir = self.get_test_loc('cache/data', copy=True)
        _licenses_data_dir = os.path.join(_licenses_dir, 'licenses')
        _rules_data_dir = os.path.join(_licenses_dir, 'rules')

        _timeout = 10

        assert not os.path.exists(_tree_checksum_file)
        assert not os.path.exists(_index_cache_file)
        assert not os.path.exists(_index_lock_file)

        check_consistency = True
        return_index = False

        # when a new index is built, new index files are created
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)

        assert os.path.exists(_tree_checksum_file)
        assert os.path.exists(_index_cache_file)
        assert not os.path.exists(_index_lock_file)

        # when nothing changed, a new index file is not created
        tree_before = open(_tree_checksum_file).read()
        idx_checksum_before = hash.sha1(_index_cache_file)
        idx_date_before = date.get_file_mtime(_index_cache_file)
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)
        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # now add some file in the source tree
        new_file = os.path.join(_tree_base_dir, 'some file')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        # when check_consistency is False, the index is not rebuilt when
        # new files are added
        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)
        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # when check_consistency is True, the index is rebuilt when new
        # files are added
        check_consistency = True
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)
        assert tree_before != open(_tree_checksum_file).read()
        assert idx_date_before != date.get_file_mtime(_index_cache_file)

        # now add some ignored file in the source tree
        tree_before = open(_tree_checksum_file).read()
        idx_checksum_before = hash.sha1(_index_cache_file)
        idx_date_before = date.get_file_mtime(_index_cache_file)
        new_file = os.path.join(_tree_base_dir, 'some file.pyc')
        with open(new_file, 'wb') as nf:
            nf.write('somthing')

        check_consistency = True
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert idx_checksum_before == hash.sha1(_index_cache_file)
        assert idx_date_before == date.get_file_mtime(_index_cache_file)

        # if the treechecksum file dies the index is rebuilt
        fileutils.delete(_tree_checksum_file)
        idx_checksum_before = hash.sha1(_index_cache_file)

        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert idx_date_before != date.get_file_mtime(_index_cache_file)

        # if the index cache file dies the index is rebuilt
        fileutils.delete(_index_cache_file)

        check_consistency = False
        cache.get_or_build_index_through_cache(
            check_consistency,
            return_index,
            _tree_base_dir,
            _tree_checksum_file,
            _index_lock_file,
            _index_cache_file,
            _licenses_data_dir,
            _rules_data_dir,
            _timeout)

        assert tree_before == open(_tree_checksum_file).read()
        assert os.path.exists(_index_cache_file)
Example #29
    def test_LicenseCache_load_or_build(self):

        # recreate internal paths for testing
        licensedcode_cache_dir = self.get_temp_dir('index_cache')
        scancode_cache_dir = self.get_temp_dir('index_metafiles')
        idx_cache_dir = os.path.join(licensedcode_cache_dir,
                                     cache.LICENSE_INDEX_DIR)
        fileutils.create_dir(idx_cache_dir)
        cache_file = os.path.join(idx_cache_dir, cache.LICENSE_INDEX_FILENAME)
        lock_file = os.path.join(scancode_cache_dir,
                                 cache.LICENSE_LOCKFILE_NAME)
        checksum_file = os.path.join(scancode_cache_dir,
                                     cache.LICENSE_CHECKSUM_FILE)

        tree_base_dir = self.get_temp_dir('src_dir')
        licenses_data_dir = self.get_test_loc('cache/data/licenses', copy=True)
        rules_data_dir = self.get_test_loc('cache/data/rules', copy=True)

        # now add some file in the mock source tree
        new_file = os.path.join(tree_base_dir, 'some.py')
        with open(new_file, 'w') as nf:
            nf.write('somthing')

        assert not os.path.exists(checksum_file)
        assert not os.path.exists(cache_file)
        assert not os.path.exists(lock_file)

        timeout = 10

        # when a new cache is built, new cache files are created
        check_consistency = True
        _cached1 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert os.path.exists(checksum_file)
        assert os.path.exists(cache_file)

        # when nothing changed, a new index file is not created
        tree_before = open(checksum_file).read()
        idx_checksum_before = hash.sha1(cache_file)
        _cached2 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert open(checksum_file).read() == tree_before
        assert hash.sha1(cache_file) == idx_checksum_before

        # now add some file in the source tree
        new_file = os.path.join(tree_base_dir, 'some file')
        with open(new_file, 'w') as nf:
            nf.write('somthing')

        # when check_consistency is False, the index is not rebuilt when
        # new files are added
        check_consistency = False
        _cached3 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )
        assert open(checksum_file).read() == tree_before
        assert hash.sha1(cache_file) == idx_checksum_before

        # when check_consistency is True, the index is rebuilt when new
        # files are added
        check_consistency = True
        _cached4 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )
        assert open(checksum_file).read() != tree_before

        # now add some ignored file in the source tree
        tree_before = open(checksum_file).read()
        idx_checksum_before = hash.sha1(cache_file)
        new_file = os.path.join(tree_base_dir, 'some file.pyc')
        with open(new_file, 'w') as nf:
            nf.write('somthing')

        # when check_consistency is True, the index is not rebuilt when new
        # files are added that are ignored
        check_consistency = True
        _cached5 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert open(checksum_file).read() == tree_before
        assert hash.sha1(cache_file) == idx_checksum_before

        # if the tree checksum file dies, the index is not rebuilt if
        # check_consistency is False, and no new checksum is created
        fileutils.delete(checksum_file)
        idx_checksum_before = hash.sha1(cache_file)

        check_consistency = False
        _cached6 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert not os.path.exists(checksum_file)

        # with the treechecksum file gone, the index is rebuilt if
        # check_consistency is True and a new checksum is created
        idx_checksum_before = hash.sha1(cache_file)

        check_consistency = True
        _cached7 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert open(checksum_file).read() == tree_before

        # if the index cache file dies the index is rebuilt
        fileutils.delete(cache_file)
        check_consistency = False
        cached8 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            check_consistency=check_consistency,
            timeout=timeout,
            tree_base_dir=tree_base_dir,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )
        idx1 = cached8.index

        # load index, forced from file
        cached9 = cache.load_cache_file(cache_file)
        idx2 = cached9.index
        assert set(idx2.dictionary.keys()) == set(idx1.dictionary.keys())
Example #30
    def test_LicenseCache_load_or_build_from_empty(self):
        # recreate internal paths for testing
        licensedcode_cache_dir = self.get_temp_dir('index_cache')
        scancode_cache_dir = self.get_temp_dir('index_metafiles')
        idx_cache_dir = os.path.join(licensedcode_cache_dir,
                                     cache.LICENSE_INDEX_DIR)
        fileutils.create_dir(idx_cache_dir)
        cache_file = os.path.join(idx_cache_dir, cache.LICENSE_INDEX_FILENAME)
        lock_file = os.path.join(scancode_cache_dir,
                                 cache.LICENSE_LOCKFILE_NAME)

        licenses_data_dir = self.get_test_loc('cache/data/licenses', copy=True)
        rules_data_dir = self.get_test_loc('cache/data/rules', copy=True)

        assert not os.path.exists(cache_file)
        assert not os.path.exists(lock_file)

        timeout = 10

        # when a new cache is built, new cache files are created
        _cached1 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            force=False,
            timeout=timeout,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert os.path.exists(cache_file)
        fileutils.delete(cache_file)

        # force=True builds an index too if none exists
        _cached2 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            force=True,
            timeout=timeout,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert os.path.exists(cache_file)

        # force=True rebuilds an index
        idx_checksum_before = hash.sha1(cache_file)

        _cached3 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            force=True,
            timeout=timeout,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )

        assert hash.sha1(cache_file) != idx_checksum_before

        # force=False loads an index
        idx_checksum_before = hash.sha1(cache_file)

        _cached4 = cache.LicenseCache.load_or_build(
            licensedcode_cache_dir=licensedcode_cache_dir,
            scancode_cache_dir=scancode_cache_dir,
            force=False,
            timeout=timeout,
            licenses_data_dir=licenses_data_dir,
            rules_data_dir=rules_data_dir,
        )
        assert hash.sha1(cache_file) == idx_checksum_before
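
All of the cache tests above exercise the same load-or-build contract: load from the cache file when it exists, otherwise (or when forced) rebuild the data and write a fresh cache file. A generic stdlib sketch of that contract; `load_or_build` and `builder` are illustrative names, not the LicenseCache API:

import os
import pickle

def load_or_build(cache_file, force, builder):
    """Return cached data, rebuilding with builder() when forced or missing."""
    if not force and os.path.exists(cache_file):
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    data = builder()                         # the expensive build step
    with open(cache_file, 'wb') as f:
        pickle.dump(data, f)
    return data
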