#!/usr/bin/env python

# This is an example of counting files directly in a container image

from singularity.analysis.classify import (
    file_counts,
    extension_counts
)

container = "ubuntu.simg"

# Count files for several different pattern sets. The first call passes no
# patterns, so file_counts falls back to its own default (presumably
# README-style names, judging by the result name -- confirm in file_counts).
readme_count = file_counts(container)
copyright_count = file_counts(container, patterns=["copyright"])
credit_patterns = ["authors", "thanks", "credit"]
authors_count = file_counts(container, patterns=credit_patterns)
todo_count = file_counts(container, patterns=["todo"])

# Ask for the complete dict of extensions found in the container
extensions = extension_counts(container)

# Same call with return_counts=False to get the files rather than counts;
# note that this rebinds `extensions`
extensions = extension_counts(container, return_counts=False)
 def test_file_counts(self):
     """Call file_counts on self.container; the result is not asserted on."""
     print("Testing singularity.analysis.classify.file_counts")
     # Imported inside the test, after the banner is printed, as in the
     # original ordering.
     from singularity.analysis import classify
     result = classify.file_counts(self.container)
Exemplo n.º 3
0
#!/usr/bin/env python

# This is an example of counting files using an image diff

from singularity.analysis.classify import (get_diff, file_counts,
                                           extension_counts)

image_package = "python:3.6.0.img.zip"

# get_diff returns a dict of folders --> files that differ between the
# image and its closest OS
diff = get_diff(image_package=image_package)

# Count files in the diff for several different pattern sets. The first call
# passes no patterns, so file_counts uses its own default.
readme_count = file_counts(diff=diff)
copyright_count = file_counts(diff=diff, patterns=["copyright"])
credit_patterns = ["authors", "thanks", "credit"]
authors_count = file_counts(diff=diff, patterns=credit_patterns)
todo_count = file_counts(diff=diff, patterns=["todo"])

# Ask for the complete dict of extensions found in the diff
extensions = extension_counts(diff=diff)

# Same call with return_counts=False to get the files rather than counts;
# note that this rebinds `extensions`
extensions = extension_counts(diff=diff, return_counts=False)
Exemplo n.º 4
0
def run_build(build_dir,params,verbose=True, compress_image=False):
    '''run_build takes a build directory and params dictionary, and does the following:
      - downloads the repo into build_dir and changes into it
      - checks out params['branch'] and / or params['commit'], if given
      - builds the singularity image from the spec file (build_from_spec)
      - tests, packages and inspects the image, and counts files and
        extensions in the diff returned by get_diff
      - optionally gzips the image

    Parameters
    ==========
    build_dir: directory the repo is downloaded to; also passed on as the
               build directory and the package output folder.
    params: dict that must include spec_file, repo_url, branch, commit and
            debug. It is updated in place: branch is set to "master" when
            None, commit is set to the current HEAD hash when None or '',
            spec_file is resolved when it is a symbolic link, and version
            is set to the value returned by get_image_file_hash.
    verbose: accepted but not consulted by this function (package() is
             always called with verbose=True).
    compress_image: when True, write "<image>.gz" and return that path as
                    the image.

    Returns
    =======
    a dictionary with: image (path), image_package (path), metadata (dict),
    params (dict)

    The params are pickled to /tmp/params.pkl before the build and again
    once the version is known, so they can be recovered if a later step
    fails. The process exits with status 1 if the spec file is not found
    or if test_container reports a non-zero return code.
    '''
    # Local import: external commands are run with argument lists instead
    # of shell strings, so values from params cannot be interpreted by a shell.
    import subprocess

    # Download the repo and image
    download_repo(repo_url=params['repo_url'],
                  destination=build_dir)

    os.chdir(build_dir)
    if params['branch'] is not None:
        bot.info('Checking out branch %s' %params['branch'])
        subprocess.call(['git', 'checkout', params['branch']])
    else:
        params['branch'] = "master"

    # Commit
    if params['commit'] not in [None,'']:
        bot.info('Checking out commit %s' %params['commit'])
        subprocess.call(['git', 'checkout', params['commit'], '.'])

    # From here on out commit is used as a unique id, if we don't have one, we use current
    else:
        with os.popen('git log -n 1 --pretty=format:"%H"') as git_log:
            params['commit'] = git_log.read()
        bot.warning("commit not specified, setting to current %s" %params['commit'])

    # Dump some params for the builder, in case it fails after this
    passing_params = "/tmp/params.pkl"
    with open(passing_params,'wb') as params_file:
        pickle.dump(params,params_file)

    # Without a spec file there is nothing to build
    if not os.path.exists(params['spec_file']):
        # Tell the user what is actually there
        present_files = glob("*")
        bot.error("Build file %s not found in repository" %params['spec_file'])
        bot.info("Found files are %s" %"\n".join(present_files))
        # Params have been exported, will be found by log
        sys.exit(1)

    bot.info("Found spec file %s in repository" %params['spec_file'])

    # If the user has a symbolic link
    if os.path.islink(params['spec_file']):
        bot.info("%s is a symbolic link." %params['spec_file'])
        params['spec_file'] = os.path.realpath(params['spec_file'])

    # START TIMING
    start_time = datetime.now()
    image = build_from_spec(spec_file=params['spec_file'], # default will package the image
                            build_dir=build_dir,
                            isolated=True,
                            sandbox=False,
                            debug=params['debug'])

    # Save hash for metadata (also is image name)
    version = get_image_file_hash(image)
    params['version'] = version
    with open(passing_params,'wb') as params_file:
        pickle.dump(params,params_file)

    # total_seconds() covers the whole interval; the .seconds attribute
    # alone would drop any whole days from the elapsed time.
    final_time = int((datetime.now() - start_time).total_seconds())
    bot.info("Final time of build %s seconds." %final_time)

    # Did the container build successfully?
    test_result = test_container(image)
    if test_result['return_code'] != 0:
        bot.error("Image failed to build, cancelling.")
        sys.exit(1)

    # Get singularity version
    singularity_version = get_singularity_version()

    # Package the image metadata (files, folders, etc)
    image_package = package(image_path=image,
                            spec_path=params['spec_file'],
                            output_folder=build_dir,
                            remove_image=True,
                            verbose=True)

    # Derive software tags by subtracting similar OS
    diff = get_diff(image_package=image_package)

    # Inspect to get labels and other metadata
    cli = Singularity(debug=params['debug'])
    inspect = cli.inspect(image_path=image)

    # Get information on apps
    app_names = cli.apps(image_path=image)
    apps = extract_apps(image_path=image, app_names=app_names)

    # Count file types, and extensions
    counts = dict()
    counts['readme'] = file_counts(diff=diff)
    counts['copyright'] = file_counts(diff=diff,patterns=['copyright'])
    counts['authors-thanks-credit'] = file_counts(diff=diff,
                                                  patterns=['authors','thanks','credit','contributors'])
    counts['todo'] = file_counts(diff=diff,patterns=['todo'])
    extensions = extension_counts(diff=diff)

    # Index label of the highest similarity score
    os_sims = estimate_os(image_package=image_package,return_top=False)
    most_similar = os_sims['SCORE'].idxmax()

    metrics = {'build_time_seconds':final_time,
               'singularity_version':singularity_version,
               'singularity_python_version':singularity_python_version,
               'estimated_os': most_similar,
               'os_sims':os_sims['SCORE'].to_dict(),
               'file_counts':counts,
               'file_ext':extensions,
               'inspect':inspect,
               'version': version,
               'apps': apps}

    # Compress Image
    # NOTE(review): package() above was called with remove_image=True;
    # confirm the image file is still present at this point.
    if compress_image is True:
        compressed_image = "%s.gz" %image
        with open(compressed_image,'wb') as gz_out:
            subprocess.call(['gzip', '-c', '-9', image], stdout=gz_out)
        image = compressed_image

    output = {'image':image,
              'image_package':image_package,
              'metadata':metrics,
              'params':params }

    return output
Exemplo n.º 5
0
def run_build(build_dir, params, verbose=True):
    '''run_build takes a build directory and params dictionary, and does the following:
      - downloads the repo into build_dir and changes into it
      - checks out params['branch'] and / or params['commit'], if given
      - if the spec file is missing but a Dockerfile exists, attempts to
        convert it with dockerfile_to_singularity
      - estimates the image size when none is given, then builds the image
        from the spec file (build_from_spec)
      - tests, gzips and packages the image, and derives tags, file counts
        and extension counts from the diff returned by get_diff

    Parameters
    ==========
    build_dir: directory the repo is downloaded to; also passed on as the
               build directory and the package / conversion output folder.
    params: dict that must include spec_file, repo_url, branch, commit and
            debug. size is optional: when missing, None or '' it is
            estimated with estimate_image_size (which reads
            params['padding']), and falls back to 800 if the estimate
            raises. The dict is updated in place (commit, size).
    verbose: accepted but not consulted by this function (package() is
             always called with verbose=True).

    Returns
    =======
    a dictionary with: image (path to the gzipped image),
    image_package (path), metadata (dict), params (dict)

    The params are pickled to /tmp/params.pkl before the build so they can
    be recovered if a later step fails. The process exits with status 1 if
    the spec file is not found or if test_container reports return code 255.
    '''
    # Local import: external commands are run with argument lists instead
    # of shell strings, so values from params cannot be interpreted by a shell.
    import subprocess

    # Download the repo and image
    download_repo(repo_url=params['repo_url'], destination=build_dir)

    os.chdir(build_dir)
    if params['branch'] is not None:
        bot.logger.info('Checking out branch %s', params['branch'])
        subprocess.call(['git', 'checkout', params['branch']])

    # Commit
    if params['commit'] not in [None, '']:
        bot.logger.info('Checking out commit %s', params['commit'])
        subprocess.call(['git', 'checkout', params['commit'], '.'])

    # From here on out commit is used as a unique id, if we don't have one, we use current
    else:
        with os.popen('git log -n 1 --pretty=format:"%H"') as git_log:
            params['commit'] = git_log.read()
        bot.logger.warning("commit not specified, setting to current %s",
                           params['commit'])

    # Dump some params for the builder, in case it fails after this
    passing_params = "/tmp/params.pkl"
    with open(passing_params, 'wb') as params_file:
        pickle.dump(params, params_file)

    # If there is not a specfile, but is a Dockerfile, try building that
    if not os.path.exists(
            params['spec_file']) and os.path.exists('Dockerfile'):
        bot.logger.warning("Build file %s not found in repository",
                           params['spec_file'])
        bot.logger.warning(
            "Dockerfile found in repository, will attempt build.")
        dockerfile = dockerfile_to_singularity(dockerfile_path='Dockerfile',
                                               output_dir=build_dir)

        if dockerfile is not None:
            bot.logger.info("""\n
                                --------------------------------------------------------------
                                Dockerfile
                                --------------------------------------------------------------
                                \n%s""" % (dockerfile))

    # Without a spec file (original or converted) there is nothing to build
    if not os.path.exists(params['spec_file']):
        # Tell the user what is actually there
        present_files = glob("*")
        bot.logger.error("Build file %s not found in repository",
                         params['spec_file'])
        bot.logger.info("Found files are %s", "\n".join(present_files))
        # Params have been exported, will be found by log
        sys.exit(1)

    bot.logger.info("Found spec file %s in repository",
                    params['spec_file'])

    # If size is None (or not provided at all), set default of 800
    if params.get('size') in [None, '']:
        bot.logger.info("""\n
                            --------------------------------------------------------------
                            Size not detected for build. Will first try to estimate, and then
                            use default of 800MB padding. If your build still fails, you should 
                            try setting the size manually under collection --> edit builder
                            ---------------------------------------------------------------------
                            \n""")

        # Testing estimation of size
        try:
            params['size'] = estimate_image_size(spec_file=os.path.abspath(
                params['spec_file']),
                                                 sudopw='',
                                                 padding=params['padding'])
            bot.logger.info("Size estimated as %s", params['size'])
        # Any estimation failure falls back to the default, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        except Exception:
            params['size'] = 800
            bot.logger.info(
                "Size estimation didn't work, using default %s",
                params['size'])

    # START TIMING
    os.chdir(build_dir)
    start_time = datetime.now()
    image = build_from_spec(
        spec_file=params['spec_file'],  # default will package the image
        size=params['size'],
        sudopw='',  # with root should not need sudo
        build_dir=build_dir,
        debug=params['debug'])

    # total_seconds() covers the whole interval; the .seconds attribute
    # alone would drop any whole days from the elapsed time.
    final_time = int((datetime.now() - start_time).total_seconds())
    bot.logger.info("Final time of build %s seconds.", final_time)

    # Did the container build successfully?
    test_result = test_container(image)
    if test_result['return_code'] == 255:
        bot.logger.error("Image failed to bootstrap, cancelling build.")
        sys.exit(1)

    # Compress image (done before packaging, which is asked to remove it)
    compressed_image = "%s.img.gz" % image
    with open(compressed_image, 'wb') as gz_out:
        subprocess.call(['gzip', '-c', '-9', image], stdout=gz_out)

    # Get singularity version
    singularity_version = get_singularity_version()

    # 2.2.x releases are flagged so package() can handle them differently
    old_version = singularity_version.startswith('2.2')

    # Package the image metadata (files, folders, etc)
    image_package = package(image_path=image,
                            spec_path=params['spec_file'],
                            output_folder=build_dir,
                            sudopw='',
                            remove_image=True,
                            verbose=True,
                            old_version=old_version)

    # Derive software tags by subtracting similar OS
    diff = get_diff(image_package=image_package)

    # Get tags for services, executables
    interesting_folders = ['init', 'init.d', 'bin', 'systemd']
    tags = get_tags(search_folders=interesting_folders, diff=diff)

    # Count file types, and extensions
    counts = dict()
    counts['readme'] = file_counts(diff=diff)
    counts['copyright'] = file_counts(diff=diff, patterns=['copyright'])
    counts['authors-thanks-credit'] = file_counts(
        diff=diff, patterns=['authors', 'thanks', 'credit'])
    counts['todo'] = file_counts(diff=diff, patterns=['todo'])
    extensions = extension_counts(diff=diff)

    # Index label of the highest similarity score
    os_sims = estimate_os(image_package=image_package, return_top=False)
    most_similar = os_sims['SCORE'].idxmax()

    metrics = {
        'size': params['size'],
        'build_time_seconds': final_time,
        'singularity_version': singularity_version,
        'singularity_python_version': singularity_python_version,
        'estimated_os': most_similar,
        'os_sims': os_sims['SCORE'].to_dict(),
        'tags': tags,
        'file_counts': counts,
        'file_ext': extensions
    }

    output = {
        'image': compressed_image,
        'image_package': image_package,
        'metadata': metrics,
        'params': params
    }

    return output