Example #1
    def singularity_pull_image(self, container, out_path, cache_path,
                               progress):
        """Pull a singularity image using ``singularity pull``

        Attempt to use a local installation of singularity to pull the image.

        Args:
            container (str): A pipeline's container name. Usually it is of similar format
                to ``nfcore/name:version``.
            out_path (str): The final target output path
            cache_path (str, None): The NXF_SINGULARITY_CACHEDIR path if set, None if not
            progress (Progress): Rich progress bar instance to add tasks to.

        Raises:
            Various exceptions possible from `subprocess` execution of Singularity.
        """
        output_path = cache_path or out_path

        # Pull using singularity
        address = "docker://{}".format(container.replace("docker://", ""))
        singularity_command = [
            "singularity", "pull", "--name", output_path, address
        ]
        log.debug("Building singularity image: {}".format(address))
        log.debug("Singularity command: {}".format(
            " ".join(singularity_command)))

        # Progress bar to show that something is happening
        task = progress.add_task(container,
                                 start=False,
                                 total=False,
                                 progress_type="singularity_pull",
                                 current_log="")

        # Run the singularity pull command
        proc = subprocess.Popen(
            singularity_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )
        for line in proc.stdout:
            log.debug(line.strip())
            progress.update(task, current_log=line.strip())

        # Copy cached download if we are using the cache
        if cache_path:
            log.debug("Copying {} from cache: '{}'".format(
                container, os.path.basename(out_path)))
            progress.update(
                task, current_log="Copying from cache to target directory")
            shutil.copyfile(cache_path, out_path)

        progress.remove_task(task)
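
The method above expects a ``Progress`` instance whose columns can render the custom ``current_log`` field passed to ``add_task`` (extra keyword arguments to ``add_task`` end up in ``task.fields``). A minimal sketch of a suitable progress bar, assuming nothing about the project's real ``DownloadProgress`` class:

import rich.progress

# Sketch: a text column that shows the last line of singularity output,
# which singularity_pull_image() updates via progress.update(..., current_log=...).
progress = rich.progress.Progress(
    "[bold blue]{task.description}",
    rich.progress.SpinnerColumn(),
    "[dim]{task.fields[current_log]}",
)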
Example #2
    def lint_project(self,
                     calling_class,
                     check_functions: list = None,
                     custom_check_files: bool = False,
                     is_subclass_calling=True) -> None:
        """Main linting function.
        Takes the template directory as the primary input and iterates through
        the different linting checks in order. Collects any warnings or errors
        and returns a summary at completion. Raises an exception if there is a
        critical error that makes the rest of the tests pointless (e.g. no
        project script). Results from this function are printed by the main script.

        :param calling_class: The class that calls this function; used to fetch the class methods, which are the linting methods
        :param check_functions: List of functions of the calling class that should be checked. If not set, the default TemplateLinter check functions are called
        :param custom_check_files: Set to True if the TemplateLinter check_files_exist check should not be run
        :param is_subclass_calling: Indicates whether a domain-specific linter is calling the linting or not
        """
        # Called on its own, so not from a subclass -> run general linting
        if check_functions is None:
            # Fetch all general linting functions
            check_functions = [
                func for func in dir(TemplateLinter)
                if (callable(getattr(TemplateLinter, func))
                    and not func.startswith('_'))
            ]
            # Remove internal functions
            check_functions = list(
                set(check_functions).difference(
                    {'lint_project', 'print_results', 'check_version_match'}))
        # Some templates (e.g. latex based) do not adhere to the common programming based templates and therefore do not need to check for e.g. docs
        # or lint changelog
        if custom_check_files:
            check_functions.remove('check_files_exist')
            check_functions.remove('lint_changelog')

        progress = rich.progress.Progress(
            "[bold green]{task.description}",
            rich.progress.BarColumn(bar_width=None),
            "[bold yellow]{task.completed} of {task.total}[reset] [bold green]{task.fields[func_name]}",
        )
        with progress:
            lint_progress = progress.add_task("Running lint checks",
                                              total=len(check_functions),
                                              func_name=check_functions)
            for fun_name in check_functions:
                progress.update(lint_progress, advance=1, func_name=fun_name)
                if fun_name == 'check_files_exist':
                    getattr(calling_class, fun_name)(is_subclass_calling)
                else:
                    getattr(calling_class, fun_name)()
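
A domain-specific linter would typically subclass ``TemplateLinter`` and hand its own methods to ``lint_project`` through ``check_functions``. A hypothetical sketch (the subclass and check names below are illustrative assumptions, not part of the source):

class PythonTemplateLinter(TemplateLinter):
    def python_check_version_consistent(self):
        ...  # a domain-specific linting check

    def lint(self):
        # Run only the domain-specific check on top of the general linting.
        self.lint_project(self,
                          check_functions=['python_check_version_consistent'],
                          custom_check_files=False,
                          is_subclass_calling=True)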
Example #3
    async def migrate_to(
        cls,
        new_driver_cls: Type["BaseDriver"],
        all_custom_group_data: Dict[str, Dict[str, Dict[str, int]]],
    ) -> None:
        """Migrate data from this backend to another.

        Both drivers must be initialized beforehand.

        This will only move the data - no instance metadata is modified
        as a result of this operation.

        Parameters
        ----------
        new_driver_cls
            Subclass of `BaseDriver`.
        all_custom_group_data : Dict[str, Dict[str, Dict[str, int]]]
            Dict mapping cog names, to cog IDs, to custom groups, to
            primary key lengths.

        """
        # Backend-agnostic method of migrating from one driver to another.
        with rich.progress.Progress(
            rich.progress.SpinnerColumn(),
            rich.progress.TextColumn("[progress.description]{task.description}"),
            RichIndefiniteBarColumn(),
            rich.progress.TextColumn("{task.completed} cogs processed"),
            rich.progress.TimeElapsedColumn(),
        ) as progress:
            cog_count = 0
            tid = progress.add_task("[yellow]Migrating", completed=cog_count, total=cog_count + 1)
            async for cog_name, cog_id in cls.aiter_cogs():
                progress.console.print(f"Working on {cog_name}...")

                this_driver = cls(cog_name, cog_id)
                other_driver = new_driver_cls(cog_name, cog_id)
                custom_group_data = all_custom_group_data.get(cog_name, {}).get(cog_id, {})
                exported_data = await this_driver.export_data(custom_group_data)
                await other_driver.import_data(exported_data, custom_group_data)

                cog_count += 1
                progress.update(tid, completed=cog_count, total=cog_count + 1)
            progress.update(tid, total=cog_count)
        print()
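
``RichIndefiniteBarColumn`` is a project-specific column defined elsewhere. A plausible sketch, assuming it is simply a ``BarColumn`` that pulses while the migration is still running (the real implementation may differ):

from rich.progress import BarColumn, Task
from rich.progress_bar import ProgressBar


class RichIndefiniteBarColumn(BarColumn):
    def render(self, task: Task) -> ProgressBar:
        # Pulse while the task is unfinished; migrate_to() keeps total one ahead
        # of completed until the last cog has been processed.
        return ProgressBar(
            pulse=task.total is None or task.completed < task.total,
            animation_time=task.get_time(),
            width=self.bar_width,
        )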
Example #4
def download_response(response: Response, download: typing.BinaryIO) -> None:
    console = rich.console.Console()
    console.print()
    content_length = response.headers.get("Content-Length")
    with rich.progress.Progress(
        "[progress.description]{task.description}",
        "[progress.percentage]{task.percentage:>3.0f}%",
        rich.progress.BarColumn(bar_width=None),
        rich.progress.DownloadColumn(),
        rich.progress.TransferSpeedColumn(),
    ) as progress:
        description = f"Downloading [bold]{rich.markup.escape(download.name)}"
        download_task = progress.add_task(
            description,
            total=int(content_length or 0),
            start=content_length is not None,
        )
        for chunk in response.iter_bytes():
            download.write(chunk)
            progress.update(download_task, completed=response.num_bytes_downloaded)
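
``iter_bytes()`` and ``num_bytes_downloaded`` require a streamed response, so a caller might look like this (a usage sketch with a placeholder URL):

import httpx

with open("archive.zip", "wb") as download_file:
    with httpx.stream("GET", "https://example.com/archive.zip") as response:
        download_response(response, download_file)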
Example #5
def download_response(response: httpx.Response) -> None:
    console = rich.console.Console()
    syntax = rich.syntax.Syntax("", "http", theme="ansi_dark", word_wrap=True)
    console.print(syntax)

    filename = get_download_filename(response)
    content_length = response.headers.get("Content-Length")
    kwargs = {"total": int(content_length)} if content_length else {}
    with open(filename, mode="bw") as download_file:
        with rich.progress.Progress(
            "[progress.description]{task.description}",
            "[progress.percentage]{task.percentage:>3.0f}%",
            rich.progress.BarColumn(bar_width=None),
            rich.progress.DownloadColumn(),
            rich.progress.TransferSpeedColumn(),
        ) as progress:
            description = f"Downloading [bold]{filename}"
            download_task = progress.add_task(description, **kwargs)  # type: ignore
            for chunk in response.iter_bytes():
                download_file.write(chunk)
                progress.update(download_task, completed=response.num_bytes_downloaded)
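
``get_download_filename`` is defined elsewhere in the project. A hypothetical stand-in (not the project's code) could prefer the ``Content-Disposition`` header and fall back to the last segment of the URL path:

import os
import re

import httpx


def get_download_filename(response: httpx.Response) -> str:
    content_disposition = response.headers.get("Content-Disposition", "")
    match = re.search(r'filename="?([^";]+)"?', content_disposition)
    if match:
        return match.group(1)
    return os.path.basename(response.url.path) or "download"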
Example #6
def test_progress_max_refresh() -> None:
    """Test max_refresh argument."""
    time = 0.0

    def get_time() -> float:
        nonlocal time
        try:
            return time
        finally:
            time = time + 1.0

    console = Console(
        color_system=None,
        width=80,
        legacy_windows=False,
        force_terminal=True,
        _environ={},
    )
    column = TextColumn("{task.description}")
    column.max_refresh = 3
    progress = Progress(
        column,
        get_time=get_time,
        auto_refresh=False,
        console=console,
    )
    console.begin_capture()
    with progress:
        task_id = progress.add_task("start")
        for tick in range(6):
            progress.update(task_id, description=f"tick {tick}")
            progress.refresh()
    result = console.end_capture()
    print(repr(result))
    assert (
        result
        == "\x1b[?25l\r\x1b[2Kstart\r\x1b[2Kstart\r\x1b[2Ktick 1\r\x1b[2Ktick 1\r\x1b[2Ktick 3\r\x1b[2Ktick 3\r\x1b[2Ktick 5\r\x1b[2Ktick 5\n\x1b[?25h"
    )
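
The fake clock advances by one second per ``get_time()`` call and ``max_refresh = 3`` throttles how often the column re-renders its text, which is why intermediate ticks are missing from the captured escape sequences. The same throttling in ordinary use (values are illustrative):

from rich.progress import Progress, TextColumn

status_column = TextColumn("{task.fields[status]}")
status_column.max_refresh = 0.5  # re-render this column's text at most every half second

with Progress(status_column) as progress:
    task_id = progress.add_task("work", total=None, status="starting")
    progress.update(task_id, status="halfway there")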
Example #7
def make_progress() -> Progress:
    _time = 0.0

    def fake_time():
        nonlocal _time
        try:
            return _time
        finally:
            _time += 1

    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        color_system="truecolor",
        width=80,
        legacy_windows=False,
        _environ={},
    )
    progress = Progress(console=console,
                        get_time=fake_time,
                        auto_refresh=False)
    task1 = progress.add_task("foo")
    task2 = progress.add_task("bar", total=30)
    progress.advance(task2, 16)
    task3 = progress.add_task("baz", visible=False)
    task4 = progress.add_task("egg")
    progress.remove_task(task4)
    task4 = progress.add_task("foo2", completed=50, start=False)
    progress.stop_task(task4)
    progress.start_task(task4)
    progress.update(task4,
                    total=200,
                    advance=50,
                    completed=200,
                    visible=True,
                    refresh=True)
    progress.stop_task(task4)
    return progress
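
Because the console writes into a ``StringIO``, the prepared display can be rendered and its output inspected. A usage sketch built on the helper above:

progress = make_progress()
with progress:           # start and stop the live display
    progress.refresh()   # force a render, since auto_refresh is disabled
output = progress.console.file.getvalue()
# ``output`` now holds the ANSI frames for the visible tasks ("foo", "bar", "foo2").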
Example #8
def get_filelist(run_module_names):
    """
    Go through all supplied search directories and assemble a master
    list of files to search. Then fire search functions for each file.
    """
    # Prep search patterns
    spatterns = [{}, {}, {}, {}, {}, {}, {}]
    runtimes["sp"] = defaultdict()
    ignored_patterns = []
    skipped_patterns = []
    for key, sps in config.sp.items():
        mod_name = key.split("/", 1)[0]
        if mod_name.lower() not in [m.lower() for m in run_module_names]:
            ignored_patterns.append(key)
            continue
        files[key] = list()
        if not isinstance(sps, list):
            sps = [sps]

        # Warn if we have any unrecognised search pattern keys
        expected_sp_keys = [
            "fn",
            "fn_re",
            "contents",
            "contents_re",
            "num_lines",
            "shared",
            "skip",
            "max_filesize",
            "exclude_fn",
            "exclude_fn_re",
            "exclude_contents",
            "exclude_contents_re",
        ]
        unrecognised_keys = [
            y for x in sps for y in x.keys() if y not in expected_sp_keys
        ]
        if len(unrecognised_keys) > 0:
            logger.warning(
                "Unrecognised search pattern keys for '{}': {}".format(
                    key, ", ".join(unrecognised_keys)))

        # Check if we are skipping this search key
        if any([x.get("skip") for x in sps]):
            skipped_patterns.append(key)

        # Split search patterns according to speed of execution.
        if any([x for x in sps if "contents_re" in x]):
            if any([x for x in sps if "num_lines" in x]):
                spatterns[4][key] = sps
            elif any([x for x in sps if "max_filesize" in x]):
                spatterns[5][key] = sps
            else:
                spatterns[6][key] = sps
        elif any([x for x in sps if "contents" in x]):
            if any([x for x in sps if "num_lines" in x]):
                spatterns[1][key] = sps
            elif any([x for x in sps if "max_filesize" in x]):
                spatterns[2][key] = sps
            else:
                spatterns[3][key] = sps
        else:
            spatterns[0][key] = sps

    if len(ignored_patterns) > 0:
        logger.debug(
            "Ignored {} search patterns as didn't match running modules.".
            format(len(ignored_patterns)))

    if len(skipped_patterns) > 0:
        logger.info("Skipping {} file search patterns".format(
            len(skipped_patterns)))
        logger.debug("Skipping search patterns: {}".format(
            ", ".join(skipped_patterns)))

    def add_file(fn, root):
        """
        Function applied to each file found when walking the analysis
        directories. Runs through all search patterns and returns True
        if a match is found.
        """
        f = {"fn": fn, "root": root}

        # Check that this is a file and not a pipe or anything weird
        if not os.path.isfile(os.path.join(root, fn)):
            file_search_stats["skipped_not_a_file"] += 1
            return False

        # Check that we don't want to ignore this file
        i_matches = [
            n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)
        ]
        if len(i_matches) > 0:
            logger.debug(
                "Ignoring file as matched an ignore pattern: {}".format(fn))
            file_search_stats["skipped_ignore_pattern"] += 1
            return False

        # Limit search to small files, to avoid 30GB FastQ files etc.
        try:
            f["filesize"] = os.path.getsize(os.path.join(root, fn))
        except (IOError, OSError, ValueError, UnicodeDecodeError):
            logger.debug(
                "Couldn't read file when checking filesize: {}".format(fn))
        else:
            if f["filesize"] > config.log_filesize_limit:
                file_search_stats["skipped_filesize_limit"] += 1
                return False

        # Test file for each search pattern
        file_matched = False
        for patterns in spatterns:
            for key, sps in patterns.items():
                start = time.time()
                for sp in sps:
                    if search_file(sp, f, key):
                        # Check that we shouldn't exclude this file
                        if not exclude_file(sp, f):
                            # Looks good! Remember this file
                            files[key].append(f)
                            file_search_stats[key] = file_search_stats.get(
                                key, 0) + 1
                            file_matched = True
                        # Don't keep searching this file for other modules
                        if not sp.get("shared", False):
                            runtimes["sp"][key] = runtimes["sp"].get(
                                key, 0) + (time.time() - start)
                            return True
                        # Don't look at other patterns for this module
                        else:
                            break
                runtimes["sp"][key] = runtimes["sp"].get(
                    key, 0) + (time.time() - start)

        return file_matched

    # Go through the analysis directories and get file list
    multiqc_installation_dir_files = [
        "LICENSE",
        "CHANGELOG.md",
        "Dockerfile",
        "MANIFEST.in",
        ".gitmodules",
        "README.md",
        "CSP.txt",
        "setup.py",
        ".gitignore",
    ]
    total_sp_starttime = time.time()
    for path in config.analysis_dir:
        if os.path.islink(path) and config.ignore_symlinks:
            file_search_stats["skipped_symlinks"] += 1
            continue
        elif os.path.isfile(path):
            searchfiles.append([os.path.basename(path), os.path.dirname(path)])
        elif os.path.isdir(path):
            for root, dirnames, filenames in os.walk(
                    path, followlinks=(not config.ignore_symlinks),
                    topdown=True):
                bname = os.path.basename(root)

                # Skip any sub-directories matching ignore params
                orig_dirnames = dirnames[:]
                for n in config.fn_ignore_dirs:
                    dirnames[:] = [
                        d for d in dirnames
                        if not fnmatch.fnmatch(d, n.rstrip(os.sep))
                    ]
                    if len(orig_dirnames) != len(dirnames):
                        removed_dirs = [
                            os.path.join(root, d)
                            for d in set(orig_dirnames).symmetric_difference(
                                set(dirnames))
                        ]
                        logger.debug(
                            "Ignoring directory as matched fn_ignore_dirs: {}".
                            format(", ".join(removed_dirs)))
                        orig_dirnames = dirnames[:]
                for n in config.fn_ignore_paths:
                    dirnames[:] = [
                        d for d in dirnames if not fnmatch.fnmatch(
                            os.path.join(root, d), n.rstrip(os.sep))
                    ]
                    if len(orig_dirnames) != len(dirnames):
                        removed_dirs = [
                            os.path.join(root, d)
                            for d in set(orig_dirnames).symmetric_difference(
                                set(dirnames))
                        ]
                        logger.debug(
                            "Ignoring directory as matched fn_ignore_paths: {}"
                            .format(", ".join(removed_dirs)))

                # Skip *this* directory if matches ignore params
                d_matches = [
                    n for n in config.fn_ignore_dirs
                    if fnmatch.fnmatch(bname, n.rstrip(os.sep))
                ]
                if len(d_matches) > 0:
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_dirs: {}".
                        format(bname))
                    continue
                p_matches = [
                    n for n in config.fn_ignore_paths
                    if fnmatch.fnmatch(root, n.rstrip(os.sep))
                ]
                if len(p_matches) > 0:
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_paths: {}".
                        format(root))
                    continue

                # Sanity check - make sure that we're not just running in the installation directory
                if len(filenames) > 0 and all(
                    [fn in filenames
                     for fn in multiqc_installation_dir_files]):
                    logger.error(
                        "Error: MultiQC is running in source code directory! {}"
                        .format(root))
                    logger.warning(
                        "Please see the docs for how to use MultiQC: https://multiqc.info/docs/#running-multiqc"
                    )
                    dirnames[:] = []
                    filenames[:] = []
                    continue

                # Search filenames in this directory
                for fn in filenames:
                    searchfiles.append([fn, root])

    # Search through collected files
    progress_obj = rich.progress.Progress(
        "[blue]|[/]      ",
        rich.progress.SpinnerColumn(),
        "[blue]{task.description}[/] |",
        rich.progress.BarColumn(),
        "[progress.percentage]{task.percentage:>3.0f}%",
        "[green]{task.completed}/{task.total}",
        "[dim]{task.fields[s_fn]}",
    )
    with progress_obj as progress:
        mqc_task = progress.add_task("searching",
                                     total=len(searchfiles),
                                     s_fn="")
        for sf in searchfiles:
            progress.update(mqc_task,
                            advance=1,
                            s_fn=os.path.join(sf[1], sf[0])[-50:])
            if not add_file(sf[0], sf[1]):
                file_search_stats["skipped_no_match"] += 1
        progress.update(mqc_task, s_fn="")

    runtimes["total_sp"] = time.time() - total_sp_starttime
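
The patterns in ``config.sp`` are keyed by ``module/section`` and use the keys listed in ``expected_sp_keys``. An illustrative (hypothetical) fragment, showing how the values map onto the speed buckets above:

config.sp = {
    # Filename match only: cheapest bucket, spatterns[0].
    "fastqc/data": {"fn": "fastqc_data.txt"},
    # Contents match limited to the first lines of the file: spatterns[1].
    "samtools/flagstat": {
        "contents": "in total (QC-passed reads + QC-failed reads)",
        "num_lines": 10,
    },
    # A shared pattern lets other modules keep matching the same file.
    "custom_module/section": [{"fn": "*_mqc.txt", "shared": True}],
}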
Example #9
def run(prefix, archive, program, prefix_specified, copy_threshold,
        distance_threshold):
    start = timer()
    failed_runs = []
    error_runs = []
    lagrange_runner = lagrange.lagrange(program)

    console = rich.console.Console()
    linreg_xs = []
    linreg_ys = []

    with rich.progress.Progress() as progress:
        jobs = directory.extractTarFileAndMakeDirectories(
            archive, prefix, progress)

        random.shuffle(jobs)

        work_task = progress.add_task("[red]Running...", total=len(jobs))
        for expected, experiment in jobs:
            try:
                experiment.runExperiment(lagrange_runner)
            except directory.ExperimentFilesMissing:
                error_runs.append(experiment)
            progress.update(work_task, advance=1.0)

        check_task = progress.add_task("[red]Checking...", total=len(jobs))

        for expected, experiment in jobs:
            if experiment.failed():
                continue
            parameter_diff = expected.parameterVectorDifference(experiment)
            try:
                dist = expected.metricCompare(experiment)
            except:
                experiment.setFailed()
                continue

            linreg_xs.append(parameter_diff)
            linreg_ys.append(dist)
            if dist > distance_threshold:
                failed_runs.append(
                    directory.ExperimentWithDistance(experiment, dist))
            progress.update(check_task, advance=1.0)

    if len(linreg_xs) > 0:
        linreg_result = LinearRegression().fit(linreg_xs, linreg_ys)
        linreg_rsquared = linreg_result.score(linreg_xs, linreg_ys)
        console.print("Parameter error regression coefficient: {}".format(
            linreg_rsquared))

    with open(os.path.join(prefix, "failed_paths.yaml"), "w") as outfile:
        yaml.add_representer(directory.ExpectedTrialDirectory,
                             directory.DirectoryRepresenter)
        yaml.add_representer(directory.ExperimentTrialDirectory,
                             directory.DirectoryRepresenter)
        yaml.add_representer(directory.ExperimentWithDistance,
                             directory.ExperimentWithDistanceRepresenter)
        outfile.write(
            yaml.dump({
                "failed-runs": failed_runs,
                "error-runs": error_runs
            }))

    if len(failed_runs) != 0 or len(error_runs) != 0:
        if len(failed_runs) != 0:
            console.print(
                "Tests that completed, but gave a wrong result (top 10):",
                sorted(failed_runs, key=lambda a: a._dist)[-10:])
            console.print(
                "Total of {} ({}%) jobs resulted in errors over tolerance".
                format(len(failed_runs),
                       len(failed_runs) / len(jobs) * 100))
        if len(error_runs) != 0:
            console.print("Tests that failed to complete:",
                          sorted(error_runs, key=lambda d: d._path))
            console.print("Total of {} ({}%) jobs failed to run".format(
                len(error_runs),
                len(error_runs) / len(jobs) * 100))
        if not prefix_specified and (
            (len(failed_runs) > copy_threshold and not linreg_rsquared > 0.95)
                or len(error_runs) != 0):
            basename = os.path.split(prefix)[1]
            new_prefix = os.path.abspath(os.path.join(os.getcwd(), basename))
            console.print(
                "Copying the failed directories to {}".format(new_prefix))
            shutil.copytree(prefix, new_prefix)

    else:
        console.print("[bold green]All Clear!")
    end = timer()
    console.print("Testing took {:.3f} seconds".format(end - start))
Example #10
    def singularity_download_image(self, container, out_path, cache_path,
                                   progress):
        """Download a singularity image from the web.

        Use native Python to download the file.

        Args:
            container (str): A pipeline's container name. Usually it is of similar format
                to ``https://depot.galaxyproject.org/singularity/name:version``
            out_path (str): The final target output path
            cache_path (str, None): The NXF_SINGULARITY_CACHEDIR path if set, None if not
            progress (Progress): Rich progress bar instance to add tasks to.
        """
        log.debug(f"Downloading Singularity image: '{container}'")

        # Set output path to save file to
        output_path = cache_path or out_path
        output_path_tmp = f"{output_path}.partial"
        log.debug(f"Downloading to: '{output_path_tmp}'")

        # Set up progress bar
        nice_name = container.split("/")[-1][:50]
        task = progress.add_task(nice_name,
                                 start=False,
                                 total=False,
                                 progress_type="download")
        try:
            # Delete temporary file if it already exists
            if os.path.exists(output_path_tmp):
                os.remove(output_path_tmp)

            # Open file handle and download
            with open(output_path_tmp, "wb") as fh:
                # Disable caching as this breaks streamed downloads
                with requests_cache.disabled():
                    r = requests.get(container,
                                     allow_redirects=True,
                                     stream=True,
                                     timeout=60 * 5)
                    filesize = r.headers.get("Content-length")
                    if filesize:
                        progress.update(task, total=int(filesize))
                        progress.start_task(task)

                    # Stream download
                    for data in r.iter_content(chunk_size=4096):
                        # Check that the user didn't hit ctrl-c
                        if self.kill_with_fire:
                            raise KeyboardInterrupt
                        progress.update(task, advance=len(data))
                        fh.write(data)

            # Rename partial filename to final filename
            os.rename(output_path_tmp, output_path)
            output_path_tmp = None

            # Copy cached download if we are using the cache
            if cache_path:
                log.debug("Copying {} from cache: '{}'".format(
                    container, os.path.basename(out_path)))
                progress.update(
                    task, description="Copying from cache to target directory")
                shutil.copyfile(cache_path, out_path)

            progress.remove_task(task)

        except:
            # Kill the progress bars
            for t in progress.task_ids:
                progress.remove_task(t)
            # Try to delete the incomplete download
            log.debug(
                f"Deleting incomplete singularity image download:\n'{output_path_tmp}'"
            )
            if output_path_tmp and os.path.exists(output_path_tmp):
                os.remove(output_path_tmp)
            if output_path and os.path.exists(output_path):
                os.remove(output_path)
            # Re-raise the caught exception
            raise
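
The download task starts stopped and without a known size, then switches to a determinate bar once the ``Content-length`` header arrives. The same pattern in isolation (a self-contained sketch with a made-up file size, assuming a recent version of rich that accepts ``total=None``):

import time

import rich.progress

with rich.progress.Progress() as progress:
    # Indeterminate until the size is known, as in singularity_download_image() above.
    task = progress.add_task("download", start=False, total=None)
    filesize = 4096 * 100  # stand-in for the Content-length header value
    progress.update(task, total=filesize)
    progress.start_task(task)
    for _ in range(100):
        time.sleep(0.01)
        progress.update(task, advance=4096)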
Example #11
    def get_singularity_images(self):
        """Loop through container names and download Singularity images"""

        if len(self.containers) == 0:
            log.info("No container names found in workflow")
        else:
            with DownloadProgress() as progress:
                task = progress.add_task("all_containers",
                                         total=len(self.containers),
                                         progress_type="summary")

                # Organise containers based on what we need to do with them
                containers_exist = []
                containers_cache = []
                containers_download = []
                containers_pull = []
                for container in self.containers:

                    # Fetch the output and cached filenames for this container
                    out_path, cache_path = self.singularity_image_filenames(
                        container)

                    # Check that the directories exist
                    out_path_dir = os.path.dirname(out_path)
                    if not os.path.isdir(out_path_dir):
                        log.debug(
                            f"Output directory not found, creating: {out_path_dir}"
                        )
                        os.makedirs(out_path_dir)
                    if cache_path:
                        cache_path_dir = os.path.dirname(cache_path)
                        if not os.path.isdir(cache_path_dir):
                            log.debug(
                                f"Cache directory not found, creating: {cache_path_dir}"
                            )
                            os.makedirs(cache_path_dir)

                    # We already have the target file in place, return
                    if os.path.exists(out_path):
                        containers_exist.append(container)
                        continue

                    # We have a copy of this in the NXF_SINGULARITY_CACHE dir
                    if cache_path and os.path.exists(cache_path):
                        containers_cache.append(
                            [container, out_path, cache_path])
                        continue

                    # Direct download within Python
                    if container.startswith("http"):
                        containers_download.append(
                            [container, out_path, cache_path])
                        continue

                    # Pull using singularity
                    containers_pull.append([container, out_path, cache_path])

                # Exit if we need to pull images and Singularity is not installed
                if len(containers_pull) > 0 and shutil.which(
                        "singularity") is None:
                    raise OSError(
                        "Singularity is needed to pull images, but it is not installed"
                    )

                # Go through each method of fetching containers in order
                for container in containers_exist:
                    progress.update(task, description="Image file exists")
                    progress.update(task, advance=1)

                for container in containers_cache:
                    progress.update(
                        task,
                        description=f"Copying singularity images from cache")
                    self.singularity_copy_cache_image(*container)
                    progress.update(task, advance=1)

                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=self.parallel_downloads) as pool:
                    progress.update(
                        task, description="Downloading singularity images")

                    # Kick off concurrent downloads
                    future_downloads = [
                        pool.submit(self.singularity_download_image,
                                    *container, progress)
                        for container in containers_download
                    ]

                    # Make ctrl-c work with multi-threading
                    self.kill_with_fire = False

                    try:
                        # Iterate over each threaded download, waiting for them to finish
                        for future in concurrent.futures.as_completed(
                                future_downloads):
                            try:
                                future.result()
                            except Exception:
                                raise
                            else:
                                try:
                                    progress.update(task, advance=1)
                                except Exception as e:
                                    log.error(
                                        f"Error updating progress bar: {e}")

                    except KeyboardInterrupt:
                        # Cancel the future threads that haven't started yet
                        for future in future_downloads:
                            future.cancel()
                        # Set the variable that the threaded function looks for
                        # Will trigger an exception from each thread
                        self.kill_with_fire = True
                        # Re-raise exception on the main thread
                        raise

                for container in containers_pull:
                    progress.update(task,
                                    description="Pulling singularity images")
                    try:
                        self.singularity_pull_image(*container, progress)
                    except RuntimeWarning as r:
                        # Raise exception if this is not possible
                        log.error(
                            "Not able to pull image. Service might be down or internet connection is dead."
                        )
                        raise r
                    progress.update(task, advance=1)
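
``DownloadProgress`` is a project-specific subclass of ``rich.progress.Progress``. One plausible sketch, assuming it switches columns per task based on the ``progress_type`` field used throughout these methods (not the project's exact code):

import rich.progress


class DownloadProgress(rich.progress.Progress):
    """Sketch: render each task with columns suited to its progress_type field."""

    def get_renderables(self):
        for task in self.tasks:
            progress_type = task.fields.get("progress_type")
            if progress_type == "summary":
                self.columns = (
                    "[magenta]{task.description}",
                    rich.progress.BarColumn(),
                    "[green]{task.completed}/{task.total} completed",
                )
            elif progress_type == "download":
                self.columns = (
                    "[blue]{task.description}",
                    rich.progress.BarColumn(),
                    rich.progress.DownloadColumn(),
                    rich.progress.TransferSpeedColumn(),
                )
            else:  # "singularity_pull" tasks carry a current_log field
                self.columns = (
                    "[magenta]{task.description}",
                    rich.progress.SpinnerColumn(),
                    "[dim]{task.fields[current_log]}",
                )
            yield self.make_tasks_table([task])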
Example #12
    def __init__(self,
                 kernel,
                 bin,
                 profile,
                 mutop,
                 timeout=30,
                 fitness='time',
                 popsize=128,
                 llvm_src_filename='cuda-device-only-kernel.ll',
                 use_fitness_map=True,
                 combine_positive_epistasis=False,
                 CXPB=0.8,
                 MUPB=0.1,
                 err_rate='0.01',
                 global_seed=None):
        self.CXPB = CXPB
        self.MUPB = MUPB
        self.err_rate = err_rate
        self.kernels = kernel
        self.appBinary = bin
        self.timeout = timeout
        self.fitness_function = fitness
        self.use_fitness_map = use_fitness_map
        self.combine_positive_epistasis = combine_positive_epistasis
        self.popsize = popsize
        self.mutop = mutop.split(',')
        self.rng = {}
        if global_seed is not None:
            random.seed(global_seed)

        try:
            with open(llvm_src_filename, 'r') as f:
                self.initSrcEnc = f.read().encode()
        except IOError:
            print("File {} does not exist".format(llvm_src_filename))
            exit(1)

        self.verifier = profile['verify']

        # Tools initialization
        # Detect GPU property
        cuda.init()
        # TODO: check if there are multiple GPUs.
        SM_MAJOR, SM_MINOR = cuda.Device(0).compute_capability()
        self.mgpu = 'sm_' + str(SM_MAJOR) + str(SM_MINOR)
        print(f'[Initializing GEVO] GPU compute capability: {self.mgpu}')

        # check Nvidia Profiler exists
        self.nvprof_path = shutil.which('nvprof')
        if self.nvprof_path is None:
            raise Exception('nvprof cannot be found')
        print(f'[Initializing GEVO] nvprof detected: {self.nvprof_path}')

        # Minimize both performance and error
        creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
        creator.create("Individual",
                       irind.llvmIRrep,
                       fitness=creator.FitnessMin)
        self.history = tools.History()
        self.toolbox = base.Toolbox()
        self.toolbox.register('mutate', self.mutLLVM)
        self.toolbox.register('mate', self.cxOnePointLLVM)
        # self.toolbox.register('select', tools.selDoubleTournament, fitness_size=2, parsimony_size=1.4, fitness_first=True)
        self.toolbox.register('select', tools.selNSGA2)
        self.toolbox.register('individual',
                              creator.Individual,
                              srcEnc=self.initSrcEnc,
                              mgpu=self.mgpu)
        self.toolbox.register('population', tools.initRepeat, list,
                              self.toolbox.individual)
        # Decorate the variation operators
        self.toolbox.decorate("mate", self.history.decorator)
        self.toolbox.decorate("mutate", self.history.decorator)

        self.stats = tools.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("min", min)
        self.stats.register("max", max)

        self.logbook = tools.Logbook()
        self.paretof = tools.ParetoFront()
        self.logbook.header = "gen", "evals", "min", "max"

        # Set up testcase
        self.origin = creator.Individual(self.initSrcEnc, self.mgpu)
        self.origin.ptx(self.cudaPTX)
        arg_array = [[]]
        for i, arg in enumerate(profile['args']):
            if arg.get('bond', None) is None:
                arg_array_next = [
                    e[:] for e in arg_array for _ in range(len(arg['value']))
                ]
                arg_array = arg_array_next
                for e1, e2 in zip(arg_array, cycle(arg['value'])):
                    e1.append(e2)
            else:
                for e in arg_array:
                    bonded_arg = arg['bond'][0]
                    bonded_idx = profile['args'][bonded_arg]['value'].index(
                        e[bonded_arg])
                    e.append(arg['value'][bonded_idx])

        arg_array = [[str(e) for e in args] for args in arg_array]

        self.testcase = []
        for i in range(len(arg_array)):
            self.testcase.append(
                self._testcase(self, i, kernel, bin, profile['verify']))
        with Progress(
                "[Initializing GEVO] Evaluate original program with test cases",
                "({task.completed} / {task.total})",
                rich.progress.TimeElapsedColumn()) as progress:
            task = progress.add_task("", total=len(arg_array))
            for tc, arg in zip(self.testcase, arg_array):
                tc.args = arg
                tc.evaluate()
                progress.update(task, advance=1)

        self.ofits = [tc.fitness[0] for tc in self.testcase]
        self.oerrs = [tc.fitness[1] for tc in self.testcase]
        self.origin.fitness.values = (sum(self.ofits) / len(self.ofits),
                                      max(self.oerrs))
        self.editFitMap[tuple()] = self.origin.fitness.values
        print(
            f"Average fitness of the original program: ({self.origin.fitness.values[0]:.2f}, {self.origin.fitness.values[1]:.2f})"
        )
        print("Individual test cases:")
        for fit, err in zip(self.ofits, self.oerrs):
            print(f"\t({fit:.2f}, {err:.2f})")
        self.positive_epistasis = {}
        self.negative_epistasis = {}
        self.need_discussion = {}
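
The ``arg_array`` loop above builds the cross product of every unbonded argument's values, while a ``bond`` ties an argument to whichever value the bonded argument took in each row. A worked sketch with made-up values:

profile_args = [
    {"value": [1, 2]},                            # argument 0: two candidate values
    {"value": [10, 20, 30]},                      # argument 1: three candidate values
    {"value": ["small", "large"], "bond": [0]},   # argument 2: follows argument 0's choice
]
# Feeding these through the loop yields the 2 x 3 cross product of arguments 0 and 1,
# with argument 2 copying the index chosen for argument 0 in each row:
#   [['1', '10', 'small'], ['1', '20', 'small'], ['1', '30', 'small'],
#    ['2', '10', 'large'], ['2', '20', 'large'], ['2', '30', 'large']]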