Example #1
    def __init__(
        self,
        pipeline=None,
        release=None,
        outdir=None,
        compress_type=None,
        force=False,
        container=None,
        singularity_cache_only=False,
        parallel_downloads=4,
    ):
        self.pipeline = pipeline
        self.release = release
        self.outdir = outdir
        self.output_filename = None
        self.compress_type = compress_type
        self.force = force
        self.container = container
        self.singularity_cache_only = singularity_cache_only
        self.parallel_downloads = parallel_downloads

        self.wf_releases = {}
        self.wf_branches = {}
        self.wf_sha = None
        self.wf_download_url = None
        self.nf_config = dict()
        self.containers = list()

        # Fetch remote workflows
        self.wfs = nf_core.list.Workflows()
        self.wfs.get_remote_workflows()
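
A minimal usage sketch of this constructor. The class name DownloadWorkflow, its module path, and the remote_workflows attribute are assumptions inferred from the nf_core.list.Workflows() call above, not confirmed by the snippet itself:

    from nf_core.download import DownloadWorkflow  # assumed module path

    dl = DownloadWorkflow(
        pipeline="nf-core/rnaseq",  # illustrative pipeline name
        release="3.0",              # illustrative release tag
        outdir="nf-core-rnaseq",
        compress_type="tar.gz",
    )
    # The constructor has already fetched the remote workflow list, so it
    # can be inspected straight away (attribute name assumed):
    print(len(dl.wfs.remote_workflows))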
Example #2
    def __init__(self, pipeline, release=None, singularity=False, outdir=None):
        self.pipeline = pipeline
        self.release = release
        self.singularity = singularity
        self.outdir = outdir

        self.wf_name = None
        self.wf_sha = None
        self.wf_download_url = None
        self.config = dict()
        self.containers = list()
Example #3
    def __init__(self, pipeline, release=None, singularity=False, outdir=None, compress_type='tar.gz'):
        self.pipeline = pipeline
        self.release = release
        self.singularity = singularity
        self.outdir = outdir
        self.output_filename = None
        self.compress_type = compress_type
        if self.compress_type == 'none':
            self.compress_type = None

        self.wf_name = None
        self.wf_sha = None
        self.wf_download_url = None
        self.config = dict()
        self.containers = list()
Example #4
    def validate_schema(self, schema=None):
        """
        Check that the Schema is valid

        Returns: Number of parameters found
        """
        if schema is None:
            schema = self.schema
        try:
            jsonschema.Draft7Validator.check_schema(schema)
            log.debug("JSON Schema Draft7 validated")
        except jsonschema.exceptions.SchemaError as e:
            raise AssertionError(
                "Schema does not validate as Draft 7 JSON Schema:\n {}".format(
                    e))

        param_keys = list(schema.get("properties", {}).keys())
        num_params = len(param_keys)
        for d_key, d_schema in schema.get("definitions", {}).items():
            # Check that this definition is mentioned in allOf
            assert "allOf" in schema, "Schema has definitions, but no allOf key"
            in_allOf = False
            for allOf in schema["allOf"]:
                if allOf["$ref"] == "#/definitions/{}".format(d_key):
                    in_allOf = True
            assert in_allOf, "Definition subschema `{}` not included in schema `allOf`".format(
                d_key)

            for d_param_id in d_schema.get("properties", {}):
                # Check that we don't have any duplicate parameter IDs in different definitions
                assert d_param_id not in param_keys, "Duplicate parameter found in schema `definitions`: `{}`".format(
                    d_param_id)
                param_keys.append(d_param_id)
                num_params += 1

        # Check that everything in allOf exists
        for allOf in schema.get("allOf", []):
            assert "definitions" in schema, "Schema has allOf, but no definitions"
            def_key = allOf["$ref"][14:]
            assert def_key in schema[
                "definitions"], "Subschema `{}` found in `allOf` but not `definitions`".format(
                    def_key)

        # Check that the schema describes at least one parameter
        assert num_params > 0, "No parameters found in schema"

        return num_params
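
A self-contained sketch of the structure this validator expects: a minimal Draft 7 schema with one top-level parameter and one definition wired into allOf (all property names here are illustrative):

    import jsonschema

    schema = {
        "$schema": "http://json-schema.org/draft-07/schema",
        "properties": {"input": {"type": "string"}},
        "definitions": {
            "output_options": {"properties": {"outdir": {"type": "string"}}},
        },
        "allOf": [{"$ref": "#/definitions/output_options"}],
    }

    # The same checks the method performs, inlined:
    jsonschema.Draft7Validator.check_schema(schema)  # valid Draft 7?
    assert schema["allOf"][0]["$ref"][14:] in schema["definitions"]
    # validate_schema(schema) would return 2: the parameters `input` and `outdir`.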
Example #5
    def find_container_images(self):
        """Find container image names for workflow.

        Starts by using `nextflow config` to pull out any process.container
        declarations. This works for DSL1.

        Second, we look for DSL2 containers. These can't be found with
        `nextflow config` at the time of writing, so we scrape the pipeline files.
        """

        log.debug("Fetching container names for workflow")
        containers_raw = []

        # Use linting code to parse the pipeline nextflow config
        self.nf_config = nf_core.utils.fetch_wf_config(
            os.path.join(self.outdir, "workflow"))

        # Find any config variables that look like a container
        for k, v in self.nf_config.items():
            if k.startswith("process.") and k.endswith(".container"):
                containers_raw.append(v.strip('"').strip("'"))

        # Recursive search through any DSL2 module files for container spec lines.
        for subdir, dirs, files in os.walk(
                os.path.join(self.outdir, "workflow", "modules")):
            for file in files:
                if file.endswith(".nf"):
                    with open(os.path.join(subdir, file), "r") as fh:
                        # Look for any lines with `container "xxx"`
                        matches = []
                        for line in fh:
                            match = re.match(
                                r"\s*container\s+[\"']([^\"']+)[\"']", line)
                            if match:
                                matches.append(match.group(1))

                        # If we have matches, save the first one that starts with http
                        for m in matches:
                            if m.startswith("http"):
                                containers_raw.append(m.strip('"').strip("'"))
                                break
                        # If we get here then we didn't call break - just save the first match
                        else:
                            if len(matches) > 0:
                                containers_raw.append(
                                    matches[0].strip('"').strip("'"))

        # Remove duplicates and sort
        containers_raw = sorted(list(set(containers_raw)))

        # Strip any container names that have dynamic names - e.g. {params.foo}
        self.containers = []
        for container in containers_raw:
            if "{" in container and "}" in container:
                log.error(
                    f"[red]Container name [green]'{container}'[/] has dynamic Nextflow logic in name - skipping![/]"
                )
                log.info(
                    "Please use a 'nextflow run' command to fetch this container. Ask on Slack if you need help."
                )
            else:
                self.containers.append(container)

        log.info("Found {} container{}".format(
            len(self.containers), "s" if len(self.containers) > 1 else ""))
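
The module scan above leans on Python's for/else construct: the else branch runs only when the loop finishes without hitting break. A minimal sketch of that control flow in isolation:

    matches = ["quay.io/biocontainers/fastqc:0.11.9--0"]

    for m in matches:
        if m.startswith("http"):
            chosen = m
            break  # an http(s) download URL wins immediately
    else:
        # No break fired: fall back to the first match, if there is one
        chosen = matches[0] if matches else None

    print(chosen)  # -> quay.io/biocontainers/fastqc:0.11.9--0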
Example #6
    def find_container_images(self):
        """Find container image names for workflow.

        Starts by using `nextflow config` to pull out any process.container
        declarations. This works for DSL1. It should return a simple string with resolved logic.

        Second, we look for DSL2 containers. These can't be found with
        `nextflow config` at the time of writing, so we scrape the pipeline files.
        This returns raw source code that will likely need to be cleaned.

        If multiple containers are found, prioritise any prefixed with http for direct download.

        Example syntax:

        Early DSL2:
            if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
                container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0"
            } else {
                container "quay.io/biocontainers/fastqc:0.11.9--0"
            }

        Later DSL2:
            container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
                'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
                'quay.io/biocontainers/fastqc:0.11.9--0' }"

        DSL1 / Special case DSL2:
            container "nfcore/cellranger:6.0.2"
        """

        log.debug("Fetching container names for workflow")
        containers_raw = []

        # Use linting code to parse the pipeline nextflow config
        self.nf_config = nf_core.utils.fetch_wf_config(
            os.path.join(self.outdir, "workflow"))

        # Find any config variables that look like a container
        for k, v in self.nf_config.items():
            if k.startswith("process.") and k.endswith(".container"):
                containers_raw.append(v.strip('"').strip("'"))

        # Recursive search through any DSL2 module files for container spec lines.
        for subdir, dirs, files in os.walk(
                os.path.join(self.outdir, "workflow", "modules")):
            for file in files:
                if file.endswith(".nf"):
                    with open(os.path.join(subdir, file), "r") as fh:
                        # Look for any lines with `container "xxx"`
                        this_container = None
                        contents = fh.read()
                        matches = re.findall(r"container\s*\"([^\"]*)\"",
                                             contents, re.S)
                        if matches:
                            for match in matches:
                                # Look for a http download URL.
                                # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/3809435/713980
                                url_regex = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
                                url_match = re.search(url_regex, match, re.S)
                                if url_match:
                                    this_container = url_match.group(0)
                                    break  # Prioritise http, exit loop as soon as we find it

                                # No https download, is the entire container string a docker URI?
                                else:
                                    # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980
                                    docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?((?![._-])(?:[a-z0-9._-]*)(?<![._-])(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$"
                                    docker_match = re.match(
                                        docker_regex, match.strip(), re.S)
                                    if docker_match:
                                        this_container = docker_match.group(0)

                                    # Don't recognise this, throw a warning
                                    else:
                                        log.error(
                                            f"[red]Cannot parse container string, skipping: [green]{match}"
                                        )

                        if this_container:
                            containers_raw.append(this_container)

        # Remove duplicates and sort
        self.containers = sorted(list(set(containers_raw)))

        log.info("Found {} container{}".format(
            len(self.containers), "s" if len(self.containers) > 1 else ""))
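
To see the prioritisation in action, the http regex from the code above can be run against the "Later DSL2" ternary string shown in the docstring; the Singularity URL is extracted and the quay.io fallback ignored (a standalone sketch, not part of the original class):

    import re

    container_str = (
        "${ workflow.containerEngine == 'singularity' ? "
        "'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : "
        "'quay.io/biocontainers/fastqc:0.11.9--0' }"
    )
    url_regex = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
    print(re.search(url_regex, container_str, re.S).group(0))
    # -> https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0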