Example No. 1
from tempfile import TemporaryFile


class TemporaryFileStorage:
    def __init__(self):
        self.temporaryFile = TemporaryFile()

    def __call__(self, data, bytesEncoding='utf8'):
        if not isinstance(data, bytes):
            data = str(data).encode(bytesEncoding)
        # Rewind and clear any previous contents, then write the new data
        self.temporaryFile.seek(0)
        self.temporaryFile.truncate()
        self.temporaryFile.write(data)
        return self
    def buffer(self):
        self.temporaryFile.seek(0)
        return self.temporaryFile
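A minimal usage sketch for the storage class above (the sample payloads are made up):

storage = TemporaryFileStorage()
storage("hello world")              # non-bytes input is encoded before writing
print(storage.buffer().read())      # b'hello world'
storage(b"replaced")                # the previous contents are truncated first
print(storage.buffer().read())      # b'replaced'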
Example No. 2
class APRFile(object):
    """Wrap a Python file-like object as an APR File"""

    def __init__(self, pyfile):
        self.pyfile = pyfile
        self.pool = Pool()
        self._as_parameter_ = POINTER(apr_file_t)()
        self.tempfile = None
        if hasattr(pyfile, "fileno"):
            # Looks like this is a real file. We can just write
            # directly to said file
            osfile = apr_os_file_t(get_osfhandle(pyfile.fileno()))
        else:
            # Looks like this is a StringIO buffer or a fake file.
            # Write to a temporary file and copy the output to the
            # buffer when we are closed or flushed
            self.tempfile = TemporaryFile()
            osfile = apr_os_file_t(get_osfhandle(self.tempfile.fileno()))
        apr_os_file_put(byref(self._as_parameter_), byref(osfile),
                        APR_CREATE | APR_WRITE | APR_BINARY, self.pool)

    def flush(self):
        """Flush output to the underlying Python object"""
        if self.tempfile:
            self.tempfile.seek(0)
            copyfileobj(self.tempfile, self.pyfile)
            self.tempfile.truncate(0)

    def close(self):
        """Close the APR file wrapper, leaving the underlying Python object
           untouched"""
        self.flush()
        if self.tempfile:
            self.tempfile.close()
            self.tempfile = None
        self.pool.destroy()
        self.pool = None

    def __del__(self):
        if self.pool:
            self.close()
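A rough usage sketch, assuming the APR ctypes bindings the class relies on (Pool, apr_file_t, apr_os_file_put, get_osfhandle, copyfileobj) are importable from the surrounding module; the fake file target below is purely illustrative:

class FakeFile(object):
    """A file-like object without fileno(), so APRFile buffers through a TemporaryFile."""
    def __init__(self):
        self.data = b""
    def write(self, chunk):
        self.data += chunk

target = FakeFile()
apr_file = APRFile(target)
# ... hand apr_file to an APR/Subversion routine that expects an apr_file_t* ...
apr_file.close()        # flush() copies the temp file into target, then the pool is destroyed
print(target.data)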
Example No. 3
import sys
from tempfile import TemporaryFile


class CapturedStdout:
    """Capture sys.stdout output in a temp file to allow function result testing
    Thanks to Catherine Devlin (catherinedevlin.blogspot.com) for the idea"""

    def __init__(self):
        """Capture stdout"""
        self.backupStdout=sys.stdout
        self.tmpFile=TemporaryFile()
        sys.stdout=self.tmpFile

    def readlines(self, reset=True):
        """
        @param reset: reset buffer for next usage (default is True)
        @return: array of lines captured and reset buffer"""
        self.tmpFile.seek(0)
        lines=self.tmpFile.readlines()
        if reset:
            self.reset()
        return [line.strip("\n").strip("\x00") for line in lines]

    def reset(self):
        """Reset stdout buffer"""
        self.tmpFile.seek(0)
        self.tmpFile.truncate(0)

    def gotPsyqlException(self, reset=True):
        """Check whether the captured output contains a PysqlException
        @param reset: reset buffer for next usage (default is True)
        @return: True if an exception was found, else False"""
        lines=self.readlines(reset)
        for line in lines:
            if "Pysql error" in line:
                return True
        return False

    def echoStdout(self):
        """Echo the current buffer on terminal stdout. Usefull for test debuging"""
        self.backupStdout.writelines(["%s\n" % line for line in self.readlines(reset=False)])

    def restoreStdout(self):
        sys.stdout=self.backupStdout
        self.tmpFile.close()
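A short usage sketch for the capture helper above (the printed strings are illustrative):

capture = CapturedStdout()
print("Pysql error: something went wrong")
had_error = capture.gotPsyqlException()    # True, and the buffer is reset
print("all good now")
lines = capture.readlines()                # ['all good now']
capture.restoreStdout()
print(had_error, lines)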
Example No. 5
def data(dataset=None, format=None):
    args = request.args
    dataset, format = (dataset, format) if dataset and format else (args["dataset"], args["format"])
    if dataset not in queries["dataset"] or format not in queries["format"]:
        queries["dataset"].append(dataset)
        queries["format"].append(format)
        temp = TemporaryFile('r+b')
        d = experiment_to_dict(dataset + '/' + format)
        # json.dumps returns str, so encode before writing to the binary temp file
        temp.write(json.dumps(d).encode('utf-8'))
        temp.truncate()
        if dataset not in cache:
            cache[dataset] = {}
        cache[dataset][format] = temp
    else:
        cache[dataset][format].seek(0)
        d = json.loads(cache[dataset][format].read())

    if dataset and format:
        return jsonify(**d)
    else:
        return jsonify()
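A self-contained sketch of the caching pattern used above, with the Flask plumbing and the `queries`, `cache`, and `experiment_to_dict` globals stripped out: the JSON payload is written to a TemporaryFile once and rewound for re-reading on later requests.

import json
from tempfile import TemporaryFile

cache_entry = TemporaryFile('r+b')
cache_entry.write(json.dumps({"dataset": "demo", "format": "csv"}).encode('utf-8'))

# A later request rewinds the cached entry and deserializes it again
cache_entry.seek(0)
payload = json.loads(cache_entry.read())
print(payload["format"])    # csv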
Example No. 6
     'rpm', '-i', configuration_data['RepositoryURL'] +
     configuration_data['yumRepositoryPath'] + '/jdk-' +
     configuration_data['deploymentDictionary']['javaVersion'] +
     '-fcs.x86_64.rpm', '--oldpackage', '--relocate',
     '/etc/init.d/jexec=/etc/init.d/jexec-' +
     configuration_data['deploymentDictionary']['javaVersion'],
     '--badreloc'
 ],
                             stdout=external_command_output,
                             stderr=external_command_output)
 if exit_code == 0:
     if debugging_enabled is True:
         external_command_output.seek(0)
         print('Yum output as follows:')
         print(external_command_output.read())
         external_command_output.truncate(0)
     print('OK')
 else:
     print('Failed')
     exit_script(1)
 for CACert in ['ExpediaRootCA', 'ExpediaInternal1C']:
     print('Adding ' + CACert + ' certificate to trust store: ', end='')
     certificate_fetch = subprocess.Popen([
         'curl', '-k', '-s', configuration_data['RepositoryURL'] +
         configuration_data['certificatePath'] + '/' + CACert + '.crt'
     ],
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
     certResponseBody = certificate_fetch.communicate()[0]
     if certificate_fetch.returncode != 0:
         print('FAILED. Could not fetch certificate at ' +
Example No. 7
def main():
	# parse options
	option, args = doc_optparse.parse(__doc__)
	
	if len(args) < 2:
		doc_optparse.exit()
	
	gff_files_1 = glob.glob(args[0])
	gff_files_2 = glob.glob(args[1])
	
	# create temporary files to store intersections
	temp_file_1 = TemporaryFile()
	temp_file_2 = TemporaryFile()
	
	if not option.enumerate:
		# use a wider column if we're going to need it
		if option.read_depth:
			col_width = 24
		elif option.verbose:
			col_width = 16
		else:
			col_width = 8
		
		# print column headings
		print " " * 8,
		for i in range(1, len(gff_files_1) + 1):
			print excel_column(i).ljust(col_width),
		print ""
	
	# initialize counter to print row headings
	file_number = 0
	
	# iterate through the second list of files
	for g2_path in gff_files_2:
		
		# print row heading
		if not option.enumerate:
			file_number += 1
			print str(file_number).ljust(8),
		
		# now iterate through the first list, do intersections and compare
		for g1_path in gff_files_1:
			
			# do the intersection one way
			g1 = gff.input(g1_path)
			g2 = gff.input(g2_path)
			for line in g1.intersect(g2):
				print >> temp_file_1, line
			
			# now do the intersection the other way
			g1_reverse = gff.input(g1_path)
			g2_reverse = gff.input(g2_path)
			for line in g2_reverse.intersect(g1_reverse):
				print >> temp_file_2, line
			
			# rewind each temporary file now storing intersection data
			temp_file_1.seek(0)
			temp_file_2.seek(0)
			
			# now go through the temporary files and work out concordancy
			g1_intx = gff.input(temp_file_1)
			g2_intx = gff.input(temp_file_2)
			matching_count = unmatching_count = 0
			# we cannot chain equal signs here, because the two would reference the
			# same list, and that would be bad...
			matching_read_depths, unmatching_read_depths = [], []
			
			for record1 in g1_intx:
				record2 = g2_intx.next()
				
				# these records should match in terms of the interval they represent
				if record2.seqname != record1.seqname or \
				  record2.start != record1.start or \
				  record2.end != record1.end:
				  	raise ValueError("files must be pre-sorted")
				
				# isolate the read depth info if we need to
				if option.read_depth:
					rd = []
					try:
						rd.append(int(record1.attributes["read_depth"].strip("\"")))
					except KeyError:
						pass
					try:
						rd.append(int(record2.attributes["read_depth"].strip("\"")))
					except KeyError:
						pass
				
				# now test if there's concordance
				try:
					if sorted(record2.attributes["alleles"].strip("\"").split("/")) != \
					  sorted(record1.attributes["alleles"].strip("\"").split("/")):
						unmatching_count += 1
						if option.enumerate:
							record1.attributes["concordant"] = "false"
							record2.attributes["concordant"] = "false"
							print record1
							print record2
						if option.read_depth:
							unmatching_read_depths.extend(rd)
					else:
						matching_count += 1
						if option.enumerate:
							record1.attributes["concordant"] = "true"
							record2.attributes["concordant"] = "true"
							print record1
							print record2
						if option.read_depth:
							matching_read_depths.extend(rd)
				# no alleles? not a SNP
				except KeyError:
					continue
			
			# now we print the result, being mindful of possible zero division problems, etc.
			if option.enumerate:
				pass
			elif option.read_depth:
				try:
					a = "%.1f" % mean(matching_read_depths)
					b = "%.1f" % median(matching_read_depths)
				except TypeError:
					a = "--"
					b = "--"
				try:
					c = "%.1f" % mean(unmatching_read_depths)
					d = "%.1f" % median(unmatching_read_depths)
				except TypeError:
					c = "--"
					d = "--"
				print ("%s %s : %s %s" % (a, b, c, d)).ljust(col_width),
			else:
				try:
					p = "%.1f%%" % (float(matching_count) / (matching_count + unmatching_count) * 100)
				except ZeroDivisionError:
					p = "--"
				if option.verbose:
					total_count = unmatching_count + matching_count
					print ("%s %s/%s" % (p, matching_count, total_count)).ljust(col_width),
				else:
					print p.ljust(col_width),
			
			# now we rewind, delete everything, and start again!
			temp_file_1.seek(0)
			temp_file_1.truncate()
			temp_file_2.seek(0)
			temp_file_2.truncate()
		
		# wrap up the line
		print ""
	
	# print the legend describing what the column and row headings mean
	if not option.enumerate:
		print "-" * 8
		file_number = 0
		for i in gff_files_1:
			file_number += 1
			print ("[%s]" % excel_column(file_number)).ljust(8),
			print i
		file_number = 0
		for i in gff_files_2:
			file_number += 1
			print ("[%s]" % file_number).ljust(8),
			print i
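The temporary-file reuse pattern from the loop above, in isolation: write the intersection output, rewind to read it back, then rewind and truncate before the next pair of files (the GFF data is stubbed out, and text mode is used here instead of the print >> redirection):

from tempfile import TemporaryFile

scratch = TemporaryFile(mode="w+")
for chunk in ("chr1 100 200 A/G\n", "chr1 300 400 C/T\n"):
    scratch.write(chunk)
    scratch.seek(0)          # rewind so the stored data can be re-read
    print(scratch.read().strip())
    scratch.seek(0)          # rewind and delete everything for the next iteration
    scratch.truncate()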
Example No. 8
class Composition:
    """A parsed mzcompose.yml with a loaded mzcompose.py file."""

    def __init__(
        self, repo: mzbuild.Repository, name: str, preserve_ports: bool = False
    ):
        self.name = name
        self.repo = repo
        self.images: List[mzbuild.Image] = []
        self.workflows: Dict[str, Callable[..., None]] = {}

        self.default_tag = os.getenv("MZBUILD_TAG", None)

        if name in self.repo.compositions:
            self.path = self.repo.compositions[name]
        else:
            raise UnknownCompositionError(name)

        # load the mzcompose.yml file, if one exists
        mzcompose_yml = self.path / "mzcompose.yml"
        if mzcompose_yml.exists():
            with open(mzcompose_yml) as f:
                compose = yaml.safe_load(f) or {}
        else:
            compose = {}

        self.compose = compose

        if "version" not in compose:
            compose["version"] = "3.7"

        if "services" not in compose:
            compose["services"] = {}

        # Load the mzcompose.py file, if one exists
        mzcompose_py = self.path / "mzcompose.py"
        if mzcompose_py.exists():
            spec = importlib.util.spec_from_file_location("mzcompose", mzcompose_py)
            assert spec
            module = importlib.util.module_from_spec(spec)
            assert isinstance(spec.loader, importlib.abc.Loader)
            spec.loader.exec_module(module)
            for name, fn in getmembers(module, isfunction):
                if name.startswith("workflow_"):
                    # The name of the workflow is the name of the function
                    # with the "workflow_" prefix stripped and any underscores
                    # replaced with dashes.
                    name = name[len("workflow_") :].replace("_", "-")
                    self.workflows[name] = fn

            for python_service in getattr(module, "SERVICES", []):
                compose["services"][python_service.name] = python_service.config

        for name, config in compose["services"].items():
            if "propagate_uid_gid" in config:
                if config["propagate_uid_gid"]:
                    config["user"] = f"{os.getuid()}:{os.getgid()}"
                del config["propagate_uid_gid"]

            ports = config.setdefault("ports", [])
            for i, port in enumerate(ports):
                if ":" in str(port):
                    raise UIError(
                        "programming error: disallowed host port in service {name!r}"
                    )
                if preserve_ports:
                    # If preserving ports, bind the container port to the same
                    # host port.
                    ports[i] = f"{port}:{port}"

            if self.repo.rd.coverage:
                # Emit coverage information to a file in a directory that is
                # bind-mounted to the "coverage" directory on the host. We
                # inject the configuration to all services for simplicity, but
                # this only has an effect if the service runs instrumented Rust
                # binaries.
                config.setdefault("environment", []).append(
                    f"LLVM_PROFILE_FILE=/coverage/{name}-%m.profraw"
                )
                config.setdefault("volumes", []).append("./coverage:/coverage")

        # Add default volumes
        compose.setdefault("volumes", {}).update(
            {
                "mzdata": None,
                "tmp": None,
                "secrets": None,
            }
        )

        self._resolve_mzbuild_references()

        # Emit the munged configuration to a temporary file so that we can later
        # pass it to Docker Compose.
        self.file = TemporaryFile()
        os.set_inheritable(self.file.fileno(), True)
        self._write_compose()

    def _resolve_mzbuild_references(self) -> None:
        # Resolve all services that reference an `mzbuild` image to a specific
        # `image` reference.
        for name, config in self.compose["services"].items():
            if "mzbuild" in config:
                image_name = config["mzbuild"]

                if image_name not in self.repo.images:
                    raise UIError(f"mzcompose: unknown image {image_name}")

                image = self.repo.images[image_name]
                override_tag = os.getenv(
                    f"MZBUILD_{image.env_var_name()}_TAG", self.default_tag
                )
                if override_tag is not None:
                    config["image"] = image.docker_name(override_tag)
                    print(
                        f"mzcompose: warning: overriding {image_name} image to tag {override_tag}",
                        file=sys.stderr,
                    )
                    del config["mzbuild"]
                else:
                    self.images.append(image)

        deps = self.repo.resolve_dependencies(self.images)
        for config in self.compose["services"].values():
            if "mzbuild" in config:
                config["image"] = deps[config["mzbuild"]].spec()
                del config["mzbuild"]

    def _write_compose(self) -> None:
        self.file.seek(0)
        self.file.truncate()
        yaml.dump(self.compose, self.file, encoding="utf-8")  # type: ignore
        self.file.flush()

    @classmethod
    def lint(cls, repo: mzbuild.Repository, name: str) -> List[LintError]:
        """Checks a composition for common errors."""
        if name not in repo.compositions:
            raise UnknownCompositionError(name)

        errs: List[LintError] = []

        path = repo.compositions[name] / "mzcompose.yml"

        if path.exists():
            with open(path) as f:
                composition = yaml.safe_load(f) or {}

            _lint_composition(path, composition, errs)
        return errs

    def invoke(self, *args: str, capture: bool = False) -> subprocess.CompletedProcess:
        """Invoke `docker-compose` on the rendered composition.

        Args:
            args: The arguments to pass to `docker-compose`.
            capture: Whether to capture the child's stdout stream.
        """
        print(f"$ docker-compose {' '.join(args)}", file=sys.stderr)

        self.file.seek(0)

        stdout = None
        if capture:
            stdout = subprocess.PIPE

        try:
            return subprocess.run(
                [
                    "docker-compose",
                    f"-f/dev/fd/{self.file.fileno()}",
                    "--project-directory",
                    self.path,
                    *args,
                ],
                close_fds=False,
                check=True,
                stdout=stdout,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            if e.stdout:
                print(e.stdout)
            raise UIError(f"running docker-compose failed (exit status {e.returncode})")

    def port(self, service: str, private_port: Union[int, str]) -> int:
        """Get the public port for a service's private port.

        Delegates to `docker-compose port`. See that command's help for details.

        Args:
            service: The name of a service in the composition.
            private_port: A private port exposed by the service.
        """
        proc = self.invoke("port", service, str(private_port), capture=True)
        if not proc.stdout.strip():
            raise UIError(
                f"service f{service!r} is not exposing port {private_port!r}",
                hint="is the service running?",
            )
        return int(proc.stdout.split(":")[1])

    def default_port(self, service: str) -> int:
        """Get the default public port for a service.

        Args:
            service: The name of a service in the composition.
        """
        ports = self.compose["services"][service]["ports"]
        if not ports:
            raise UIError(f"service f{service!r} does not expose any ports")
        private_port = str(ports[0]).split(":")[0]
        return self.port(service, private_port)

    def workflow(self, name: str, *args: str) -> None:
        """Run a workflow in the composition.

        Raises a `KeyError` if the workflow does not exist.

        Args:
            name: The name of the workflow to run.
            args: The arguments to pass to the workflow function.
        """
        ui.header(f"Running workflow {name}")
        func = self.workflows[name]
        parser = WorkflowArgumentParser(name, inspect.getdoc(func), list(args))
        if len(inspect.signature(func).parameters) > 1:
            func(self, parser)
        else:
            # If the workflow doesn't have an `args` parameter, parse them here
            # with an empty parser to reject bogus arguments and to handle the
            # trivial help message.
            parser.parse_args()
            func(self)

    @contextmanager
    def override(self, *services: "Service") -> Iterator[None]:
        """Temporarily update the composition with the specified services.

        The services must already exist in the composition. They are restored to
        their old definitions when the `with` block ends. Note that the service
        definition is written in its entirety; i.e., the configuration is not
        deep merged but replaced wholesale.

        Lest you are tempted to change this function to allow dynamically
        injecting new services: do not do this! These services will not be
        visible to other commands, like `mzcompose run`, `mzcompose logs`, or
        `mzcompose down`, which makes debugging or inspecting the composition
        challenging.
        """
        # Remember the old composition.
        old_compose = copy.deepcopy(self.compose)

        # Update the composition with the new service definitions.
        for service in services:
            if service.name not in self.compose["services"]:
                raise RuntimeError(
                    "programming error in call to Workflow.with_services: "
                    f"{service.name!r} does not exist"
                )
            self.compose["services"][service.name] = service.config
            self._resolve_mzbuild_references()
        self._write_compose()

        try:
            # Run the next composition.
            yield
        finally:
            # Restore the old composition.
            self.compose = old_compose
            self._write_compose()

    def sql(self, sql: str) -> None:
        """Run a batch of SQL statements against the materialized service."""
        port = self.default_port("materialized")
        conn = pg8000.connect(host="localhost", user="******", port=port)
        conn.autocommit = True
        cursor = conn.cursor()
        for statement in sqlparse.split(sql):
            cursor.execute(statement)

    def start_and_wait_for_tcp(self, services: List[str]) -> None:
        """Sequentially start the named services, waiting for eaach to become
        available via TCP before moving on to the next."""
        for service in services:
            self.up(service)
            for port in self.compose["services"][service].get("ports", []):
                self.wait_for_tcp(host=service, port=port)

    def run(
        self,
        service: str,
        *args: str,
        detach: bool = False,
        rm: bool = False,
        env: Dict[str, str] = {},
        capture: bool = False,
    ) -> subprocess.CompletedProcess:
        """Run a one-off command in a service.

        Delegates to `docker-compose run`. See that command's help for details.
        Note that unlike `docker compose run`, any services whose definitions
        have changed are rebuilt (like `docker-compose up` would do) before the
        command is executed.

        Args:
            service: The name of a service in the composition.
            args: Arguments to pass to the service's entrypoint.
            detach: Run the container in the background.
            env: Additional environment variables to set in the container.
            rm: Remove container after run.
            capture: Capture the stdout of the `docker-compose` invocation.
        """
        # Restart any dependencies whose definitions have changed. The trick,
        # taken from Buildkite's Docker Compose plugin, is to run an `up`
        # command that requests zero instances of the requested service.
        self.invoke("up", "--detach", "--scale", f"{service}=0", service)
        return self.invoke(
            "run",
            *(f"-e{k}={v}" for k, v in env.items()),
            *(["--detach"] if detach else []),
            *(["--rm"] if rm else []),
            service,
            *args,
            capture=capture,
        )

    def up(self, *services: str, detach: bool = True) -> None:
        """Build, (re)create, and start the named services.

        Delegates to `docker-compose up`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            detach: Run containers in the background.
        """
        self.invoke("up", *(["--detach"] if detach else []), *services)

    def kill(self, *services: str, signal: str = "SIGKILL") -> None:
        """Force stop service containers.

        Delegates to `docker-compose kill`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            signal: The signal to deliver.
        """
        self.invoke("kill", f"-s{signal}", *services)

    def rm(
        self, *services: str, stop: bool = True, destroy_volumes: bool = True
    ) -> None:
        """Remove stopped service containers.

        Delegates to `docker-compose rm`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            stop: Stop the containers if necessary.
            destroy_volumes: Destroy any anonymous volumes associated with the
                service. Note that this does not destroy any named volumes
                attached to the service.
        """
        self.invoke(
            "rm",
            "--force",
            *(["--stop"] if stop else []),
            *(["-v"] if destroy_volumes else []),
            *services,
        )

    def rm_volumes(self, *volumes: str, force: bool = False) -> None:
        """Remove the named volumes.

        Args:
            volumes: The names of volumes in the composition.
            force: Whether to force the removal (i.e., don't error if the
                volume does not exist).
        """
        volumes = (f"{self.name}_{v}" for v in volumes)
        spawn.runv(
            ["docker", "volume", "rm", *(["--force"] if force else []), *volumes]
        )

    def sleep(self, duration: float) -> None:
        """Sleep for the specified duration in seconds."""
        print(f"Sleeping for {duration} seconds...")
        time.sleep(duration)

    # TODO(benesch): replace with Docker health checks.
    def wait_for_tcp(
        self,
        *,
        host: str = "localhost",
        port: int,
        timeout_secs: int = 240,
    ) -> None:
        ui.progress(f"waiting for {host}:{port}", "C")
        for remaining in ui.timeout_loop(timeout_secs):
            cmd = f"docker run --rm -t --network {self.name}_default ubuntu:focal-20210723".split()

            try:
                _check_tcp(cmd[:], host, port, timeout_secs)
            except subprocess.CalledProcessError:
                ui.progress(" {}".format(int(remaining)))
            else:
                ui.progress(" success!", finish=True)
                return

        ui.progress(" error!", finish=True)
        raise UIError(f"unable to connect to {host}:{port}")

    # TODO(benesch): replace with Docker health checks.
    def wait_for_postgres(
        self,
        *,
        dbname: str = "postgres",
        port: Optional[int] = None,
        host: str = "localhost",
        timeout_secs: int = 120,
        query: str = "SELECT 1",
        user: str = "postgres",
        password: str = "postgres",
        expected: Union[Iterable[Any], Literal["any"]] = [[1]],
        print_result: bool = False,
        service: str = "postgres",
    ) -> None:
        """Wait for a PostgreSQL service to start.

        Args:
            dbname: the name of the database to wait for
            host: the host postgres is listening on
            port: the port postgres is listening on
            timeout_secs: How long to wait for postgres to be up before failing (Default: 120)
            query: The query to execute to ensure that it is running (Default: "SELECT 1")
            user: The chosen user (this is only relevant for postgres)
            service: The service that postgres is running as (Default: postgres)
        """
        _wait_for_pg(
            dbname=dbname,
            host=host,
            port=port or self.default_port(service),
            timeout_secs=timeout_secs,
            query=query,
            user=user,
            password=password,
            expected=expected,
            print_result=print_result,
        )

    # TODO(benesch): replace with Docker health checks.
    def wait_for_materialized(
        self,
        service: str = "materialized",
        *,
        user: str = "materialize",
        dbname: str = "materialize",
        host: str = "localhost",
        port: Optional[int] = None,
        timeout_secs: int = 60,
        query: str = "SELECT 1",
        expected: Union[Iterable[Any], Literal["any"]] = [[1]],
        print_result: bool = False,
    ) -> None:
        """Like `Workflow.wait_for_postgres`, but with Materialize defaults."""
        self.wait_for_postgres(
            user=user,
            dbname=dbname,
            host=host,
            port=port,
            timeout_secs=timeout_secs,
            query=query,
            expected=expected,
            print_result=print_result,
            service=service,
        )
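A hedged sketch of how a workflow function in an mzcompose.py might drive this class, following the workflow_ naming convention shown above; the workflow name and query are made up:

def workflow_smoke_test(c: Composition) -> None:
    """Bring up materialized, wait for it, run a trivial query, then tear it down."""
    c.up("materialized")
    c.wait_for_materialized()
    c.sql("SELECT 1")
    c.kill("materialized")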
Example No. 9
class LogParser:
    """
    Class for parsing information about attacks from log files
    """
    def __init__(self,
                 file_log,
                 rules,
                 service_name=None,
                 logger=None):  # type: (str, List[str], str, Logger) -> None
        """
        Initialize the log parser
        :param file_log: path to the file with logs
        :param rules: list of string filters/rules
        :param service_name: optional name of the service. If not specified, detected attacks are not assigned to any
        service
        :param logger: optional logger. If specified, LogParser sends its messages to it
        """
        self.file_log = file_log
        self.rules = [
            rule if isinstance(rule, Rule) else Rule(rule, service_name)
            for rule in rules
        ]
        self.logger = logger

        self._last_file_size = 0
        self._last_file_modification_date = None

        self._attack_cache_file = TemporaryFile()  # all parsed attacks are cached here
        self._attack_cache_file_lock = Lock()

        # if these last bytes are the same, we can be sure the file was not modified
        self._last_bytes = {'hash': None, 'len': 0}

        self.force_rescan()

    def parse_attacks(self,
                      max_age=None,
                      skip_scanning=False):  # type: (float, bool) -> dict
        """
        Parses the attacks from log file and returns them
        :param max_age: optional, in seconds. If an attack is older than this, it is ignored
        :param skip_scanning: if set to True, the read content is not analyzed for attacks
        :return: dictionary. Key is the attacking IP and value is a list of dictionaries with data about every attack
        """
        if self.logger is not None:
            self.logger.debug('parsing attacks for %s' % self.file_log)

        attacks = {}

        curr_file_size = getsize(self.file_log)
        curr_file_modification_time = getmtime(self.file_log)

        if self._last_file_size == curr_file_size and curr_file_modification_time == self._last_file_modification_date:
            if self.logger is not None:
                self.logger.debug('nothing changed, nothing new to parse')
            # the file has not changed, so skip analyzing it
            return attacks

        continue_in_scanning = True  # when set to True, only new content in file is analyzed

        if self._last_file_size > curr_file_size:
            # when the current file is smaller, something has happened to it. We will rescan it to be sure
            if self.logger is not None:
                self.logger.debug(
                    'file went smaller since last scan, rescan it')
            continue_in_scanning = False
            self.force_rescan()

        if self._last_file_size == curr_file_size and self._last_file_modification_date != curr_file_modification_time:
            # the file has the same size but was still modified, we better rescan it
            if self.logger is not None:
                self.logger.debug(
                    'file is the same size but it was modified, rescan it')
            continue_in_scanning = False
            self.force_rescan()

        with open(self.file_log, 'r') as f:
            if continue_in_scanning and self._last_bytes['hash'] is not None:
                # check last few bytes if they are same
                f.seek(self._last_file_size - self._last_bytes['len'] - 5)
                if md5(f.read(self._last_bytes['len']).encode(
                        'utf8')).hexdigest() != self._last_bytes['hash']:
                    # nope, last few bytes differ, something seems really odd about this file. Better rescan it
                    if self.logger is not None:
                        self.logger.debug(
                            'last few scanned bytes differ, rescan it')
                    self.force_rescan()

            f.seek(self._last_file_size)  # skip all already analyzed content
            new_content = f.read()

        # save the last few bytes so we can tell whether the file is still the same on the next scan
        content_end = new_content[-256:-5]
        self._last_bytes['hash'] = md5(content_end.encode('utf8')).hexdigest()
        self._last_bytes['len'] = len(content_end)

        log_lines = new_content.splitlines()
        del new_content, content_end

        if not skip_scanning:
            for log_line in log_lines:
                if self.logger is not None:
                    self.logger.debug('log line "%s"' %
                                      log_line.replace('\n', '\\n'))
                for rule in self.rules:
                    variables = rule.get_variables(log_line)
                    if variables is not None:
                        if max_age is not None and time.time(
                        ) - max_age > variables['TIMESTAMP']:
                            break
                        attacker_ip = variables['IP']
                        del variables['IP']
                        item = attacks.get(attacker_ip, [])
                        item.append(variables)
                        attacks[attacker_ip] = item
                        break

        self._last_file_modification_date = curr_file_modification_time
        self._last_file_size = curr_file_size

        self._attack_cache_file_lock.acquire()
        self._attack_cache_file.seek(0)
        attacks.update(
            json.loads(self._attack_cache_file.read().decode('utf8')))
        self._attack_cache_file.seek(0)
        self._attack_cache_file.truncate(0)
        self._attack_cache_file.write(json.dumps(attacks).encode('utf8'))
        self._attack_cache_file.flush()
        self._attack_cache_file_lock.release()

        return attacks

    def get_habitual_offenders(self,
                               min_attack_attempts,
                               attack_attempts_time,
                               max_age=None,
                               attacks=None,
                               first_load=False):
        # type: (int, int, int, dict, bool) -> dict
        """
        Finds IPs that performed more than the allowed number of attacks in the specified time range
        :param min_attack_attempts: minimum number of attacks in the time range required to be included
        :param attack_attempts_time: the time range, in seconds, in which all of the attacks must have occurred
        :param max_age: optional, in seconds. If an attack is older than this, it is ignored
        :param attacks: optional. If None, the value of self.parse_attacks(max_age) is used
        :param first_load: if True, the log file is only read, not scanned for attacks
        :return: dictionary. Key is the IP that attacked more or equal than min_attack_attempts times and
        value is list of dictionaries with data about every attack in specified time range
        """
        attacks = self.parse_attacks(
            max_age, first_load) if attacks is None else attacks
        habitual_offenders = {}

        for ip, attack_list in attacks.items():
            for attack in attack_list:
                attacks_in_time_range = []
                for attack2 in attack_list:
                    attack_time_delta = attack2['TIMESTAMP'] - attack[
                        'TIMESTAMP']
                    if 0 <= attack_time_delta <= attack_attempts_time:
                        attacks_in_time_range.append(attack2)
                        if len(attacks_in_time_range) > min_attack_attempts:
                            break
                if len(attacks_in_time_range) >= min_attack_attempts:
                    habitual_offenders[ip] = attack_list

        return habitual_offenders

    def force_rescan(self):  # type: () -> None
        """
        Resets progress info about the log file, forcing the next scan to start from the beginning
        :return: None
        """
        self._last_file_size = 0
        self._last_file_modification_date = None
        self._attack_cache_file_lock.acquire()
        self._attack_cache_file.seek(0)
        self._attack_cache_file.truncate(0)
        self._attack_cache_file.write(json.dumps({}).encode('utf8'))
        self._attack_cache_file.flush()
        self._attack_cache_file_lock.release()
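A hedged usage sketch; the rule string below is purely illustrative, since the syntax the Rule class expects is not shown here:

parser = LogParser(file_log='/var/log/auth.log',
                   rules=['%IP% Failed password%'],   # hypothetical rule syntax
                   service_name='ssh')
offenders = parser.get_habitual_offenders(min_attack_attempts=3,
                                           attack_attempts_time=600)
for ip, attack_list in offenders.items():
    print(ip, len(attack_list))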
Example No. 10
import subprocess
from tempfile import TemporaryFile, NamedTemporaryFile
from time import sleep


class ShellSubprocess(object):
    """
    Interactive shell running in a persistent process
    """
    def __init__(self):
        self.stdout = TemporaryFile()
        self.stderr = TemporaryFile()
        self.process = subprocess.Popen("/bin/bash",
                                        stdin=subprocess.PIPE,
                                        stdout=self.stdout,
                                        stderr=self.stderr)
        self.stdin = self.process.stdin

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def execute(self, cmd):
        """Execute a command in the shell.

        Args:
            cmd (string, list): command to execute
        """
        if isinstance(cmd, list):
            cmd = subprocess.list2cmdline(cmd)

        self.stdin.write(cmd.encode() + b"\n")
        self.stdin.flush()

    def run(self, cmd):
        """Execute a command in the shell an return the stdout, stderr and exit code.
        NOTE: This method does not write to stdout or stderr buffers.

        Args:
            cmd (string, list): command to execute
        """
        with NamedTemporaryFile() as outfile, NamedTemporaryFile(
        ) as errfile, NamedTemporaryFile() as exitfile:

            self.execute(
                f"({cmd}) 1> {outfile.name} 2> {errfile.name}; echo $? > {exitfile.name}"
            )

            wait_time = 0.0001
            while True:
                exitfile.seek(0)
                exit_code = exitfile.read()
                if exit_code:
                    break
                sleep(wait_time)
                wait_time *= 2

            outfile.seek(0)
            out = outfile.read()

            errfile.seek(0)
            err = errfile.read()

            exit_code = int(exit_code.strip())

        return out, err, exit_code

    def read_stdout(self):
        """Read stdout from the shell

        Returns:
            bytes: stdout
        """
        self.stdout.seek(0)
        return self.stdout.read()

    def read_stderr(self):
        """Read stderr from the shell

        Returns:
            bytes: stderr
        """
        self.stderr.seek(0)
        return self.stderr.read()

    def clear_stdout(self):
        """Clear stdout from the shell"""
        self.stdout.seek(0)
        self.stdout.truncate()

    def clear_stderr(self):
        """Clear stderr from the shell"""
        self.stderr.seek(0)
        self.stderr.truncate()

    def close(self):
        """Close the shell"""
        self.process.terminate()
        self.stdout.close()
        self.stderr.close()
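A short usage sketch for the persistent shell above:

with ShellSubprocess() as shell:
    out, err, code = shell.run("echo hello && echo oops >&2")
    print(out, err, code)                  # b'hello\n' b'oops\n' 0

    shell.execute("export GREETING=hi")    # state persists across commands
    out, _, _ = shell.run("echo $GREETING")
    print(out)                             # b'hi\n'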
Example No. 11
def parseMultipart(fp, pdict, memfile_max=1024 * 1000, len_max=0):
    """
    Parse multipart content
    """

    # TODO: Do not store whole part contents in memory

    boundary = ''
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if not isBoundaryValid(boundary):
        raise ValueError(
            'Invalid boundary in multipart form: {0}'.format(boundary))


    nextpart = b'--' + boundary.encode()
    lastpart = b'--' + boundary.encode() + b'--'
    partdict = {}
    terminator = b''

    while terminator != lastpart:
        nbytes = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = parse_headers(fp, memfile_max)
            clength = headers.get('content-length')
            if clength is not None:
                try:
                    nbytes = int(clength)
                except ValueError:
                    pass
            if nbytes > 0:
                if len_max and nbytes > len_max:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b''
        # Read lines until end of part.
        part_fp = TemporaryFile(mode='w+b')
        while 1:
            line = fp.readline(memfile_max)

            if line == b'':
                terminator = lastpart  # End outer loop
                break

            if _is_termline(line, nextpart):
                terminator = nextpart
                break

            if _is_termline(line, lastpart):
                terminator = lastpart
                break

            part_fp.write(line)
            while not line.endswith(b"\n"):
                line = fp.readline(memfile_max)

                if line == b'':
                    break

                part_fp.write(line)

        # Done with part.
        if data is None:
            continue
        if nbytes < 0:
            last = pre_last = None

            # Strip final line terminator
            if part_fp.tell() >= 1:
                part_fp.seek(-1, os.SEEK_END)
                last = part_fp.read(1)

            if part_fp.tell() >= 2:
                part_fp.seek(-2, os.SEEK_END)
                pre_last = part_fp.read(1)

            trunc = 0
            if pre_last == b"\r" and last == b"\n":
                trunc = 2
            elif last == b"\n":
                trunc = 1

            if trunc > 0:
                part_fp.seek(-trunc, os.SEEK_END)
                part_fp.truncate()

        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' in params:
            name = params['name']
        else:
            continue

        part_fp.seek(0, os.SEEK_SET)

        part = {'fp': part_fp}
        if 'filename' in params:
            part['filename'] = params['filename']

        if name in partdict:
            partdict[name].append(part)
        else:
            partdict[name] = [part]

    return partdict
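A rough sketch of calling the parser, assuming the helpers it relies on (isBoundaryValid, parse_headers, _is_termline, parse_header) are available in the same module and behave like their counterparts in the standard cgi module; the body below is a hand-built one-field form:

from io import BytesIO

boundary = 'simpleboundary'
body = (b'--simpleboundary\r\n'
        b'Content-Disposition: form-data; name="field1"\r\n\r\n'
        b'value1\r\n'
        b'--simpleboundary--\r\n')

parts = parseMultipart(BytesIO(body), {'boundary': boundary})
field = parts['field1'][0]
print(field['fp'].read())   # b'value1' (given the assumed helper behavior)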
Example No. 12
class S3File(io.IOBase):
    """File like proxy for s3 files, manages upload and download of locally managed temporary file
    """

    def __init__(self, bucket, key, mode='w+b', *args, **kwargs):
        super(S3File, self).__init__(*args, **kwargs)
        self.bucket = bucket
        self.key = key
        self.mode = mode
        self.path = self.bucket + '/' + self.key

        # converts mode to readable/writable to enable the temporary file to have S3 data
        # read or written to it even if the S3File is read/write/append
        # i.e. "r" => "r+", "ab" => "a+b"
        updatable_mode = re.sub(r'^([rwa]+)(b?)$', r'\1+\2', mode)
        self._tempfile = TemporaryFile(updatable_mode)

        try:
            with s3errors(self.path):
                if 'a' in mode:
                    # File is in an appending mode, start with the content in file
                    s3.Object(bucket, key).download_fileobj(self._tempfile)
                    self.seek(0, os.SEEK_END)
                elif 'a' not in mode and 'w' not in mode and 'x' not in mode:
                    # file is not in a create mode, so it is in read mode
                    # start with the content in the file, and seek to the beginning
                    s3.Object(bucket, key).download_fileobj(self._tempfile)
                    self.seek(0, os.SEEK_SET)
        except Exception:
            self.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        try:
            if self.writable():
                self.seek(0)
                with s3errors(self.path):
                    s3.Object(self.bucket, self.key).upload_fileobj(self._tempfile)
        finally:
            self._tempfile.close()

    @property
    def closed(self):
        return self._tempfile.closed

    def fileno(self):
        return self._tempfile.fileno()

    def flush(self):
        return self._tempfile.flush()

    def isatty(self):
        return self._tempfile.isatty()

    def readable(self):
        return 'r' in self.mode or '+' in self.mode

    def read(self, n=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.read(n)

    def readinto(self, b):
        return self._tempfile.readinto(b)

    def readline(self, limit=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.readline(limit)

    def readlines(self, hint=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.readlines(hint)

    def seek(self, offset, whence=os.SEEK_SET):
        self._tempfile.seek(offset, whence)
        return self.tell()

    def seekable(self):
        return True

    def tell(self):
        return self._tempfile.tell()

    def writable(self):
        return 'w' in self.mode or 'a' in self.mode or '+' in self.mode or 'x' in self.mode

    def write(self, b):
        if not self.writable():
            raise IOError('not open for writing')
        self._tempfile.write(b)
        return len(b)

    def writelines(self, lines):
        if not self.writable():
            raise IOError('not open for writing')
        return self._tempfile.writelines(lines)

    def truncate(self, size=None):
        if not self.writable():
            raise IOError('not open for writing')

        if size is None:
            size = self.tell()

        self._tempfile.truncate(size)
        return size
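A hedged round-trip sketch, assuming the module-level boto3 `s3` resource and the `s3errors` context manager used above are configured; the bucket and key names are made up:

with S3File('my-example-bucket', 'reports/output.txt', mode='w+b') as f:
    f.write(b'hello from S3File\n')
# close() uploaded the temp file contents to s3://my-example-bucket/reports/output.txt

with S3File('my-example-bucket', 'reports/output.txt', mode='rb') as f:
    print(f.read())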
Example No. 14
class Composition:
    """A parsed mzcompose.yml with a loaded mzcompose.py file."""

    @dataclass
    class TestResult:
        duration: float
        error: Optional[str]

    def __init__(
        self,
        repo: mzbuild.Repository,
        name: str,
        preserve_ports: bool = False,
        silent: bool = False,
        munge_services: bool = True,
    ):
        self.name = name
        self.description = None
        self.repo = repo
        self.preserve_ports = preserve_ports
        self.silent = silent
        self.workflows: Dict[str, Callable[..., None]] = {}
        self.test_results: OrderedDict[str, Composition.TestResult] = OrderedDict()

        if name in self.repo.compositions:
            self.path = self.repo.compositions[name]
        else:
            raise UnknownCompositionError(name)

        # load the mzcompose.yml file, if one exists
        mzcompose_yml = self.path / "mzcompose.yml"
        if mzcompose_yml.exists():
            with open(mzcompose_yml) as f:
                compose = yaml.safe_load(f) or {}
        else:
            compose = {}

        self.compose = compose

        if "version" not in compose:
            compose["version"] = "3.7"

        if "services" not in compose:
            compose["services"] = {}

        # Load the mzcompose.py file, if one exists
        mzcompose_py = self.path / "mzcompose.py"
        if mzcompose_py.exists():
            spec = importlib.util.spec_from_file_location("mzcompose", mzcompose_py)
            assert spec
            module = importlib.util.module_from_spec(spec)
            assert isinstance(spec.loader, importlib.abc.Loader)
            spec.loader.exec_module(module)
            self.description = inspect.getdoc(module)
            for name, fn in getmembers(module, isfunction):
                if name.startswith("workflow_"):
                    # The name of the workflow is the name of the function
                    # with the "workflow_" prefix stripped and any underscores
                    # replaced with dashes.
                    name = name[len("workflow_") :].replace("_", "-")
                    self.workflows[name] = fn

            for python_service in getattr(module, "SERVICES", []):
                name = python_service.name
                if name in compose["services"]:
                    raise UIError(f"service {name!r} specified more than once")
                compose["services"][name] = python_service.config

        # Add default volumes
        compose.setdefault("volumes", {}).update(
            {
                "mzdata": None,
                "pgdata": None,
                "mydata": None,
                "tmp": None,
                "secrets": None,
            }
        )

        # The CLI driver will handle acquiring these dependencies.
        if munge_services:
            self.dependencies = self._munge_services(compose["services"].items())

        # Emit the munged configuration to a temporary file so that we can later
        # pass it to Docker Compose.
        self.file = TemporaryFile(mode="w")
        os.set_inheritable(self.file.fileno(), True)
        self._write_compose()

    def _munge_services(
        self, services: List[Tuple[str, dict]]
    ) -> mzbuild.DependencySet:
        images = []

        for name, config in services:
            # Remember any mzbuild references.
            if "mzbuild" in config:
                image_name = config["mzbuild"]
                if image_name not in self.repo.images:
                    raise UIError(f"mzcompose: unknown image {image_name}")
                image = self.repo.images[image_name]
                images.append(image)

            if "propagate_uid_gid" in config:
                if config["propagate_uid_gid"]:
                    config["user"] = f"{os.getuid()}:{os.getgid()}"
                del config["propagate_uid_gid"]

            ports = config.setdefault("ports", [])
            for i, port in enumerate(ports):
                if self.preserve_ports and not ":" in str(port):
                    # If preserving ports, bind the container port to the same
                    # host port, assuming the host port is available.
                    ports[i] = f"{port}:{port}"
                elif ":" in str(port) and not config.get("allow_host_ports", False):
                    # Raise an error for host-bound ports, unless
                    # `allow_host_ports` is `True`
                    raise UIError(
                        "programming error: disallowed host port in service {name!r}",
                        hint=f'Add `"allow_host_ports": True` to the service config to disable this check.',
                    )

            if "allow_host_ports" in config:
                config.pop("allow_host_ports")

            if self.repo.rd.coverage:
                # Emit coverage information to a file in a directory that is
                # bind-mounted to the "coverage" directory on the host. We
                # inject the configuration to all services for simplicity, but
                # this only has an effect if the service runs instrumented Rust
                # binaries.
                config.setdefault("environment", []).append(
                    f"LLVM_PROFILE_FILE=/coverage/{name}-%m.profraw"
                )
                config.setdefault("volumes", []).append("./coverage:/coverage")

        # Determine mzbuild specs and inject them into services accordingly.
        deps = self.repo.resolve_dependencies(images)
        for _name, config in services:
            if "mzbuild" in config:
                config["image"] = deps[config["mzbuild"]].spec()
                del config["mzbuild"]

        return deps

    def _write_compose(self) -> None:
        self.file.seek(0)
        self.file.truncate()
        yaml.dump(self.compose, self.file)
        self.file.flush()

    @classmethod
    def lint(cls, repo: mzbuild.Repository, name: str) -> List[LintError]:
        """Checks a composition for common errors."""
        if name not in repo.compositions:
            raise UnknownCompositionError(name)

        errs: List[LintError] = []

        path = repo.compositions[name] / "mzcompose.yml"

        if path.exists():
            with open(path) as f:
                composition = yaml.safe_load(f) or {}

            _lint_composition(path, composition, errs)
        return errs

    def invoke(
        self, *args: str, capture: bool = False, stdin: Optional[str] = None
    ) -> subprocess.CompletedProcess:
        """Invoke `docker-compose` on the rendered composition.

        Args:
            args: The arguments to pass to `docker-compose`.
            capture: Whether to capture the child's stdout stream.
            stdin: A string to provide as stdin for the command.
        """

        if not self.silent:
            print(f"$ docker-compose {' '.join(args)}", file=sys.stderr)

        self.file.seek(0)

        stdout = None
        if capture:
            stdout = subprocess.PIPE

        try:
            return subprocess.run(
                [
                    "docker-compose",
                    *(["--log-level=ERROR"] if self.silent else []),
                    f"-f/dev/fd/{self.file.fileno()}",
                    "--project-directory",
                    self.path,
                    *args,
                ],
                close_fds=False,
                check=True,
                stdout=stdout,
                input=stdin,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            if e.stdout:
                print(e.stdout)
            raise UIError(f"running docker-compose failed (exit status {e.returncode})")

    def port(self, service: str, private_port: Union[int, str]) -> int:
        """Get the public port for a service's private port.

        Delegates to `docker-compose port`. See that command's help for details.

        Args:
            service: The name of a service in the composition.
            private_port: A private port exposed by the service.
        """
        proc = self.invoke("port", service, str(private_port), capture=True)
        if not proc.stdout.strip():
            raise UIError(
                f"service f{service!r} is not exposing port {private_port!r}",
                hint="is the service running?",
            )
        return int(proc.stdout.split(":")[1])

    def default_port(self, service: str) -> int:
        """Get the default public port for a service.

        Args:
            service: The name of a service in the composition.
        """
        ports = self.compose["services"][service]["ports"]
        if not ports:
            raise UIError(f"service f{service!r} does not expose any ports")
        private_port = str(ports[0]).split(":")[0]
        return self.port(service, private_port)
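
    # Usage sketch (service names are hypothetical): resolve the host ports
    # that docker-compose actually bound for a service.
    #
    #     mz_port = composition.default_port("materialized")
    #     kafka_port = composition.port("kafka", 9092)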

    def workflow(self, name: str, *args: str) -> None:
        """Run a workflow in the composition.

        Raises a `KeyError` if the workflow does not exist.

        Args:
            name: The name of the workflow to run.
            args: The arguments to pass to the workflow function.
        """
        ui.header(f"Running workflow {name}")
        func = self.workflows[name]
        parser = WorkflowArgumentParser(name, inspect.getdoc(func), list(args))
        if len(inspect.signature(func).parameters) > 1:
            func(self, parser)
        else:
            # If the workflow doesn't take a parser parameter, parse the arguments
            # with an empty parser to reject bogus arguments and to handle the
            # trivial help message.
            parser.parse_args()
            func(self)
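
    # Both workflow signatures dispatched above, sketched as they would appear
    # in a composition's `mzcompose.py` (names and arguments are hypothetical):
    #
    #     def workflow_default(c: Composition) -> None:
    #         c.up("materialized")
    #
    #     def workflow_load(c: Composition, parser: WorkflowArgumentParser) -> None:
    #         parser.add_argument("--seconds", type=int, default=60)
    #         args = parser.parse_args()
    #         c.run("load-generator", f"--seconds={args.seconds}")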

    @contextmanager
    def override(self, *services: "Service") -> Iterator[None]:
        """Temporarily update the composition with the specified services.

        The services must already exist in the composition. They are restored to
        their old definitions when the `with` block ends. Note that the service
        definition is written in its entirety; i.e., the configuration is not
        deep merged but replaced wholesale.

        Lest you be tempted to change this function to allow dynamically
        injecting new services: do not do this! These services will not be
        visible to other commands, like `mzcompose run`, `mzcompose logs`, or
        `mzcompose down`, which makes debugging or inspecting the composition
        challenging.
        """
        # Remember the old composition.
        old_compose = copy.deepcopy(self.compose)

        # Update the composition with the new service definitions.
        deps = self._munge_services([(s.name, cast(dict, s.config)) for s in services])
        for service in services:
            self.compose["services"][service.name] = service.config

        # Re-acquire dependencies, as the override may have swapped an `image`
        # config for an `mzbuild` config.
        deps.acquire()

        self._write_compose()

        # Ensure image freshness
        self.pull_if_variable([service.name for service in services])

        try:
            # Run the body of the `with` block against the updated composition.
            yield
        finally:
            # Restore the old composition.
            self.compose = old_compose
            self._write_compose()
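
    # Usage sketch (assumes a `Materialized` service helper is importable from
    # the composition's service library; the option shown is hypothetical):
    #
    #     with c.override(Materialized(options=["--workers=2"])):
    #         c.up("materialized")
    #         c.wait_for_materialized()
    #     # the original "materialized" definition is restored here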

    @contextmanager
    def test_case(self, name: str) -> Iterator[None]:
        """Execute a test case.

        This context manager provides a very lightweight testing framework. If
        the body of the context manager raises an exception, the test case is
        considered to have failed; otherwise it is considered to have succeeded.
        In either case the execution time and status of the test are recorded in
        `test_results`.

        Example:
            A simple workflow that executes a table-driven test:

            ```
            @dataclass
            class TestCase:
                name: str
                files: list[str]

            test_cases = [
                TestCase(name="short", files=["quicktests.td"]),
                TestCase(name="long", files=["longtest1.td", "longtest2.td"]),
            ]

            def workflow_default(c: Composition):
                for tc in test_cases:
                    with c.test_case(tc.name):
                        c.run("testdrive", *tc.files)
            ```

        Args:
            name: The name of the test case. Must be unique across the lifetime
                of a composition.
        """
        if name in self.test_results:
            raise UIError(f"test case {name} executed twice")
        ui.header(f"Running test case {name}")
        error = None
        start_time = time.time()
        try:
            yield
            ui.header(f"mzcompose: test case {name} succeeded")
        except Exception as e:
            error = str(e)
            if isinstance(e, UIError):
                print(f"mzcompose: test case {name} failed: {e}", file=sys.stderr)
            else:
                print(f"mzcompose: test case {name} failed:", file=sys.stderr)
                traceback.print_exc()
        elapsed = time.time() - start_time
        self.test_results[name] = Composition.TestResult(elapsed, error)

    def sql_cursor(self) -> Cursor:
        """Get a cursor to run SQL queries against the materialized service."""
        port = self.default_port("materialized")
        conn = pg8000.connect(host="localhost", user="******", port=port)
        conn.autocommit = True
        return conn.cursor()

    def sql(self, sql: str) -> None:
        """Run a batch of SQL statements against the materialized service."""
        with self.sql_cursor() as cursor:
            for statement in sqlparse.split(sql):
                print(f"> {statement}")
                cursor.execute(statement)

    def sql_query(self, sql: str) -> Any:
        """Execute and return results of a SQL query."""
        with self.sql_cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
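
    # Usage sketch (assumes the "materialized" service is already up; the exact
    # row shape returned by pg8000 may differ):
    #
    #     c.sql("CREATE TABLE t (a int); INSERT INTO t VALUES (1), (2)")
    #     rows = c.sql_query("SELECT count(*) FROM t")
    #     assert rows[0][0] == 2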

    def create_cluster(
        self,
        cluster: List,
        cluster_name: str = "cluster1",
        replica_name: str = "replica1",
    ) -> None:
        """Construct and run a CREATE CLUSTER statement based a list of Computed instances

        Args:
            cluster: a List of Computed instances that will form the cluster
            cluster_name: The cluster name to use
            replica_name: The replica name to use
        """
        self.sql(
            f"CREATE CLUSTER {cluster_name} REPLICAS ( {replica_name} ( REMOTE ["
            + ", ".join(f'"{p.name}:2100"' for p in cluster)
            + "]))"
        )
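
    # Usage sketch (assumes `Computed` service helpers named "computed_1" and
    # "computed_2" are defined in the composition and reachable on port 2100;
    # the constructor arguments are hypothetical):
    #
    #     c.create_cluster([Computed(name="computed_1"), Computed(name="computed_2")])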

    def start_and_wait_for_tcp(self, services: List[str]) -> None:
        """Sequentially start the named services, waiting for eaach to become
        available via TCP before moving on to the next."""
        for service in services:
            self.up(service)
            for port in self.compose["services"][service].get("ports", []):
                self.wait_for_tcp(host=service, port=port)
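
    # Usage sketch (hypothetical service names, started in dependency order):
    #
    #     c.start_and_wait_for_tcp(["zookeeper", "kafka", "schema-registry"])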

    def run(
        self,
        service: str,
        *args: str,
        detach: bool = False,
        rm: bool = False,
        env_extra: Dict[str, str] = {},
        capture: bool = False,
        stdin: Optional[str] = None,
    ) -> subprocess.CompletedProcess:
        """Run a one-off command in a service.

        Delegates to `docker-compose run`. See that command's help for details.
        Note that unlike `docker-compose run`, any services whose definitions
        have changed are rebuilt (like `docker-compose up` would do) before the
        command is executed.

        Args:
            service: The name of a service in the composition.
            args: Arguments to pass to the service's entrypoint.
            detach: Run the container in the background.
            stdin: Read stdin from a string.
            env_extra: Additional environment variables to set in the container.
            rm: Remove container after run.
            capture: Capture the stdout of the `docker-compose` invocation.
        """
        # Restart any dependencies whose definitions have changed. The trick,
        # taken from Buildkite's Docker Compose plugin, is to run an `up`
        # command that requests zero instances of the requested service.
        self.invoke("up", "--detach", "--scale", f"{service}=0", service)
        return self.invoke(
            "run",
            *(f"-e{k}={v}" for k, v in env_extra.items()),
            *(["--detach"] if detach else []),
            *(["--rm"] if rm else []),
            service,
            *args,
            capture=capture,
            stdin=stdin,
        )
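
    # Usage sketch (service names, files, and flags are hypothetical):
    #
    #     c.run("testdrive", "smoke.td", env_extra={"SEED": "1"})
    #     version = c.run("materialized", "--version", capture=True).stdout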

    def exec(
        self,
        service: str,
        *args: str,
        detach: bool = False,
        capture: bool = False,
        stdin: Optional[str] = None,
    ) -> subprocess.CompletedProcess:
        """Execute a one-off command in a service's running container

        Delegates to `docker-compose exec`.

        Args:
            service: The service whose container will be used.
            command: The command to run.
            args: Arguments to pass to the command.
            detach: Run the container in the background.
            stdin: read STDIN from a string.
        """

        return self.invoke(
            "exec",
            *(["--detach"] if detach else []),
            "-T",
            service,
            *(
                self.compose["services"][service]["entrypoint"]
                if "entrypoint" in self.compose["services"][service]
                else []
            ),
            *args,
            capture=capture,
            stdin=stdin,
        )

    def pull_if_variable(self, services: List[str]) -> None:
        """Pull fresh service images in case the tag indicates thee underlying image may change over time.

        Args:
            services: List of service names
        """

        for service in services:
            if "image" in self.compose["services"][service] and any(
                self.compose["services"][service]["image"].endswith(tag)
                for tag in [":latest", ":unstable", ":rolling"]
            ):
                self.invoke("pull", service)

    def up(self, *services: str, detach: bool = True, persistent: bool = False) -> None:
        """Build, (re)create, and start the named services.

        Delegates to `docker-compose up`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            detach: Run containers in the background.
            persistent: Replace the container's entrypoint and command with
                `sleep infinity` so that additional commands can be scheduled
                on the container with `Composition.exec`.
        """
        if persistent:
            old_compose = copy.deepcopy(self.compose)
            for service in self.compose["services"].values():
                service["entrypoint"] = ["sleep", "infinity"]
                service["command"] = []
            self._write_compose()

        self.invoke("up", *(["--detach"] if detach else []), *services)

        if persistent:
            self.compose = old_compose
            self._write_compose()
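
    # Usage sketch of the `persistent` flag: keep the container idling on
    # `sleep infinity`, then schedule commands on it with `exec` (the service
    # name and file names are hypothetical):
    #
    #     c.up("testdrive", persistent=True)
    #     c.exec("testdrive", "file1.td")
    #     c.exec("testdrive", "file2.td")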

    def down(self, destroy_volumes: bool = True, remove_orphans: bool = True) -> None:
        """Stop and remove resources.

        Delegates to `docker-compose down`. See that command's help for details.

        Args:
            destroy_volumes: Remove named volumes and anonymous volumes attached
                to containers.
            remove_orphans: Remove containers for services not defined in the
                composition file.
        """
        self.invoke(
            "down",
            *(["--volumes"] if destroy_volumes else []),
            *(["--remove-orphans"] if remove_orphans else []),
        )

    def stop(self, *services: str) -> None:
        """Stop the docker containers for the named services.

        Delegates to `docker-compose stop`. See that command's help for details.

        Args:
            services: The names of services in the composition.
        """
        self.invoke("stop", *services)

    def kill(self, *services: str, signal: str = "SIGKILL") -> None:
        """Force stop service containers.

        Delegates to `docker-compose kill`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            signal: The signal to deliver.
        """
        self.invoke("kill", f"-s{signal}", *services)

    def pause(self, *services: str) -> None:
        """Pause service containers.

        Delegates to `docker-compose pause`. See that command's help for details.

        Args:
            services: The names of services in the composition.
        """
        self.invoke("pause", *services)

    def unpause(self, *services: str) -> None:
        """Unpause service containers

        Delegates to `docker-compose unpause`. See that command's help for details.

        Args:
            services: The names of services in the composition.
        """
        self.invoke("unpause", *services)

    def rm(
        self, *services: str, stop: bool = True, destroy_volumes: bool = True
    ) -> None:
        """Remove stopped service containers.

        Delegates to `docker-compose rm`. See that command's help for details.

        Args:
            services: The names of services in the composition.
            stop: Stop the containers if necessary.
            destroy_volumes: Destroy any anonymous volumes associated with the
                service. Note that this does not destroy any named volumes
                attached to the service.
        """
        self.invoke(
            "rm",
            "--force",
            *(["--stop"] if stop else []),
            *(["-v"] if destroy_volumes else []),
            *services,
        )

    def rm_volumes(self, *volumes: str, force: bool = False) -> None:
        """Remove the named volumes.

        Args:
            volumes: The names of volumes in the composition.
            force: Whether to force the removal (i.e., don't error if the
                volume does not exist).
        """
        volumes = (f"{self.name}_{v}" for v in volumes)
        spawn.runv(
            ["docker", "volume", "rm", *(["--force"] if force else []), *volumes]
        )

    def sleep(self, duration: float) -> None:
        """Sleep for the specified duration in seconds."""
        print(f"Sleeping for {duration} seconds...")
        time.sleep(duration)

    # TODO(benesch): replace with Docker health checks.
    def wait_for_tcp(
        self,
        *,
        host: str = "localhost",
        port: Union[int, str],
        timeout_secs: int = 240,
    ) -> None:
        if isinstance(port, str):
            port = int(port.split(":")[0])
        ui.progress(f"waiting for {host}:{port}", "C")
        cmd = f"docker run --rm -t --network {self.name}_default ubuntu:focal-20210723".split()
        try:
            _check_tcp(cmd[:], host, port, timeout_secs)
        except subprocess.CalledProcessError:
            ui.progress(" error!", finish=True)
            raise UIError(f"unable to connect to {host}:{port}")
        else:
            ui.progress(" success!", finish=True)

    # TODO(benesch): replace with Docker health checks.
    def wait_for_postgres(
        self,
        *,
        dbname: str = "postgres",
        port: Optional[int] = None,
        host: str = "localhost",
        timeout_secs: int = 120,
        query: str = "SELECT 1",
        user: str = "postgres",
        password: str = "postgres",
        expected: Union[Iterable[Any], Literal["any"]] = [[1]],
        print_result: bool = False,
        service: str = "postgres",
    ) -> None:
        """Wait for a PostgreSQL service to start.

        Args:
            dbname: the name of the database to wait for
            host: the host postgres is listening on
            port: the port postgres is listening on
            timeout_secs: How long to wait for postgres to be up before failing (Default: 120)
            query: The query to execute to ensure that it is running (Default: "SELECT 1")
            user: The chosen user (this is only relevant for postgres)
            service: The service that postgres is running as (Default: postgres)
        """
        _wait_for_pg(
            dbname=dbname,
            host=host,
            port=self.port(service, port) if port else self.default_port(service),
            timeout_secs=timeout_secs,
            query=query,
            user=user,
            password=password,
            expected=expected,
            print_result=print_result,
        )

    # TODO(benesch): replace with Docker health checks.
    def wait_for_materialized(
        self,
        service: str = "materialized",
        *,
        user: str = "materialize",
        dbname: str = "materialize",
        host: str = "localhost",
        port: Optional[int] = None,
        timeout_secs: int = 60,
        query: str = "SELECT 1",
        expected: Union[Iterable[Any], Literal["any"]] = [[1]],
        print_result: bool = False,
    ) -> None:
        """Like `Workflow.wait_for_postgres`, but with Materialize defaults."""
        self.wait_for_postgres(
            user=user,
            dbname=dbname,
            host=host,
            port=port,
            timeout_secs=timeout_secs,
            query=query,
            expected=expected,
            print_result=print_result,
            service=service,
        )

    def testdrive(
        self,
        input: str,
        service: str = "testdrive",
        persistent: bool = True,
        args: List[str] = [],
    ) -> None:
        """Run a string as a testdrive script.

        Args:
            args: Additional arguments to pass to testdrive
            service: Optional name of the testdrive service to use.
            input: The string to execute.
            persistent: Whether a persistent testdrive container will be used.
        """

        if persistent:
            self.exec(service, *args, stdin=input)
        else:
            self.run(service, *args, stdin=input)
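
    # Usage sketch (assumes a "testdrive" service is defined in the
    # composition; the script below is illustrative):
    #
    #     c.testdrive("> SELECT 1\n1\n")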