def print_docker_logs(pattern: str, tail: int = 0) -> None:
    """Show the logs of every container whose name begins with `pattern`.

    Args:
        pattern: Prefix compared against the names reported by
            `docker ps -a` (stopped containers included).
        tail: Value forwarded verbatim to `docker logs --tail`.
    """
    listing = spawn.capture(
        ["docker", "ps", "-a", "--format={{.Names}}"], unicode=True
    )
    selected = [name for name in listing.splitlines() if name.startswith(pattern)]
    for name in selected:
        spawn.runv(["docker", "logs", "--tail", str(tail), name])
def get_container_id(self, service: str, running: bool = False) -> str:
    """Given a service name, tries to find a unique matching container id.

    Args:
        service: Text that must appear on the container's `docker ps` line.
        running: If True, only consider running containers; otherwise
            stopped containers are included too (`docker ps -a`).

    Returns:
        The first whitespace-delimited column of the single matching line.

    Raises:
        Failed: If `docker ps` exits with an error, or if the number of
            matching lines is not exactly one.
    """
    cmd = ["docker", "ps"] if running else ["docker", "ps", "-a"]
    try:
        list_containers = spawn.capture(cmd, unicode=True)
    except subprocess.CalledProcessError as e:
        raise Failed(f"failed to get container id for {service}: {e}") from e

    # Escape the service name so characters such as "." are matched literally
    # rather than being interpreted as regex metacharacters.
    pattern = re.compile(f"^(?P<c_id>[^ ]+).*{re.escape(service)}")
    searched = (pattern.search(line) for line in list_containers.splitlines())
    matches = [m.group("c_id") for m in searched if m]
    if len(matches) != 1:
        raise Failed(
            f"failed to get a unique container id for service {service}, found: {matches}"
        )
    return matches[0]
def run(self) -> None:
    """Build the test artifacts and stage them under `self.path`.

    Steps, in order: run the parent class's `run`; build stripped
    `testdrive` and `materialized` binaries via `CargoBuild`; compile all
    Cargo test binaries; copy each test binary into `self.path / "tests"`
    and record it in a `manifest` file; finally move the two binaries and
    copy the `pingpong` example and the `misc/shlib` directory into place.

    Raises:
        ValueError: If a test artifact's `package_id` does not contain a
            `(path+file://...)` component.
    """
    super().run()
    CargoBuild(self.rd, self.path, {"bin": "testdrive", "strip": True}).build()
    CargoBuild(self.rd, self.path, {"bin": "materialized", "strip": True}).build()
    # NOTE(benesch): The two invocations of `cargo test --no-run` here
    # deserve some explanation. The first invocation prints error messages
    # to stdout in a human readable form. If that succeeds, the second
    # invocation instructs Cargo to dump the locations of the test binaries
    # it built in a machine readable form. Without the first invocation, the
    # error messages would also be sent to the output file in JSON, and the
    # user would only see a vague "could not compile <package>" error.
    args = [
        self.rd.xcargo(),
        "test",
        "--locked",
        "--no-run",
    ]
    spawn.runv(args)
    output = spawn.capture(args + ["--message-format=json"], unicode=True)

    # Collect (executable, slug, crate_path) for every message whose
    # profile has `test` set.
    tests = []
    for line in output.split("\n"):
        if line.strip() == "":
            continue
        message = json.loads(line)
        if message.get("profile", {}).get("test", False):
            # The crate name is the first whitespace-separated token of the
            # package id.
            crate_name = message["package_id"].split()[0]
            target_kind = "".join(message["target"]["kind"])
            # Slug is "<crate>.<kind>", with ".<target name>" appended for
            # anything that is not a lib target.
            slug = crate_name + "." + target_kind
            if target_kind != "lib":
                slug += "." + message["target"]["name"]
            crate_path_match = re.search(r"\(path\+file://(.*)\)",
                                         message["package_id"])
            if not crate_path_match:
                raise ValueError(
                    f'invalid package_id: {message["package_id"]}')
            # Store the crate location relative to the repository root.
            crate_path = Path(crate_path_match.group(1)).relative_to(
                self.rd.root.resolve())
            tests.append((message["executable"], slug, crate_path))

    # Creating "tests/examples" also creates "tests" itself.
    os.makedirs(self.path / "tests" / "examples")
    with open(self.path / "tests" / "manifest", "w") as manifest:
        for (executable, slug, crate_path) in tests:
            shutil.copy(executable, self.path / "tests" / slug)
            # NOTE(review): `xstrip` is not defined in this function;
            # presumably a module-level strip tool path -- confirm.
            spawn.runv([xstrip, self.path / "tests" / slug])
            # One manifest line per test binary: "<slug> <crate path>".
            manifest.write(f"{slug} {crate_path}\n")
    shutil.move(str(self.path / "materialized"), self.path / "tests")
    shutil.move(str(self.path / "testdrive"), self.path / "tests")
    shutil.copy(
        self.rd.xcargo_target_dir() / "debug" / "examples" / "pingpong",
        self.path / "tests" / "examples",
    )
    shutil.copytree(self.rd.root / "misc" / "shlib", self.path / "shlib")
def docker_images() -> Set[str]:
    """List the Docker images available on the local machine.

    Returns:
        The set of `repository:tag` strings printed by `docker images`,
        one per output line. The set is empty when docker prints nothing.
    """
    output = spawn.capture(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"],
        unicode=True,
    )
    # `splitlines()` returns [] for empty output, whereas
    # `"".strip().split("\n")` returns [""] and would report a bogus
    # empty-named image.
    return set(output.splitlines())
def first_remote_matching(pattern: str) -> Optional[str]:
    """Return the name of the first Git remote whose listing contains `pattern`.

    Each line of `git remote -v` is checked for `pattern` as a plain
    substring; the name is the first whitespace-separated field of the first
    matching line. Returns None when no line matches.
    """
    listing = spawn.capture(["git", "remote", "-v"], unicode=True)
    names = (entry.split()[0] for entry in listing.splitlines() if pattern in entry)
    return next(names, None)
def build(self) -> None:
    """Build `self.bin` with Cargo and place the result in `self.path`.

    The binary is compiled (in release mode when `self.rd.release_mode` is
    set), copied into `self.path`, and then either stripped of debug info or
    trimmed of two debug index sections. If `self.extract` is non-empty,
    files produced by build scripts are copied into `self.path` as well.
    """
    cargo_build = [
        *self.rd.cargo("build", self.rustflags), "--bin", self.bin
    ]
    if self.rd.release_mode:
        cargo_build.append("--release")
    spawn.runv(cargo_build, cwd=self.rd.root)
    # The profile directory name under the Cargo target dir.
    cargo_profile = "release" if self.rd.release_mode else "debug"
    shutil.copy(self.rd.cargo_target_dir() / cargo_profile / self.bin,
                self.path)
    if self.strip:
        # NOTE(benesch): the debug information is large enough that it slows
        # down CI, since we're packaging these binaries up into Docker
        # images and shipping them around. A bit unfortunate, since it'd be
        # nice to have useful backtraces if the binary crashes.
        spawn.runv(
            [
                *self.rd.tool("strip"), "--strip-debug", self.path / self.bin
            ],
            cwd=self.rd.root,
        )
    else:
        # Even if we've been asked not to strip the binary, remove the
        # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
        # indexes that speed up launching a debugger against the binary,
        # and we're happy to have slower debugger start up in exchange for
        # smaller binaries. Plus the sections have been obsoleted by a
        # `.debug_names` section in DWARF 5, and so debugger support for
        # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
        # See: https://github.com/rust-lang/rust/issues/46034
        spawn.runv(
            [
                *self.rd.tool("objcopy"),
                "-R",
                ".debug_pubnames",
                "-R",
                ".debug_pubtypes",
                self.path / self.bin,
            ],
            cwd=self.rd.root,
        )
    if self.extract:
        # Re-run the same build command with JSON messages to learn each
        # build script's output directory.
        output = spawn.capture(
            cargo_build + ["--message-format=json"],
            unicode=True,
            cwd=self.rd.root,
        )
        for line in output.split("\n"):
            # Skip blank lines and anything that does not start a JSON object.
            if line.strip() == "" or not line.startswith("{"):
                continue
            message = json.loads(line)
            if message["reason"] != "build-script-executed":
                continue
            # `self.extract` is keyed by the first token of the package id.
            package = message["package_id"].split()[0]
            for d in self.extract.get(package, []):
                # Copy each requested file from the build script's out_dir,
                # flattening it to its base name inside `self.path`.
                shutil.copy(
                    Path(message["out_dir"]) / d, self.path / Path(d).name)
def mz_proc(cid: str) -> psutil.Process:
    """Locate the `materialized` process running inside a Docker container.

    Args:
        cid: Container identifier handed to `docker inspect`.

    Returns:
        The first descendant of the container's init process (the PID in
        `State.Pid`) whose process name is `materialized`.

    Raises:
        RuntimeError: If no descendant has that name.
    """
    inspected = json.loads(spawn.capture(["docker", "inspect", cid]))
    init_pid = int(inspected[0]["State"]["Pid"])
    for descendant in psutil.Process(init_pid).children(recursive=True):
        if descendant.name() != "materialized":
            continue
        # Narrows the type for static checkers.
        assert isinstance(descendant, psutil.Process)
        return descendant
    raise RuntimeError("Couldn't find materialized pid")
def capture(self, args: List[str], stderr_too: bool = False) -> str:
    """Run a command and return its output as text.

    Args:
        args: The command line; `args[0]` is the program, and is the name
            used in error messages.
        stderr_too: Forwarded to `spawn.capture`.

    Returns:
        Whatever `spawn.capture` returns for the command.

    Raises:
        UIError: If the command exits unsuccessfully or the program cannot
            be found. The underlying exception is attached as `__cause__`.
    """
    try:
        return spawn.capture(args, stderr_too=stderr_too, unicode=True)
    except subprocess.CalledProcessError as e:
        # Print any captured output, since it probably hints at the problem.
        # Skip it when nothing was captured so we don't print "None".
        if e.output:
            print(e.output, file=sys.stderr, end="")
        raise UIError(
            f"running `{args[0]}` failed (exit status {e.returncode})"
        ) from e
    except FileNotFoundError as e:
        raise UIError(
            f"unable to launch `{args[0]}`", hint=f"is {args[0]} installed?"
        ) from e
def assert_docker_compose_version() -> None:
    """Verify that the installed docker-compose is recent enough.

    The output of `docker-compose version --short` is split on "." and each
    component converted to an int; the resulting tuple is compared with
    `MIN_COMPOSE_VERSION`.

    Raises:
        errors.MzConfigurationError: If the installed version is lower than
            `MIN_COMPOSE_VERSION`.
    """
    reported = spawn.capture(
        ["docker-compose", "version", "--short"], unicode=True
    ).strip()
    version = tuple(int(component) for component in reported.split("."))
    if version < MIN_COMPOSE_VERSION:
        raise errors.MzConfigurationError(
            f"Unsupported docker-compose version: {version}, min required: {MIN_COMPOSE_VERSION}"
        )
def rev_parse(rev: str) -> str:
    """Resolve a revision to its hash.

    Args:
        rev: A Git revision in any format known to the Git CLI.

    Returns:
        sha: The output of `git rev-parse --verify` for the revision, with
            surrounding whitespace removed.
    """
    resolved = spawn.capture(["git", "rev-parse", "--verify", rev], unicode=True)
    return resolved.strip()
def find_host_ports(self, service: str) -> List[str]:
    """Find all ports open on the host for a given service"""
    # `docker-compose ps` output depends on terminal width (!), so parsing it
    # directly is fraught. The `-q` flag is safe: it yields only container
    # IDs, which are then handed to `docker inspect` for machine-readable
    # output.
    container_ids = spawn.capture(
        ["bin/mzcompose", "--mz-quiet", *self._compose_args(), "ps", "-q"],
        unicode=True,
    ).splitlines()
    inspected = spawn.capture(
        ["docker", "inspect", "-f", "{{json .}}", *container_ids]
    )
    # One JSON document per output line.
    parsed = [json.loads(entry) for entry in inspected.splitlines()]

    ports = []
    for md in parsed:
        if md["Config"]["Labels"]["com.docker.compose.service"] != service:
            continue
        for bindings in md["NetworkSettings"]["Ports"].values():
            # A port entry may be falsy (no host bindings); treat as empty.
            ports.extend(binding["HostPort"] for binding in bindings or [])
    return ports
def rev_count(rev: str) -> int:
    """Count the commits up to a revision.

    Args:
        rev: A Git revision in any format known to the Git CLI.

    Returns:
        count: The number printed by `git rev-list --count` for the
            revision, i.e. the commits reachable from it, inclusive.
    """
    raw_count = spawn.capture(["git", "rev-list", "--count", rev, "--"])
    return int(raw_count.strip())
def _check_tcp(cmd: List[str],
               host: str,
               port: int,
               timeout_secs: int,
               kind: str = "") -> List[str]:
    """Run `cmd` extended with a bash loop that polls `host:port` over TCP.

    Note that `cmd` is extended IN PLACE with the `timeout ... bash -c ...`
    arguments; the same list object is returned.

    Args:
        cmd: Command prefix to which the polling command is appended.
        host: Host used in the `/dev/tcp/<host>/<port>` path.
        port: Port used in the `/dev/tcp/<host>/<port>` path.
        timeout_secs: Duration passed to `timeout`.
        kind: Label prepended to `host:port` in the failure log message.

    Returns:
        The extended `cmd`.

    Raises:
        subprocess.CalledProcessError: Re-raised after logging when the
            command fails.
    """
    poll_script = f"until [ cat < /dev/null > /dev/tcp/{host}/{port} ] ; do sleep 0.1 ; done"
    # `+=` on a list extends it in place, so the caller's list is modified.
    cmd += ["timeout", str(timeout_secs), "bash", "-c", poll_script]
    try:
        spawn.capture(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        message = "wait-for-tcp ({}{}:{}): error running {}: {}, stdout:\n{}\nstderr:\n{}".format(
            kind, host, port, ui.shell_quote(cmd), e, e.stdout, e.stderr)
        ui.log_in_automation(message)
        raise
    return cmd
def workflow_testdrive_redpanda_ci(c: Composition) -> None:
    """Run testdrive against files known to be supported by Redpanda.

    The candidate files are the `*.td` files next to this module that match
    the `kafka-ingest` grep below, minus the known failures. They are passed
    to the `default` workflow together with `--redpanda`.
    """
    # https://github.com/vectorizedio/redpanda/issues/2397
    KNOWN_FAILURES = {"kafka-time-offset.td"}
    files = set(
        # NOTE(benesch): invoking the shell like this to filter testdrive files is
        # pretty gross. Let's not get into the habit of using this construction.
        #
        # The pattern is a raw string: `\$` is not a valid Python escape
        # sequence, and in a plain literal it triggers an invalid-escape
        # warning. The value passed to the shell is unchanged.
        spawn.capture(["sh", "-c", r"grep -lr '\$.*kafka-ingest' *.td"],
                      cwd=Path(__file__).parent).split())
    files -= KNOWN_FAILURES
    # Sort so the files are run in a reproducible order; set iteration order
    # is not stable across runs.
    c.workflow("default", "--redpanda", *sorted(files))
def workflow_nightly(c: Composition) -> None:
    """Run cluster testdrive"""
    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    # Skip tests that use features that are not supported yet: `grep -L`
    # lists only the files that do NOT mention any of these names.
    grep_script = "grep -rLE 'mz_catalog|mz_kafka_|mz_records_|mz_metrics' testdrive/*.td"
    listing = spawn.capture(
        ["sh", "-c", grep_script],
        cwd=Path(__file__).parent.parent,
    )
    test_cluster(c, *listing.split())
def docker_inspect(self, format: str, container_id: str) -> str:
    """Run `docker inspect -f` on a container and return the first output line.

    Args:
        format: Template passed to `docker inspect -f`. It is wrapped in
            literal single quotes, as before, so existing callers see the
            same output.
        container_id: The container to inspect.

    Returns:
        The first line printed by `docker inspect`.

    Raises:
        errors.Failed: If `docker inspect` exits with an error or prints
            nothing.
    """
    # Build the argument vector directly instead of whitespace-splitting a
    # formatted string, so a template containing spaces stays one argument.
    cmd = ["docker", "inspect", "-f", f"'{format}'", container_id]
    try:
        lines = spawn.capture(cmd, unicode=True, stderr_too=True).splitlines()
    except subprocess.CalledProcessError as e:
        ui.log_in_automation(
            "docker inspect ({}): error running {}: {}, stdout:\n{}\nstderr:\n{}"
            .format(container_id, ui.shell_quote(cmd), e, e.stdout, e.stderr))
        raise errors.Failed(f"failed to inspect Docker container: {e}") from e
    if not lines:
        # Report empty output in the same way as other inspect failures
        # rather than letting an IndexError escape.
        raise errors.Failed(
            f"failed to inspect Docker container: no output for {container_id}")
    return lines[0]
def run(self, comp: Composition, workflow: Workflow) -> None:
    """Wait until `self._host:self._port` accepts a TCP connection.

    On each pass of `ui.timeout_loop`, a throwaway container attached to the
    `<composition name>_default` network tries to open the port through
    bash's `/dev/tcp`. The first successful attempt ends the wait.

    Raises:
        Failed: If the timeout loop finishes without a successful attempt.
    """
    ui.progress(f"waiting for {self._host}:{self._port}", "C")
    for remaining in ui.timeout_loop(self._timeout_secs):
        container_cmd = f"docker run --rm -it --network {comp.name}_default ubuntu:bionic-20200403".split()
        probe_cmd = [
            "timeout",
            str(self._timeout_secs),
            "bash",
            "-c",
            f"cat < /dev/null > /dev/tcp/{self._host}/{self._port}",
        ]
        try:
            spawn.capture(container_cmd + probe_cmd, unicode=True, stderr_too=True)
        except subprocess.CalledProcessError:
            # Not reachable yet: show the remaining time and try again.
            ui.progress(" {}".format(int(remaining)))
            continue
        ui.progress(" success!", finish=True)
        return
    raise Failed(f"Unable to connect to {self._host}:{self._port}")
def run(self) -> None: super().run() # NOTE(benesch): The two invocations of `cargo test --no-run` here # deserve some explanation. The first invocation prints error messages # to stdout in a human readable form. If that succeeds, the second # invocation instructs Cargo to dump the locations of the test binaries # it built in a machine readable form. Without the first invocation, the # error messages would also be sent to the output file in JSON, and the # user would only see a vague "could not compile <package>" error. args = [*self.rd.cargo("test", rustflags=[]), "--locked", "--no-run"] spawn.runv(args, cwd=self.rd.root) output = spawn.capture(args + ["--message-format=json"], cwd=self.rd.root) tests = [] for line in output.split("\n"): if line.strip() == "": continue message = json.loads(line) if message.get("profile", {}).get("test", False): crate_name = message["package_id"].split()[0] target_kind = "".join(message["target"]["kind"]) if target_kind == "proc-macro": continue slug = crate_name + "." + target_kind if target_kind != "lib": slug += "." + message["target"]["name"] crate_path_match = re.search(r"\(path\+file://(.*)\)", message["package_id"]) if not crate_path_match: raise ValueError( f'invalid package_id: {message["package_id"]}') crate_path = Path(crate_path_match.group(1)).relative_to( self.rd.root.resolve()) executable = self.rd.rewrite_builder_path_for_host( Path(message["executable"])) tests.append((executable, slug, crate_path)) os.makedirs(self.path / "tests" / "examples") with open(self.path / "tests" / "manifest", "w") as manifest: for (executable, slug, crate_path) in tests: shutil.copy(executable, self.path / "tests" / slug) spawn.runv( [*self.rd.tool("strip"), self.path / "tests" / slug], cwd=self.rd.root, ) package = slug.replace(".", "::") manifest.write(f"{slug} {package} {crate_path}\n") shutil.copytree(self.rd.root / "misc" / "shlib", self.path / "shlib")
def get_docker_processes(running: bool = False) -> str:
    """
    Use 'docker ps' to return all Docker process information.

    :param running: If True, only return running processes; otherwise
        'docker ps -a' is used, which also lists stopped ones.
    :return: the raw text printed by 'docker ps'
    :raises Failed: if 'docker ps' exits with an error; the underlying
        CalledProcessError is attached as the cause.
    """
    cmd = ["docker", "ps"] if running else ["docker", "ps", "-a"]
    try:
        return spawn.capture(cmd, unicode=True)
    except subprocess.CalledProcessError as e:
        raise Failed(f"failed to get Docker container ids: {e}") from e
def get_version_tags(*, fetch: bool = True) -> List[semver.version.Version]:
    """List all the version-like tags in the repo, newest first

    Tags that do not parse as semver (after leading "v" characters are
    removed) are skipped with a warning on stderr.

    Args:
        fetch: If false, don't automatically run `git fetch --tags`.
    """
    if fetch:
        _fetch()

    versions = []
    tag_names = spawn.capture(["git", "tag"], unicode=True).splitlines()
    for tag_name in tag_names:
        try:
            version = semver.version.Version.parse(tag_name.lstrip("v"))
        except ValueError as err:
            print(f"WARN: {err}", file=sys.stderr)
            continue
        versions.append(version)
    versions.sort(reverse=True)
    return versions
def ls_files(root: Path, *specs: Union[Path, str]) -> Set[bytes]:
    """Find unignored files within the specified paths.

    Args:
        root: Directory in which the Git command is run.
        specs: Path specs passed to Git after `--`.

    Returns:
        The non-blank, NUL-separated file names printed by Git, as bytes.
    """
    # The goal here is to find all files in the working tree that are not
    # ignored by .gitignore. `git ls-files` doesn't work, because it reports
    # files that have been deleted in the working tree if they are still
    # present in the index. Using `os.walk` doesn't work because there is no
    # good way to evaluate .gitignore rules from Python. So we use `git diff`
    # against the empty tree, which appears to have the desired semantics.
    #
    # The hash below is the output of: git hash-object -t tree /dev/null
    empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
    diff_cmd = ["git", "diff", "--name-only", "-z", empty_tree, "--", *specs]
    listing = spawn.capture(diff_cmd, cwd=root)
    return {name for name in listing.split(b"\0") if name.strip() != b""}
def get_version_tags(*, fetch: bool = True) -> List[semver.VersionInfo]:
    """List all the version-like tags in the repo, newest first

    Tags that do not parse as semver (after leading "v" characters are
    removed) are reported on stderr and left out of the result.

    Args:
        fetch: If false, don't update git, only intended for testing
    """
    if fetch:
        spawn.runv(["git", "fetch", "--tags"])

    listing = spawn.capture(["git", "tag"], unicode=True)
    parsed = []
    for raw_tag in listing.splitlines():
        try:
            info = semver.VersionInfo.parse(raw_tag.lstrip("v"))
        except ValueError as err:
            print(f"WARN: {err}", file=sys.stderr)
        else:
            parsed.append(info)
    parsed.sort(reverse=True)
    return parsed
def find_host_ports(self, service: str) -> List[str]:
    """Find all ports open on the host for a given service

    Scans the `./mzcompose ps` listing for lines that mention both the
    service and "Up". From each whitespace-separated field containing
    "/tcp" and "->", it takes the text between the first ":" and the next
    "-".
    """
    listing = spawn.capture(["./mzcompose", "--mz-quiet", "ps"], unicode=True)
    # technically 'docker-compose ps' has a `--filter` flag but...
    # https://github.com/docker/compose/issues/5996
    ports = []
    for row in listing.splitlines():
        if service not in row or "Up" not in row:
            continue
        # `split()` already ignores leading and trailing whitespace.
        for field in row.split():
            if "/tcp" in field and "->" in field:
                ports.append(field.split(":")[1].split("-")[0])
    return ports
def get_docker_processes(running: bool = False) -> List[Dict[str, Any]]:
    """
    Use 'docker ps' to return all Docker process information.

    :param running: If True, only return running processes; otherwise
        'docker ps -a' is used, which also lists stopped ones.
    :return: one dict per line of output, decoded from the JSON that
        'docker ps --format "{{ json . }}"' prints
    :raises errors.Failed: if 'docker ps' exits with an error; the
        underlying CalledProcessError is attached as the cause.
    """
    cmd = ["docker", "ps"]
    if not running:
        cmd.append("-a")
    cmd += ["--format", "{{ json . }}"]
    try:
        out = spawn.capture(cmd, unicode=True)
    except subprocess.CalledProcessError as e:
        raise errors.Failed(f"failed to get Docker container ids: {e}") from e
    procs = [json.loads(line) for line in out.splitlines()]
    # json technically returns Any, so narrow the type for static checkers.
    return cast(List[Dict[str, Any]], procs)
def rev_parse(rev: str, *, abbrev: bool = False) -> str:
    """Resolve a revision with `git rev-parse --verify`.

    Args:
        rev: A Git revision in any format known to the Git CLI.
        abbrev: Pass `--abbrev-ref` so that a branch or tag name is returned
            instead of a git sha.

    Returns:
        ref: The stripped output of `git rev-parse`: the object ID of the
            revision, or an abbreviated ref name when `abbrev=True`.

    Raises:
        RuntimeError: If Git prints nothing for the revision.
    """
    cmd = ["git", "rev-parse"]
    if abbrev:
        cmd.append("--abbrev-ref")
    cmd += ["--verify", rev]
    parsed = spawn.capture(cmd).strip()
    if parsed:
        return parsed
    raise RuntimeError(f"No parsed rev for {rev}")
def run(self, comp: Composition, workflow: Workflow) -> None:
    """Check once per second that the container is still running.

    For `self._uptime_secs` seconds, the names printed by `docker ps` are
    checked for one starting with `<composition name>_<container>`. If none
    is found, the last 10 log lines of the matching containers are printed
    and the step fails.

    Raises:
        Failed: If `docker ps` fails or the container is no longer listed.
    """
    pattern = f"{comp.name}_{self._container}"
    ui.progress(f"Ensuring {self._container} stays up ", "C")
    for seconds_left in range(self._uptime_secs, 0, -1):
        time.sleep(1)
        try:
            names = spawn.capture(
                ["docker", "ps", "--format={{.Names}}"], unicode=True
            )
        except subprocess.CalledProcessError as e:
            raise Failed(f"{e.stdout}")
        still_up = any(name.startswith(pattern) for name in names.splitlines())
        if not still_up:
            print(f"failed! {pattern} logs follow:")
            print_docker_logs(pattern, 10)
            raise Failed(f"container {self._container} stopped running!")
        ui.progress(f" {seconds_left}")
    print()
def main() -> int:
    """Replace the Buildkite pipeline with only the selected test steps.

    Reads the `tests` meta-data key (one step id per line), keeps the steps
    of the sibling `pipeline.yml` whose `id` is listed, drops their `id`
    keys, and uploads the result with `pipeline upload --replace`.

    Returns:
        Always 0.
    """
    # If the test filter metadata doesn't exist, run all tests: the pipeline
    # is left as it is. Exit status 100 is taken to mean the key is absent.
    probe = subprocess.run(["buildkite-agent", "meta-data", "exists", "tests"])
    if probe.returncode == 100:
        return 0

    # Otherwise, filter down to the selected tests.
    pipeline_path = Path(__file__).parent / "pipeline.yml"
    with open(pipeline_path) as f:
        pipeline = yaml.safe_load(f.read())
    raw_selection = spawn.capture(["buildkite-agent", "meta-data", "get", "tests"])
    selected_tests = set(raw_selection.splitlines())

    new_steps = []
    for step in pipeline["steps"]:
        if "id" not in step or step["id"] not in selected_tests:
            continue
        del step["id"]
        new_steps.append(step)

    spawn.runv(
        ["buildkite-agent", "pipeline", "upload", "--replace"],
        stdin=yaml.dump(new_steps).encode(),
    )
    return 0
def fetch() -> str:
    """Run `git fetch --tags` and return its stripped output"""
    fetched = spawn.capture(["git", "fetch", "--tags"], unicode=True)
    return fetched.strip()
def describe() -> str:
    """Return the stripped output of `git describe`

    This describes the relationship between the current commit and the most
    recent tag.
    """
    description = spawn.capture(["git", "describe"], unicode=True)
    return description.strip()
def list_prs(recent_ref: Optional[str], ancestor_ref: Optional[str]) -> None:
    """
    List PRs between a range of refs

    If no refs are specified, then this will find the refs between the most
    recent tag and the previous semver tag (i.e. excluding RCs)

    Args:
        recent_ref: Newer end of the range, as a version without the leading
            "v". Defaults to the most recent version tag.
        ancestor_ref: Older end of the range, as a version without the
            leading "v". Defaults to the newest tag whose major, minor or
            patch number is lower than that of `recent_ref`.

    Raises:
        UIError: If no ancestor tag can be determined, if the GitHub access
            token file is missing, or if a GitHub response lacks the
            expected fields.
    """
    git.fetch()
    if recent_ref is None or ancestor_ref is None:
        tags = git.get_version_tags(fetch=False)
        if recent_ref is None:
            recent = tags[0]
            recent_ref = str(tags[0])
        else:
            recent = Version.parse(recent_ref)
        if ancestor_ref is None:
            # Tags are sorted newest first, so the first tag with any lower
            # version component is the previous release. Release candidates
            # of `recent` share all three components and are skipped.
            for ref in tags[1:]:
                ancestor = ref
                if (
                    ancestor.major < recent.major
                    or ancestor.minor < recent.minor
                    or ancestor.patch < recent.patch
                ):
                    ancestor_ref = str(ref)
                    break
            if ancestor_ref is None:
                # Without this guard the range below would be "vNone..v<x>".
                raise UIError(
                    f"unable to find a version tag older than {recent_ref}")
        ui.say(
            f"Using recent_ref={recent_ref} ancestor_ref={ancestor_ref}",
        )

    commit_range = f"v{ancestor_ref}..v{recent_ref}"
    commits = spawn.capture(
        [
            "git",
            "log",
            "--pretty=format:%d %s",
            "--abbrev-commit",
            "--date=iso",
            commit_range,
            "--",
        ],
        unicode=True,
    )

    # Group 1: PR number from a `refs/pullreqs/<n>` decoration.
    # Group 2: PR number from a "(#<n>" in the commit subject.
    pattern = re.compile(r"^\s*\(refs/pullreqs/(\d+)|\(#(\d+)")
    prs = []
    found_ref = False
    for commit in commits.splitlines():
        if "build(deps)" in commit:
            continue
        match = pattern.search(commit)
        if match is not None:
            pr = match.group(1)
            if pr:
                found_ref = True
            else:
                pr = match.group(2)
            prs.append(pr)

    if not found_ref:
        ui.say(
            "WARNING: you probably don't have pullreqs configured for your repo",
        )
        ui.say(
            "Add the following line to the MaterializeInc/materialize remote section in your .git/config",
        )
        ui.say(" fetch = +refs/pull/*/head:refs/pullreqs/*")

    username = input("Enter your github username: ")
    creds_path = os.path.expanduser("~/.config/materialize/dev-tools-access-token")
    try:
        with open(creds_path) as fh:
            token = fh.read().strip()
    except FileNotFoundError as e:
        raise UIError(
            f"No developer tool api token at {creds_path!r} "
            "please create an access token at https://github.com/settings/tokens"
        ) from e

    def get(pr: str) -> Any:
        # Send the credentials as HTTP basic auth rather than embedding them
        # in the URL, so the token cannot end up in URLs that get logged.
        return requests.get(
            f"https://api.github.com/repos/MaterializeInc/materialize/pulls/{pr}",
            auth=(username, token),
            headers={
                "Accept": "application/vnd.github.v3+json",
            },
        ).json()

    collected = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        futures = {pool.submit(get, pr): pr for pr in prs}
        for future in concurrent.futures.as_completed(futures):
            pr = futures[future]
            contents = future.result()
            try:
                collected.append((contents["html_url"], contents["title"]))
            except KeyError as e:
                raise UIError(
                    f"unexpected GitHub response for PR {pr}: {contents}"
                ) from e

    for url, title in sorted(collected):
        print(url, title)