def determine_modules_to_test(changed_modules):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    Returns a topologically-sorted list of modules (ties are broken by sorting on module names).

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'hive', 'mllib', 'examples', 'hive-thriftserver', 'hivecontext-compatibility',
     'pyspark-sql', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    modules_to_test = set()
    for module in changed_modules:
        modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
    modules_to_test = modules_to_test.union(set(changed_modules))
    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    return toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test},
        sort=True)
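
# A minimal, self-contained sketch (not part of the script above) of the toposort step.
# The dict handed to toposort_flatten maps each module to the subset of its dependencies
# that are also scheduled for testing, and sort=True breaks ties by sorting on the keys;
# the string names below are illustrative stand-ins for the real Module objects.
from toposort import toposort_flatten

modules_to_test = {"sql", "hive", "examples"}
dependencies = {
    "sql": set(),                 # depends on nothing inside the test set
    "hive": {"sql"},              # depends on 'sql'
    "examples": {"sql", "hive"},  # depends on both
}
graph = {m: dependencies[m] & modules_to_test for m in modules_to_test}
print(toposort_flatten(graph, sort=True))  # ['sql', 'hive', 'examples']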
def determine_modules_to_test(changed_modules):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    Returns a topologically-sorted list of modules (ties are broken by sorting on module names).

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
     'pyspark-sql', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    modules_to_test = set()
    for module in changed_modules:
        modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
    modules_to_test = modules_to_test.union(set(changed_modules))
    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    return toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test},
        sort=True)
def determine_modules_to_test(changed_modules, deduplicated=True):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    Returns a topologically-sorted list of modules (ties are broken by sorting on module names).

    If ``deduplicated`` is disabled, the modules are returned without taking the deduplication
    by dependencies into account.

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.core])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.launcher])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> [x.name for x in determine_modules_to_test([modules.sql])]
    ... # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'avro', 'docker-integration-tests', 'hive', 'mllib', 'sql-kafka-0-10', 'examples',
     'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas',
     'pyspark-pandas-slow', 'pyspark-ml']
    >>> sorted([x.name for x in determine_modules_to_test(
    ...     [modules.sparkr, modules.sql], deduplicated=False)])
    ... # doctest: +NORMALIZE_WHITESPACE
    ['avro', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib',
     'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-sql',
     'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
    >>> sorted([x.name for x in determine_modules_to_test(
    ...     [modules.sql, modules.core], deduplicated=False)])
    ... # doctest: +NORMALIZE_WHITESPACE
    ['avro', 'catalyst', 'core', 'docker-integration-tests', 'examples', 'graphx', 'hive',
     'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
     'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-resource', 'pyspark-sql',
     'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming',
     'streaming-kafka-0-10', 'streaming-kinesis-asl']
    """
    modules_to_test = set()
    for module in changed_modules:
        modules_to_test = modules_to_test.union(
            determine_modules_to_test(module.dependent_modules, deduplicated))
    modules_to_test = modules_to_test.union(set(changed_modules))

    if not deduplicated:
        return modules_to_test

    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    return toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test},
        sort=True)
def determine_modules_to_test(changed_modules):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    Returns a topologically-sorted list of modules (ties are broken by sorting on module names).

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ... # doctest: +SKIP
    ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
     'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    modules_to_test = set()
    for module in changed_modules:
        modules_to_test = modules_to_test.union(
            determine_modules_to_test(module.dependent_modules))
    modules_to_test = modules_to_test.union(set(changed_modules))
    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    changed_modules = toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test},
        sort=True)
    # TODO: Skip the hive-thriftserver module for hadoop-3.2; remove this once hadoop-3.2 supports it
    if modules.hadoop_version == "hadoop3.2":
        changed_modules = [m for m in changed_modules if m.name != "hive-thriftserver"]
    return changed_modules
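
# A hypothetical, self-contained sketch of the transitive closure over dependent_modules that
# the recursion above computes; FakeModule and closure() are illustrative stand-ins, not
# Spark's real Module class or helpers. Changing 'sql' pulls in its direct dependent 'hive'
# and, transitively, 'examples'.
class FakeModule:
    def __init__(self, name):
        self.name = name
        self.dependent_modules = []

def closure(changed):
    # Union of the changed modules and, recursively, everything that depends on them.
    result = set(changed)
    for module in changed:
        result |= closure(module.dependent_modules)
    return result

sql, hive, examples = FakeModule("sql"), FakeModule("hive"), FakeModule("examples")
sql.dependent_modules = [hive]        # hive builds on top of sql
hive.dependent_modules = [examples]   # examples build on top of hive
print(sorted(m.name for m in closure([sql])))  # ['examples', 'hive', 'sql']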