def __init__(self, name, has_python, skip_java, skip_scala, use_source_root, root_dir,
             debug_port, context, targets, transitive, target_util, spec_excludes):
  """Creates a new, unconfigured, Project based at root_dir and comprised of the sources visible
  to the given targets."""
  self.context = context
  self.target_util = target_util
  self.name = name
  self.root_dir = root_dir
  self.targets = OrderedSet(targets)
  self.transitive = transitive
  self.sources = []
  self.py_sources = []
  self.py_libs = []
  self.resource_extensions = set()
  self.has_python = has_python
  self.skip_java = skip_java
  self.skip_scala = skip_scala
  self.use_source_root = use_source_root
  self.has_scala = False
  self.has_tests = False
  self.debug_port = debug_port
  self.internal_jars = OrderedSet()
  self.external_jars = OrderedSet()
  self.spec_excludes = spec_excludes
def __init__(self, name, has_python, skip_java, skip_scala, root_dir,
             checkstyle_suppression_files, debug_port, targets, transitive, workunit_factory):
  """Creates a new, unconfigured, Project based at root_dir and comprised of the sources visible
  to the given targets."""
  self.name = name
  self.root_dir = root_dir
  self.targets = OrderedSet(targets)
  self.transitive = transitive
  self.workunit_factory = workunit_factory
  self.sources = []
  self.py_sources = []
  self.py_libs = []
  self.resource_extensions = set()
  self.has_python = has_python
  self.skip_java = skip_java
  self.skip_scala = skip_scala
  self.has_scala = False
  self.has_tests = False
  self.checkstyle_suppression_files = checkstyle_suppression_files  # Absolute paths.
  self.debug_port = debug_port
  self.internal_jars = OrderedSet()
  self.external_jars = OrderedSet()
def __init__(self, name, has_python, skip_java, skip_scala, use_source_root, root_dir,
             debug_port, context, targets, transitive, target_util, spec_excludes=None,
             build_ignore_patterns=None):
  """Creates a new, unconfigured, Project based at root_dir and comprised of the sources visible
  to the given targets."""
  deprecated_conditional(lambda: spec_excludes is not None, '0.0.75',
                         'Use build_ignore_patterns instead.')
  self.context = context
  self.target_util = target_util
  self.name = name
  self.root_dir = root_dir
  self.targets = OrderedSet(targets)
  self.transitive = transitive
  self.sources = []
  self.py_sources = []
  self.py_libs = []
  self.resource_extensions = set()
  self.has_python = has_python
  self.skip_java = skip_java
  self.skip_scala = skip_scala
  self.use_source_root = use_source_root
  self.has_scala = False
  self.has_tests = False
  self.debug_port = debug_port
  self.internal_jars = OrderedSet()
  self.external_jars = OrderedSet()
  self.spec_excludes = spec_excludes
  self.build_ignore_patterns = build_ignore_patterns
def check_artifact_cache(self, vts):
  """Checks the artifact cache for the specified VersionedTargetSets.

  Returns a pair (cached, uncached) of the ones that were/were not satisfied from the cache.
  The cached ones don't require building.
  """
  if not vts:
    return [], []

  cached_vts = []
  uncached_vts = OrderedSet(vts)
  with self.context.new_workunit('check'):
    pool = ThreadPool(processes=6)
    res = pool.map(lambda vt: self._artifact_cache.use_cached_files(vt.cache_key),
                   vts, chunksize=1)
    pool.close()
    pool.join()
    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.append(vt)
        uncached_vts.discard(vt)
        vt.update()
  return cached_vts, list(uncached_vts)
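# A self-contained sketch of the partitioning pattern above, with the artifact cache replaced
# by a stub predicate (the names here are hypothetical, not from the codebase): probe every
# item in parallel on a thread pool, then split the input into (cached, uncached) while the
# OrderedSet preserves the input order of the uncached remainder.
from multiprocessing.pool import ThreadPool

from twitter.common.collections import OrderedSet  # Assumed import path for OrderedSet.

def partition_by_cache(vts, use_cached_files):
  cached_vts = []
  uncached_vts = OrderedSet(vts)
  pool = ThreadPool(processes=6)
  try:
    res = pool.map(use_cached_files, vts, chunksize=1)
  finally:
    pool.close()
    pool.join()
  for vt, was_in_cache in zip(vts, res):
    if was_in_cache:
      cached_vts.append(vt)
      uncached_vts.discard(vt)
  return cached_vts, list(uncached_vts)

# Example: pretend even-numbered cache keys are hits.
assert partition_by_cache([1, 2, 3, 4], lambda vt: vt % 2 == 0) == ([2, 4], [1, 3])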
def extern_store_set(self, context_handle, vals_ptr, vals_len):
  """Given storage and an array of Handles, return a new Handle to represent the set."""
  c = self._ffi.from_handle(context_handle)
  return c.to_value(OrderedSet(c.from_value(val[0])
                               for val in self._ffi.unpack(vals_ptr, vals_len)))
def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
  """Checks the artifact cache for the specified list of VersionedTargetSets.

  Returns a pair (cached, uncached) of VersionedTargets that were satisfied/unsatisfied
  from the cache.
  """
  if not vts:
    return [], []

  cached_vts = []
  uncached_vts = OrderedSet(vts)

  cache = self.get_artifact_cache()
  items = [(cache, vt.cache_key) for vt in vts]
  res = self.context.subproc_map(call_use_cached_files, items)

  for vt, was_in_cache in zip(vts, res):
    if was_in_cache:
      cached_vts.append(vt)
      uncached_vts.discard(vt)
    elif isinstance(was_in_cache, UnreadableArtifact):
      self._cache_key_errors.update(was_in_cache.key)

  # Note that while the input vts may represent multiple targets (for tasks that override
  # check_artifact_cache_for), the ones we return must represent single targets.
  def flatten(vts):
    return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts]))

  all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts)

  if post_process_cached_vts:
    post_process_cached_vts(all_cached_vts)
  for vt in all_cached_vts:
    vt.update()
  return all_cached_vts, all_uncached_vts
def _unexcluded_dependencies(self, jardepmap, binary):
  # TODO(John Sirois): Kill this and move jar exclusion to use confs
  excludes = set()
  for exclude_key in ((e.org, e.name) if e.name else e.org for e in binary.deploy_excludes):
    exclude = jardepmap.get(exclude_key)
    if exclude:
      for basedir, jars in exclude.items():
        for jar in jars:
          excludes.add((basedir, jar))
  if excludes:
    self.context.log.debug('Calculated excludes:\n\t%s' % '\n\t'.join(str(e) for e in excludes))

  externaljars = OrderedSet()

  def add_jars(target):
    mapped = jardepmap.get(target)
    if mapped:
      for basedir, jars in mapped.items():
        for externaljar in jars:
          if (basedir, externaljar) not in excludes:
            externaljars.add((basedir, externaljar))
          else:
            self.context.log.debug('Excluding %s from binary' % externaljar)

  binary.walk(add_jars)
  return externaljars
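# A tiny runnable illustration of the exclude-key computation above (the Exclude stub is
# hypothetical, for illustration only): a deploy exclude with a name maps to an (org, name)
# pair, while a bare exclude maps to just its org string, matching the two kinds of keys
# the jardepmap can be probed with.
from collections import namedtuple

Exclude = namedtuple('Exclude', ['org', 'name'])

deploy_excludes = [Exclude('com.example', 'util'), Exclude('org.slf4j', None)]
exclude_keys = [(e.org, e.name) if e.name else e.org for e in deploy_excludes]
assert exclude_keys == [('com.example', 'util'), 'org.slf4j']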
def create(cls, relative_to, files=None, globs=None, rglobs=None, zglobs=None):
  """Given various file patterns create a PathGlobs object (without using filesystem operations).

  :param relative_to: The path that all patterns are relative to (which will itself be relative
    to the buildroot).
  :param files: A list of relative file paths to include.
  :type files: list of string.
  :param string globs: A relative glob pattern of files to include.
  :param string rglobs: A relative recursive glob pattern of files to include.
  :param string zglobs: A relative zsh-style glob pattern of files to include.
  :rtype: :class:`PathGlobs`
  """
  filespecs = OrderedSet()
  for specs, pattern_cls in ((files, Globs), (globs, Globs), (rglobs, RGlobs), (zglobs, ZGlobs)):
    if not specs:
      continue
    res = pattern_cls.to_filespec(specs)
    exclude = res.get('exclude')
    if exclude:
      raise ValueError('Excludes not supported for PathGlobs. Got: {}'.format(exclude))
    new_specs = res.get('globs', None)
    if new_specs:
      filespecs.update(new_specs)
  return cls.create_from_specs(relative_to, filespecs)
def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
  """Checks the artifact cache for the specified list of VersionedTargetSets.

  Returns a pair (cached, uncached) of VersionedTargets that were satisfied/unsatisfied
  from the cache.
  """
  if not vts:
    return [], []

  cached_vts = []
  uncached_vts = OrderedSet(vts)

  with self.context.new_workunit(name='check', labels=[WorkUnit.MULTITOOL]) as parent:
    res = self.context.submit_foreground_work_and_wait(
      Work(lambda vt: bool(self.get_artifact_cache().use_cached_files(vt.cache_key)),
           [(vt, ) for vt in vts], 'fetch'), workunit_parent=parent)
  for vt, was_in_cache in zip(vts, res):
    if was_in_cache:
      cached_vts.append(vt)
      uncached_vts.discard(vt)

  # Note that while the input vts may represent multiple targets (for tasks that override
  # check_artifact_cache_for), the ones we return must represent single targets.
  def flatten(vts):
    return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts]))

  all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts)

  if post_process_cached_vts:
    post_process_cached_vts(all_cached_vts)
  for vt in all_cached_vts:
    vt.update()
  return all_cached_vts, all_uncached_vts
def check_artifact_cache(self, vts):
  """Checks the artifact cache for the specified VersionedTargetSets.

  Returns a pair (cached, uncached) of the ones that were/were not satisfied from the cache.
  The cached ones don't require building.
  """
  if not vts:
    return [], []

  cached_vts = []
  uncached_vts = OrderedSet(vts)
  if self._artifact_cache and self.context.options.read_from_artifact_cache:
    pool = ThreadPool(processes=6)
    res = pool.map(lambda vt: self._artifact_cache.use_cached_files(vt.cache_key),
                   vts, chunksize=1)
    pool.close()
    pool.join()
    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.append(vt)
        uncached_vts.discard(vt)
        self.context.log.info('Using cached artifacts for %s' % vt.targets)
        vt.update()
      else:
        self.context.log.info('No cached artifacts for %s' % vt.targets)
  return cached_vts, list(uncached_vts)
def reduce(self, execution_request):
  """The main reduction loop."""
  # 1. Whenever we don't have enough work to saturate the pool, request more.
  # 2. Whenever the pool is not saturated, submit currently pending work.

  # Step instances which have not been submitted yet.
  pending_submission = OrderedSet()
  # Dict from step id to a Promise for Steps that have been submitted.
  in_flight = dict()

  submit_until = functools.partial(self._submit_until, pending_submission, in_flight)
  await_one = functools.partial(self._await_one, in_flight)

  for step_batch in self._scheduler.schedule(execution_request):
    if not step_batch:
      # A batch should only be empty if all dependency work is currently blocked/running.
      if not in_flight and not pending_submission:
        raise StepBatchException(
          'Scheduler provided an empty batch while no work is in progress!')
    else:
      # Submit and wait for work for as long as we're able to keep the pool saturated.
      pending_submission.update(step_batch)
      while submit_until(self._pool_size) > 0:
        await_one()

    # Await at least one entry per scheduling loop.
    submit_until(0)
    if in_flight:
      await_one()

  # Consume all steps.
  while pending_submission or in_flight:
    submit_until(self._pool_size)
    await_one()
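# A self-contained sketch of the saturation invariant the reduction loop above maintains
# (the names and helpers here are illustrative, not the engine's actual ones): keep at most
# pool_size items in flight, top the pool up from pending work whenever capacity frees, and
# await at least one completion per scheduling round so progress is always made.
from collections import deque

def run_saturated(batches, execute, pool_size):
  pending = deque()    # Work not yet submitted.
  in_flight = deque()  # Work submitted but not yet completed.

  def submit_until(n):
    """Submit while there's pool capacity and more than `n` items are pending."""
    to_submit = max(0, min(len(pending) - n, pool_size - len(in_flight)))
    for _ in range(to_submit):
      in_flight.append(pending.popleft())  # Stand-in for submitting to a real pool.
    return to_submit

  def await_one():
    """Stand-in for blocking until one submitted item completes."""
    execute(in_flight.popleft())

  for batch in batches:
    pending.extend(batch)
    # Submit and wait for work for as long as the pool can be kept saturated.
    while submit_until(pool_size) > 0:
      await_one()
    submit_until(0)
    if in_flight:
      await_one()  # Await at least one entry per scheduling round.

  # Consume everything that remains.
  while pending or in_flight:
    submit_until(0)
    await_one()

# Example: process two "batches" of work with a pool of 3.
run_saturated([range(5), range(5, 10)], print, pool_size=3)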
def all_imports(self):
  """Return all imports for this package, including any test imports.

  :rtype: list of string
  """
  return list(OrderedSet(self.imports + self.test_imports + self.x_test_imports))
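# A minimal sketch of what the OrderedSet buys all_imports above: imports shared between the
# regular, test, and x-test groups collapse to their first occurrence while relative order is
# preserved. The PackageStub class is hypothetical, for illustration only.
from twitter.common.collections import OrderedSet  # Assumed import path for OrderedSet.

class PackageStub(object):
  def __init__(self):
    self.imports = ['fmt', 'os']
    self.test_imports = ['testing', 'fmt']  # 'fmt' repeats.
    self.x_test_imports = ['os', 'bytes']   # 'os' repeats.

  def all_imports(self):
    return list(OrderedSet(self.imports + self.test_imports + self.x_test_imports))

assert PackageStub().all_imports() == ['fmt', 'os', 'testing', 'bytes']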
def configure_compile_context(self, targets):
  """Trims the context's target set to just those targets needed as jars on the IDE classpath.

  All other targets only contribute their external jar dependencies and excludes to the
  classpath definition.
  """
  def is_cp(target):
    return (target.is_codegen or
            # Some IDEs need annotation processors pre-compiled, others are smart enough to
            # detect and proceed in 2 compile rounds
            target.is_apt or
            (self.skip_java and is_java(target)) or
            (self.skip_scala and is_scala(target)) or
            (self.intransitive and target not in self.context.target_roots))

  jars = OrderedSet()
  excludes = OrderedSet()
  compiles = OrderedSet()

  def prune(target):
    if target.is_jvm:
      if target.excludes:
        excludes.update(target.excludes)
      jars.update(jar for jar in target.jar_dependencies if jar.rev)
      if is_cp(target):
        target.walk(compiles.add)

  for target in targets:
    target.walk(prune)

  self.context.replace_targets(compiles)

  self.binary = self.context.add_new_target(self.work_dir,
                                            JvmBinary,
                                            name='%s-external-jars' % self.project_name,
                                            dependencies=jars,
                                            excludes=excludes,
                                            configurations=('default', 'sources', 'javadoc'))
  self.require_jar_dependencies(predicate=lambda t: t == self.binary)

  self.context.log.debug('pruned to cp:\n\t%s' %
                         '\n\t'.join(str(t) for t in self.context.targets()))
def reduce(self, build_request, fail_slow=False):
  executor = self.Executor(self._pool, self._pool_size, fail_slow=fail_slow, debug=self._debug)

  # Steps move from `pending_submission` to `in_flight`.
  pending_submission = OrderedSet()
  in_flight = dict()

  def submit_until(n):
    """Submit pending work while there's capacity and more than `n` items are pending."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    for _ in range(to_submit):
      step, promise = pending_submission.pop(last=False)
      if step in in_flight:
        raise Exception('{} is already in_flight!'.format(step))
      in_flight[step] = promise
      executor.submit(step)
    return to_submit

  def await_one():
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise Exception('Awaited an empty pool!')
    step, result = executor.await_one_result()
    if step not in in_flight:
      raise Exception('Received unexpected work from the Executor: {} vs {}'.format(
        step, in_flight.keys()))
    in_flight.pop(step).success(result)

  # The main reduction loop:
  # 1. Whenever we don't have enough work to saturate the pool, request more.
  # 2. Whenever the pool is not saturated, submit currently pending work.
  for step_batch in self._scheduler.schedule(build_request):
    if not step_batch:
      # A batch should only be empty if all dependency work is currently blocked/running.
      if not in_flight and not pending_submission:
        raise Exception('Scheduler provided an empty batch while no work is in progress!')
    else:
      # Submit and wait for work for as long as we're able to keep the pool saturated.
      pending_submission.update(step_batch)
      while submit_until(self._pool_size) > 0:
        await_one()

    # Await at least one entry per scheduling loop.
    submit_until(0)
    if in_flight:
      await_one()

  # Consume all steps.
  while pending_submission or in_flight:
    submit_until(self._pool_size)
    await_one()
def _prepare(self, context, goals):
  if len(goals) == 0:
    raise TaskError('No goals to prepare')

  goal_info_by_goal = OrderedDict()
  for goal in reversed(OrderedSet(goals)):
    self._visit_goal(goal, context, goal_info_by_goal)

  for goal_info in reversed(list(self._topological_sort(goal_info_by_goal))):
    yield GoalExecutor(context, goal_info.goal, goal_info.tasks_by_name)
def _prepare(self, context, goals):
  if len(goals) == 0:
    raise TaskError('No goals to prepare')

  goal_info_by_goal = OrderedDict()
  target_roots_replacement = self.TargetRootsReplacement()
  for goal in reversed(OrderedSet(goals)):
    self._visit_goal(goal, context, goal_info_by_goal, target_roots_replacement)
  target_roots_replacement.apply(context)

  for goal_info in reversed(list(self._topological_sort(goal_info_by_goal))):
    yield GoalExecutor(context, goal_info.goal, goal_info.tasktypes_by_name)
def configure_compile_context(self, targets):
  """Trims the context's target set to just those targets needed as jars on the IDE classpath.

  All other targets only contribute their external jar dependencies and excludes to the
  classpath definition.
  """
  def is_cp(target):
    return (target.is_codegen or
            # Some IDEs need annotation processors pre-compiled, others are smart enough to
            # detect and proceed in 2 compile rounds
            isinstance(target, AnnotationProcessor) or
            (self.skip_java and is_java(target)) or
            (self.skip_scala and is_scala(target)) or
            (self.intransitive and target not in self.context.target_roots))

  jars = OrderedSet()
  excludes = OrderedSet()
  compiles = OrderedSet()

  def prune(target):
    if target.is_jvm:
      if target.excludes:
        excludes.update(target.excludes)
      jars.update(jar for jar in target.jar_dependencies)
      if is_cp(target):
        target.walk(compiles.add)

  for target in targets:
    target.walk(prune)

  # TODO(John Sirois): Restructure to use alternate_target_roots Task lifecycle method
  self.context._replace_targets(compiles)

  self.jar_dependencies = jars

  self.context.log.debug('pruned to cp:\n\t{}'.format(
    '\n\t'.join(str(t) for t in self.context.targets())))
def configure_target(target):
  if target not in analyzed_targets:
    analyzed_targets.add(target)

    self.has_scala = not self.skip_scala and (self.has_scala or is_scala(target))

    # Hack for java_sources and Eclipse/IntelliJ: add java_sources to project
    if isinstance(target, ScalaLibrary):
      for java_source in target.java_sources:
        configure_target(java_source)

    # Resources are already in the target set
    if target.has_resources:
      resources_by_basedir = defaultdict(set)
      for resources in target.resources:
        analyzed_targets.add(resources)
        resources_by_basedir[resources.target_base].update(relative_sources(resources))
      for basedir, resources in resources_by_basedir.items():
        self.resource_extensions.update(Project.extract_resource_extensions(resources))
        configure_source_sets(basedir, resources, is_test=target.is_test, resources_only=True)

    if target.has_sources():
      test = target.is_test
      self.has_tests = self.has_tests or test
      base = target.target_base
      configure_source_sets(base, relative_sources(target), is_test=test,
                            resources_only=isinstance(target, Resources))

    # TODO(Garrett Malmquist): This is dead code, and should be redone/reintegrated.
    # Other BUILD files may specify sources in the same directory as this target. Those BUILD
    # files might be in parent directories (globs('a/b/*.java')) or even children directories
    # if this target globs children as well. Gather all these candidate BUILD files to test for
    # sources they own that live in the directories this target's sources live in.
    target_dirset = find_source_basedirs(target)
    if not isinstance(target.address, BuildFileAddress):
      return []  # Siblings only make sense for BUILD files.

    candidates = OrderedSet()

    build_file = target.address.build_file
    dir_relpath = os.path.dirname(build_file.relpath)
    for descendant in BuildFile.scan_build_files(
        build_file.project_tree, dir_relpath,
        spec_excludes=self.spec_excludes,
        build_ignore_patterns=self.build_ignore_patterns):
      candidates.update(self.target_util.get_all_addresses(descendant))
    if not self._is_root_relpath(dir_relpath):
      ancestors = self._collect_ancestor_build_files(build_file.project_tree,
                                                     os.path.dirname(dir_relpath),
                                                     self.build_ignore_patterns)
      for ancestor in ancestors:
        candidates.update(self.target_util.get_all_addresses(ancestor))

    def is_sibling(target):
      return source_target(target) and target_dirset.intersection(find_source_basedirs(target))

    return filter(is_sibling, [self.target_util.get(a) for a in candidates
                               if a != target.address])
def create(cls, relative_to, files=None, globs=None, rglobs=None, zglobs=None):
  """Given various file patterns create a PathGlobs object (without using filesystem operations).

  TODO: This currently sortof-executes parsing via 'to_filespec'. Should maybe push that out
  to callers to make them deal with errors earlier.

  :param relative_to: The path that all patterns are relative to (which will itself be relative
    to the buildroot).
  :param files: A list of relative file paths to include.
  :type files: list of string.
  :param string globs: A relative glob pattern of files to include.
  :param string rglobs: A relative recursive glob pattern of files to include.
  :param string zglobs: A relative zsh-style glob pattern of files to include.
  :rtype: :class:`PathGlobs`
  """
  filespecs = OrderedSet()
  for specs, pattern_cls in ((files, Globs), (globs, Globs), (rglobs, RGlobs), (zglobs, ZGlobs)):
    if not specs:
      continue
    res = pattern_cls.to_filespec(specs)
    excludes = res.get('excludes')
    if excludes:
      raise ValueError('Excludes not supported for PathGlobs. Got: {}'.format(excludes))
    new_specs = res.get('globs', None)
    if new_specs:
      filespecs.update(new_specs)

  path_globs = []
  for filespec in filespecs:
    # TODO: These will be implemented as part of finishing:
    #   https://github.com/pantsbuild/pants/issues/2946
    if cls._RECURSIVE in filespec:
      raise ValueError('TODO: Unsupported: {}'.format(filespec))
    elif cls._SINGLE in filespec:
      raise ValueError('TODO: Unsupported: {}'.format(filespec))
    elif '*' in filespec:
      raise ValueError('TODO: Unsupported: {}'.format(filespec))
    else:
      # A literal path.
      path_globs.append(PathGlob(relative_to, filespec))
  return cls(tuple(path_globs))
def list_external_jar_dependencies(self, binary):
  """Returns the external jar dependencies of the given binary.

  :param binary: The jvm binary target to list transitive external dependencies for.
  :type binary: :class:`pants.backend.jvm.targets.jvm_binary.JvmBinary`
  :returns: A list of (jar path, coordinate) tuples.
  :rtype: list of (string, :class:`pants.java.jar.M2Coordinate`)
  """
  classpath_products = self.context.products.get_data('runtime_classpath')
  classpath_entries = classpath_products.get_artifact_classpath_entries_for_targets(
    binary.closure(bfs=True, include_scopes=Scopes.JVM_RUNTIME_SCOPES,
                   respect_intransitive=True))
  external_jars = OrderedSet(jar_entry for conf, jar_entry in classpath_entries
                             if conf == 'default')
  return [(entry.path, entry.coordinate) for entry in external_jars
          if not entry.is_excluded_by(binary.deploy_excludes)]
def __init__(self, failed_to_produce):
  """
  :param failed_to_produce: A mapping of failed promises to the `FailedToProduce` product
    representing the failure.
  :type failed_to_produce: dict of (:class:`pants.engine.exp.scheduler.Promise`,
    :class:`FailedToProduce`)
  """
  failed_targets = OrderedSet()
  for ftp in failed_to_produce.values():
    for f in ftp.walk():
      if isinstance(f.error, TaskError):
        failed_targets.update(f.error.failed_targets)
  super(Engine.PartialFailureError, self).__init__(exit_code=1,
                                                   failed_targets=list(failed_targets))
  self._failed_to_produce = failed_to_produce
def list_external_jar_dependencies(self, binary, confs=None):
  """Returns the external jar dependencies of the given binary.

  :param binary: The jvm binary target to list transitive external dependencies for.
  :type binary: :class:`pants.backend.jvm.targets.jvm_binary.JvmBinary`
  :param confs: The ivy configurations to include in the dependencies list, ('default',) by
    default.
  :type confs: :class:`collections.Iterable` of string
  :returns: A list of (jar path, coordinate) tuples.
  :rtype: list of (string, :class:`pants.backend.jvm.jar_dependency_utils.M2Coordinate`)
  """
  classpath_products = self.context.products.get_data('runtime_classpath')
  classpath_entries = classpath_products.get_artifact_classpath_entries_for_targets([binary])
  confs = confs or ('default',)
  external_jars = OrderedSet(jar_entry for conf, jar_entry in classpath_entries
                             if conf in confs)
  return [(entry.path, entry.coordinate) for entry in external_jars
          if not entry.is_excluded_by(binary.deploy_excludes)]
def _recover(self):
  """
  Recover launchers for existing clusters.

  A newly created scheduler has no launcher to recover.

  TODO(jyx): The recovery of clusters can potentially be parallelized.
  """
  # Make a copy so we can remove dead entries while iterating the copy.
  for cluster_name in OrderedSet(self._state.clusters):
    log.info("Recovering launcher for cluster %s" % cluster_name)
    try:
      cluster = self._state_provider.load_cluster_state(cluster_name)
      if not cluster:
        # The scheduler could have failed over before creating the launcher. The user request
        # should have failed and there is no cluster state to restore.
        log.info("Skipping cluster %s because its state cannot be found" % cluster_name)
        self._state.clusters.remove(cluster_name)
        self._state_provider.dump_scheduler_state(self._state)
        continue

      for task_id in cluster.tasks:
        self._tasks[task_id] = cluster.name  # Reconstruct the 'tasks' map.

      # Order of launchers is preserved thanks to the OrderedSet.
      # For recovered launchers we use the currently specified --framework_role and
      # --executor_environ, etc., instead of saving them in cluster state, so a change in flags
      # can be picked up by existing clusters.
      self._launchers[cluster.name] = MySQLClusterLauncher(
          self._driver,
          cluster,
          self._state_provider,
          self._discover_zk_url,
          self._kazoo,
          self._framework_user,
          self._executor_uri,
          self._executor_cmd,
          self._election_timeout,
          self._admin_keypath,
          self._installer_args,
          self._backup_store_args,
          self._executor_environ,
          self._framework_role)
    except StateProvider.Error as e:
      raise self.Error("Failed to recover cluster: %s" % e.message)

  log.info("Recovered %s clusters" % len(self._launchers))
def _prepare(self, context, goals):
  if len(goals) == 0:
    raise TaskError('No goals to prepare')

  # Option values are usually computed lazily on demand, but command line options are
  # eagerly computed for validation.
  for scope in context.options.scope_to_flags.keys():
    context.options.for_scope(scope)

  goal_info_by_goal = OrderedDict()
  target_roots_replacement = self.TargetRootsReplacement()
  for goal in reversed(OrderedSet(goals)):
    self._visit_goal(goal, context, goal_info_by_goal, target_roots_replacement)
  target_roots_replacement.apply(context)

  for goal_info in reversed(list(self._topological_sort(goal_info_by_goal))):
    yield GoalExecutor(context, goal_info.goal, goal_info.tasktypes_by_name)
def create(cls, ftype, relative_to, files=None, globs=None, rglobs=None, zglobs=None):
  """Given various file patterns create a PathGlobs object (without using filesystem operations).

  TODO: This currently sortof-executes parsing via 'to_filespec'. Should maybe push that out
  to callers to make them deal with errors earlier.

  :param relative_to: The path that all patterns are relative to (which will itself be relative
    to the buildroot).
  :param ftype: A Stat subclass indicating which Stat type will be matched.
  :param files: A list of relative file paths to include.
  :type files: list of string.
  :param string globs: A relative glob pattern of files to include.
  :param string rglobs: A relative recursive glob pattern of files to include.
  :param string zglobs: A relative zsh-style glob pattern of files to include.
  :rtype: :class:`PathGlobs`
  """
  relative_to = normpath(relative_to)
  filespecs = OrderedSet()
  for specs, pattern_cls in ((files, Globs), (globs, Globs), (rglobs, RGlobs), (zglobs, ZGlobs)):
    if not specs:
      continue
    res = pattern_cls.to_filespec(specs)
    excludes = res.get('excludes')
    if excludes:
      raise ValueError('Excludes not supported for PathGlobs. Got: {}'.format(excludes))
    new_specs = res.get('globs', None)
    if new_specs:
      filespecs.update(new_specs)
  return cls.create_from_specs(ftype, relative_to, filespecs)
def configure_jvm(self, extra_source_paths, extra_test_paths):
  """Configures this project's source sets returning the full set of targets the project is
  comprised of.

  The full set can be larger than the initial set of targets when any of the initial targets
  only has partial ownership of its source set's directories.
  """
  # TODO(John Sirois): much waste lies here, revisit structuring for more readable and efficient
  # construction of source sets and excludes ... and add a test!
  analyzed_targets = OrderedSet()
  targeted = set()

  def relative_sources(target):
    sources = target.payload.sources.relative_to_buildroot()
    return [os.path.relpath(source, target.target_base) for source in sources]

  def source_target(target):
    result = ((self.transitive or target in self.targets) and
              target.has_sources() and
              (not (self.skip_java and is_java(target)) and
               not (self.skip_scala and is_scala(target))))
    return result

  def configure_source_sets(relative_base, sources, is_test=False, resources_only=False):
    absolute_base = os.path.join(self.root_dir, relative_base)
    paths = set([os.path.dirname(source) for source in sources])
    for path in paths:
      absolute_path = os.path.join(absolute_base, path)
      # Note, this can add duplicate source paths to self.sources(). We'll de-dup them later,
      # because we want to prefer test paths.
      targeted.add(absolute_path)
      source_set = SourceSet(self.root_dir, relative_base, path,
                             is_test=is_test, resources_only=resources_only)
      self.sources.append(source_set)

  def find_source_basedirs(target):
    dirs = set()
    if source_target(target):
      absolute_base = os.path.join(self.root_dir, target.target_base)
      dirs.update([os.path.join(absolute_base, os.path.dirname(source))
                   for source in relative_sources(target)])
    return dirs

  def configure_target(target):
    if target not in analyzed_targets:
      analyzed_targets.add(target)

      self.has_scala = not self.skip_scala and (self.has_scala or is_scala(target))

      # Hack for java_sources and Eclipse/IntelliJ: add java_sources to project
      if isinstance(target, ScalaLibrary):
        for java_source in target.java_sources:
          configure_target(java_source)

      # Resources are already in the target set
      if target.has_resources:
        resources_by_basedir = defaultdict(set)
        for resources in target.resources:
          analyzed_targets.add(resources)
          resources_by_basedir[resources.target_base].update(relative_sources(resources))
        for basedir, resources in resources_by_basedir.items():
          self.resource_extensions.update(Project.extract_resource_extensions(resources))
          configure_source_sets(basedir, resources, is_test=target.is_test,
                                resources_only=True)

      if target.has_sources():
        test = target.is_test
        self.has_tests = self.has_tests or test
        base = target.target_base
        configure_source_sets(base, relative_sources(target), is_test=test,
                              resources_only=isinstance(target, Resources))

      # TODO(Garrett Malmquist): This is dead code, and should be redone/reintegrated.
      # Other BUILD files may specify sources in the same directory as this target. Those BUILD
      # files might be in parent directories (globs('a/b/*.java')) or even children directories
      # if this target globs children as well. Gather all these candidate BUILD files to test
      # for sources they own that live in the directories this target's sources live in.
      target_dirset = find_source_basedirs(target)
      if not isinstance(target.address, BuildFileAddress):
        return []  # Siblings only make sense for BUILD files.

      candidates = self.target_util.get_all_addresses(target.address.build_file)
      for ancestor in target.address.build_file.ancestors():
        candidates.update(self.target_util.get_all_addresses(ancestor))
      for sibling in target.address.build_file.siblings():
        candidates.update(self.target_util.get_all_addresses(sibling))
      for descendant in target.address.build_file.descendants(
          spec_excludes=self.spec_excludes):
        candidates.update(self.target_util.get_all_addresses(descendant))

      def is_sibling(target):
        return source_target(target) and target_dirset.intersection(find_source_basedirs(target))

      return filter(is_sibling, [self.target_util.get(a) for a in candidates
                                 if a != target.address])

  resource_targets = []
  for target in self.targets:
    if isinstance(target, Resources):
      # Wait to process these until all resources that are reachable from other targets are
      # processed. That way we'll only add a new SourceSet if this target has never been seen
      # before. This allows test resource SourceSets to properly keep the is_test property.
      resource_targets.append(target)
    else:
      target.walk(configure_target, predicate=source_target)

  for target in resource_targets:
    target.walk(configure_target)

  def full_path(source_set):
    return os.path.join(source_set.root_dir, source_set.source_base, source_set.path)

  # Check if there are any overlapping source_sets, and output an error message if so.
  # Overlapping source_sets cause serious problems with package name inference.
  overlap_error = ('SourceSets {current} and {previous} evaluate to the same full path.'
                   ' This can be caused by multiple BUILD targets claiming the same source,'
                   ' e.g., if a BUILD target in a parent directory contains an rglobs() while'
                   ' a BUILD target in a subdirectory of that uses a globs() which claims the'
                   ' same sources. This may cause package names to be inferred incorrectly'
                   ' (e.g., you might see src.com.foo.bar.Main instead of com.foo.bar.Main).')
  source_full_paths = {}
  for source_set in sorted(self.sources, key=full_path):
    full = full_path(source_set)
    if full in source_full_paths:
      previous_set = source_full_paths[full]
      logger.debug(overlap_error.format(current=source_set, previous=previous_set))
    source_full_paths[full] = source_set

  # We need to figure out excludes, in doing so there are 2 cases we should not exclude:
  # 1.) targets that depend on A only should lead to an exclude of B
  #   A/BUILD
  #   A/B/BUILD
  #
  # 2.) targets that depend on A and C should not lead to an exclude of B (would wipe out C)
  #   A/BUILD
  #   A/B
  #   A/B/C/BUILD
  #
  # One approach: build a set of all paths and parent paths containing BUILDs our targets
  # depend on - these are unexcludable.
  unexcludable_paths = set()
  for source_set in self.sources:
    parent = os.path.join(self.root_dir, source_set.source_base, source_set.path)
    while True:
      unexcludable_paths.add(parent)
      parent, _ = os.path.split(parent)
      # no need to add the repo root or above, all source paths and extra paths are children
      if parent == self.root_dir:
        break

  for source_set in self.sources:
    paths = set()
    source_base = os.path.join(self.root_dir, source_set.source_base)
    for root, dirs, _ in safe_walk(os.path.join(source_base, source_set.path)):
      if dirs:
        paths.update([os.path.join(root, directory) for directory in dirs])
    unused_children = paths - targeted
    if unused_children:
      for child in unused_children:
        if child not in unexcludable_paths:
          source_set.excludes.append(os.path.relpath(child, source_base))

  targets = OrderedSet()
  for target in self.targets:
    target.walk(lambda target: targets.add(target), source_target)
  targets.update(analyzed_targets - targets)

  self.sources.extend(SourceSet(get_buildroot(), p, None, is_test=False)
                      for p in extra_source_paths)
  self.sources.extend(SourceSet(get_buildroot(), p, None, is_test=True)
                      for p in extra_test_paths)
  if self.use_source_root:
    self.sources = Project._collapse_by_source_root(self.context.source_roots, self.sources)
  self.sources = self.dedup_sources(self.sources)

  return targets
def reduce(self, execution_request):
  # Step instances which have not been submitted yet.
  # TODO: Scheduler now only sends work once, so a deque should be fine here.
  pending_submission = OrderedSet()
  # Dict from step id to a Promise for Steps that have been submitted.
  in_flight = dict()

  def submit_until(n):
    """Submit pending work while there's capacity and more than `n` items are pending."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    submitted = 0
    for _ in range(to_submit):
      step, promise = pending_submission.pop(last=False)
      if step.step_id in in_flight:
        raise Exception('{} is already in_flight!'.format(step))
      step = self._storage.key_for_request(step)
      result = self._maybe_cache_get(step)
      if result is not None:
        # Skip in_flight on cache hit.
        promise.success(result)
      else:
        in_flight[step.step_id] = promise
        self._submit(step)
        submitted += 1
    return submitted

  def await_one():
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise Exception('Awaited an empty pool!')
    step_id, result = self._pool.await_one_result()
    if isinstance(result, Exception):
      raise result
    result = self._storage.resolve_result(result)
    if step_id not in in_flight:
      raise Exception('Received unexpected work from the Executor: {} vs {}'.format(
        step_id, in_flight.keys()))
    in_flight.pop(step_id).success(result)

  # The main reduction loop:
  # 1. Whenever we don't have enough work to saturate the pool, request more.
  # 2. Whenever the pool is not saturated, submit currently pending work.
  for step_batch in self._scheduler.schedule(execution_request):
    if not step_batch:
      # A batch should only be empty if all dependency work is currently blocked/running.
      if not in_flight and not pending_submission:
        raise Exception('Scheduler provided an empty batch while no work is in progress!')
    else:
      # Submit and wait for work for as long as we're able to keep the pool saturated.
      pending_submission.update(step_batch)
      while submit_until(self._pool_size) > 0:
        await_one()

    # Await at least one entry per scheduling loop.
    submit_until(0)
    if in_flight:
      await_one()

  # Consume all steps.
  while pending_submission or in_flight:
    submit_until(self._pool_size)
    await_one()
def sort_goals(self, context, goals):
  goal_info_by_goal = OrderedDict()
  for goal in reversed(OrderedSet(goals)):
    self._visit_goal(goal, context, goal_info_by_goal)
  return list(reversed(list(self._topological_sort(goal_info_by_goal))))
def __init__(self, framework_info):
  self.framework_info = framework_info
  # Names of clusters this scheduler manages; cluster creation order is preserved by the
  # OrderedSet.
  self.clusters = OrderedSet()
def configure_jvm(self, extra_source_paths, extra_test_paths):
  """Configures this project's source sets returning the full set of targets the project is
  comprised of.

  The full set can be larger than the initial set of targets when any of the initial targets
  only has partial ownership of its source set's directories.
  """
  # TODO(John Sirois): much waste lies here, revisit structuring for more readable and efficient
  # construction of source sets and excludes ... and add a test!
  analyzed = OrderedSet()
  targeted = set()

  def source_target(target):
    return ((self.transitive or target in self.targets) and
            target.has_sources() and
            (not target.is_codegen and
             not (self.skip_java and is_java(target)) and
             not (self.skip_scala and is_scala(target))))

  def configure_source_sets(relative_base, sources, is_test):
    absolute_base = os.path.join(self.root_dir, relative_base)
    paths = set([os.path.dirname(source) for source in sources])
    for path in paths:
      absolute_path = os.path.join(absolute_base, path)
      if absolute_path not in targeted:
        targeted.add(absolute_path)
        self.sources.append(SourceSet(self.root_dir, relative_base, path, is_test))

  def find_source_basedirs(target):
    dirs = set()
    if source_target(target):
      absolute_base = os.path.join(self.root_dir, target.target_base)
      dirs.update([os.path.join(absolute_base, os.path.dirname(source))
                   for source in target.sources])
    return dirs

  def configure_target(target):
    if target not in analyzed:
      analyzed.add(target)

      self.has_scala = not self.skip_scala and (self.has_scala or is_scala(target))

      if target.has_resources:
        resources_by_basedir = defaultdict(set)
        for resources in target.resources:
          resources_by_basedir[resources.target_base].update(resources.sources)
        for basedir, resources in resources_by_basedir.items():
          self.resource_extensions.update(Project.extract_resource_extensions(resources))
          configure_source_sets(basedir, resources, is_test=False)

      if target.sources:
        test = target.is_test
        self.has_tests = self.has_tests or test
        configure_source_sets(target.target_base, target.sources, is_test=test)

      # Other BUILD files may specify sources in the same directory as this target. Those BUILD
      # files might be in parent directories (globs('a/b/*.java')) or even children directories
      # if this target globs children as well. Gather all these candidate BUILD files to test
      # for sources they own that live in the directories this target's sources live in.
      target_dirset = find_source_basedirs(target)
      candidates = Target.get_all_addresses(target.address.buildfile)
      for ancestor in target.address.buildfile.ancestors():
        candidates.update(Target.get_all_addresses(ancestor))
      for sibling in target.address.buildfile.siblings():
        candidates.update(Target.get_all_addresses(sibling))
      for descendant in target.address.buildfile.descendants():
        candidates.update(Target.get_all_addresses(descendant))

      def is_sibling(target):
        return source_target(target) and target_dirset.intersection(find_source_basedirs(target))

      return filter(is_sibling, [Target.get(a) for a in candidates if a != target.address])

  for target in self.targets:
    target.walk(configure_target, predicate=source_target)

  # We need to figure out excludes, in doing so there are 2 cases we should not exclude:
  # 1.) targets that depend on A only should lead to an exclude of B
  #   A/BUILD
  #   A/B/BUILD
  #
  # 2.) targets that depend on A and C should not lead to an exclude of B (would wipe out C)
  #   A/BUILD
  #   A/B
  #   A/B/C/BUILD
  #
  # One approach: build a set of all paths and parent paths containing BUILDs our targets
  # depend on - these are unexcludable.
  unexcludable_paths = set()
  for source_set in self.sources:
    parent = os.path.join(self.root_dir, source_set.source_base, source_set.path)
    while True:
      unexcludable_paths.add(parent)
      parent, _ = os.path.split(parent)
      # no need to add the repo root or above, all source paths and extra paths are children
      if parent == self.root_dir:
        break

  for source_set in self.sources:
    paths = set()
    source_base = os.path.join(self.root_dir, source_set.source_base)
    for root, dirs, _ in os.walk(os.path.join(source_base, source_set.path)):
      if dirs:
        paths.update([os.path.join(root, directory) for directory in dirs])
    unused_children = paths - targeted
    if unused_children:
      for child in unused_children:
        if child not in unexcludable_paths:
          source_set.excludes.append(os.path.relpath(child, source_base))

  targets = OrderedSet()
  for target in self.targets:
    target.walk(lambda target: targets.add(target), source_target)
  targets.update(analyzed - targets)

  self.sources.extend(SourceSet(get_buildroot(), p, None, False) for p in extra_source_paths)
  self.sources.extend(SourceSet(get_buildroot(), p, None, True) for p in extra_test_paths)

  return targets