def process_config(self):
    """
    Turn the populated config dict into an ordered list of documents.

    Walks ``self.config`` (directory path -> {pattern: args}), creates a
    Document for every file matched by each pattern, records dependencies
    between documents and finally does a topological sort to establish
    the order of the batch run.

    Results are stored on the controller:

    * ``self.members``  - OrderedDict of document key -> Document
    * ``self.depends``  - list of (input position, dependent position)
    * ``self.ordering`` - positions into ``self.members`` in run order
    * ``self.docs``     - documents appended in run order

    Raises dexy.commands.UserFeedback when a pattern contains a path, a
    remote/virtual file is requested without -danger, 'inputs' is a
    string, 'except' is not a valid regular expression, -run matches no
    document, or the dependencies are circular.

    NOTE: this code targets Python 2 (``basestring``, ``e.message``).
    """

    def substitute_root(pattern, rootname):
        # Replace the "%" placeholder with the text captured by the glob
        # wildcard. Patterns without a wildcard have no rootname; they
        # are used unchanged instead of failing with a NameError.
        if rootname is None:
            return pattern
        return pattern.replace("%", rootname)

    # Define the parse_doc nested function which we will call recursively.
    def parse_doc(path, input_directive, args=None):
        """
        Create or update the documents matched by one config pattern.

        path is the config directory, input_directive is either a
        "glob|filter|filter" string or a nested {pattern: args} dict, and
        args holds the options for the pattern. Returns the list of
        documents created or updated for this pattern.
        """
        if args is None:
            args = {}

        # If a specification is nested in a dependency, then
        # input_directive may be a dict. Read it without popitem(): the
        # same spec is parsed once per matching file, and emptying it
        # made the second parse fail with KeyError (and left empty dicts
        # in the copy stored in doc.input_args).
        try:
            nested = list(input_directive.items())
        except AttributeError:
            nested = None
        if nested is not None:
            if not nested:
                raise dexy.commands.UserFeedback(
                    "empty dict given where a document pattern was expected")
            # Only one pattern per nested dict is used.
            input_directive, args = nested[0]

        tokens = input_directive.split("|")
        if "/" in tokens[0]:
            raise dexy.commands.UserFeedback(
                "paths not allowed in tokens: %s" % tokens[0])
        if path == '.':
            glob_string = tokens[0]
        else:
            glob_string = os.path.join(re.sub(r"^\./", "", path), tokens[0])
        filters = tokens[1:]

        docs = []

        # virtual document
        virtual = "@" in glob_string
        if virtual:
            dangerous = any(k in ('url', 'repo', 'path') for k in args)
            if dangerous and not self.args['danger']:
                msg = "You are attempting to access a remote file %s." % glob_string
                msg += " You must specify -danger option to do this.\n"
                raise dexy.commands.UserFeedback(msg)
            glob_string = glob_string.replace("@", "")

        # Turn the glob into a regex whose first group captures what the
        # wildcard matched; this is the "rootname" used by ifinput.
        regex = fnmatch.translate(glob_string).replace(".*", "(.*)")
        matcher = re.compile(regex)

        files = glob.glob(glob_string)
        if virtual and not files:
            # A virtual document does not need to exist on disk.
            files = [glob_string]

        # The 'except' pattern does not depend on the file, so compile it
        # once per pattern rather than once per matched file.
        except_re = None
        if files and 'except' in args:
            try:
                except_re = re.compile(args['except'])
            except re.error as e:
                raise dexy.commands.UserFeedback(
                    "You passed 'except' value of %s. Please pass a valid "
                    "Python-style regular expression for 'except', NOT a "
                    "glob-style matcher. Error message from re.compile: %s"
                    % (args['except'], e))

        for f in files:
            # Directories matched by the glob never become documents.
            create = virtual or not os.path.isdir(f)

            if args.get('disabled'):
                create = False
                self.log.warn("document %s|%s disabled" % (f, "|".join(filters)))

            inputs = []
            if 'inputs' in args:
                if isinstance(args['inputs'], basestring):
                    raise dexy.commands.UserFeedback(
                        "inputs for %s should be an array" % f)
                for input_spec in args['inputs']:
                    # Create document objects for input patterns (just in
                    # this directory)
                    for parsed in parse_doc(path, input_spec):
                        inputs.append(parsed.key())

            # Reset per file so a value from an earlier file is never
            # reused by accident.
            m = matcher.match(f)
            if m and m.groups():
                rootname = m.group(1)
            else:
                rootname = None

            # The 'ifinput' directive says that if an input exists
            # matching the specified pattern, we should create this
            # document and it will depend on the specified input.
            if 'ifinput' in args:
                if isinstance(args['ifinput'], basestring):
                    ifinputs = [args['ifinput']]
                else:
                    self.log.debug("treating input %s as iterable. class: %s" % (
                        args['ifinput'], args['ifinput'].__class__.__name__))
                    ifinputs = args['ifinput']

                for s in ifinputs:
                    self.log.debug("evaluating ifinput %s" % s)
                    ifinput = substitute_root(s, rootname)
                    self.log.debug("evaluating ifinput %s" % ifinput)
                    input_docs = parse_doc(path, ifinput, {})
                    for input_doc in input_docs:
                        inputs.append(input_doc.key())

                    # NOTE(review): the original indentation was lost;
                    # this assumes the check applies to each pattern
                    # (identical for the usual single-pattern case).
                    if len(input_docs) == 0:
                        create = False

            # 'ifnoinput' is the opposite: skip this document when the
            # pattern matches something.
            if 'ifnoinput' in args:
                ifinput = substitute_root(args['ifnoinput'], rootname)
                input_docs = parse_doc(path, ifinput, {})
                if len(input_docs) > 0:
                    create = False

            if except_re is not None and except_re.match(f):
                self.log.warn("skipping %s for %s as it matches except pattern %s" % (
                    f,
                    input_directive,
                    args['except']
                    ))
                create = False

            if not create:
                continue

            doc = dexy.document.Document()
            doc.set_controller(self)

            # Filters can either be included in the name...
            # (a copy is passed so the += below cannot leak into the list
            # reused for the other files of this pattern, should the
            # document keep a reference to it)
            doc.set_name_and_filters(f, list(filters))
            # ...or they may be listed explicitly.
            if 'filters' in args:
                doc.filters += args['filters']

            if 'loglevel' in args:
                doc.loglevelname = args['loglevel']
            doc.setup_log()  # After name has been set
            doc.virtual = virtual

            key = doc.key()
            self.log.debug("creating doc %s for glob %s" % (key, glob_string))

            # Reuse the existing document when several patterns produce
            # the same key.
            if key in self.members:
                doc = self.members[key]

            # 'priority' is stored on the document, not in doc.args. It
            # is filtered out rather than deleted from args, because args
            # is shared by every file of this pattern and deleting it
            # meant only the first matching file received the priority.
            if 'priority' in args:
                doc.priority = args['priority']
            doc.args.update(
                dict((k, v) for k, v in args.items() if k != 'priority'))

            if 'allinputs' in args:
                doc.use_all_inputs = args['allinputs']

            if 'inputs' in args:
                doc.input_args = copy.copy(args['inputs'])
                doc.input_keys = []

            for input_key in inputs:
                doc.add_input_key(input_key)

            self.members[key] = doc
            docs.append(doc)  # docs is a local list of docs

        return docs
    # end of parse_doc nested function

    def get_pos(member):
        # Position of a document within the ordered members dict.
        return list(self.members.keys()).index(member.key())

    def depend(parent, child):
        # topsort expects (runs first, runs later) pairs.
        self.depends.append((get_pos(child), get_pos(parent)))

    # The real processing starts here.
    self.members = OrderedDict()
    self.depends = []

    self.batch_id = self.db.next_batch_id()
    if not self.args['silent']:
        print("batch id is %s" % (self.batch_id,))

    for path, config in self.config.items():
        ### @export "features-global-args-1"
        global_args = config.get("$globals", {})
        global_variables = config.get("$variables", {})

        if 'globals' in self.args:
            global_args.update(self.args['globals'])

        for k, v in config.items():
            local_args = global_args.copy()
            local_args.update(v)
            local_args['$variables'] = global_variables
            # Dict-valued args are updated in place with the entries of
            # the corresponding global dict.
            for kg in list(global_args.keys()):
                if kg in local_args and isinstance(local_args[kg], dict):
                    local_args[kg].update(global_args[kg])
            parse_doc(path, k, local_args)
        ### @end

    # Determine dependencies
    total_dependencies = 0
    self.log.debug("Finalizing dependencies between documents...")
    for doc in list(self.members.values()):
        doc.finalize_inputs(self.members)
        total_dependencies += len(doc.inputs)
        for input_doc in doc.inputs:
            depend(doc, input_doc)

        self.log.debug("finalized dependencies for %s" % doc.key())
        if len(doc.inputs) > 10:
            self.log.debug("%s inputs added" % len(doc.inputs))
        elif len(doc.inputs) == 0:
            self.log.debug("no inputs added")
        else:
            self.log.debug("inputs added: %s" % ", ".join(d.key() for d in doc.inputs))

    if self.args['run']:
        # Only run the specified document, and its dependencies.
        new_members = OrderedDict()
        new_depends = []

        def new_get_pos(member):
            return list(new_members.keys()).index(member.key())

        def new_depend(parent, child):
            new_depends.append((new_get_pos(child), new_get_pos(parent)))

        def parse_new_document(d):
            new_members[d.key()] = d
            for input_doc in d.inputs:
                unseen = input_doc.key() not in new_members
                if unseen:
                    new_members[input_doc.key()] = input_doc
                # Always record the edge. Recording it only for unseen
                # inputs dropped dependencies on documents reached
                # earlier by another route, so the sort could place a
                # document before its own input.
                new_depend(d, input_doc)
                if unseen:
                    parse_new_document(input_doc)

        run_key = self.args['run']
        if run_key in self.members:
            doc = self.members[run_key]
        else:
            # Fall back to a prefix match, preferring the candidate with
            # the most inputs (the last one on ties).
            matches = [k for k in self.members.keys() if k.startswith(run_key)]
            if not matches:
                raise dexy.commands.UserFeedback(
                    "no document found matching '%s'" % run_key)
            matches.sort(key=lambda k: len(self.members[k].inputs))
            doc = self.members[matches[-1]]
        parse_new_document(doc)

        if not self.args['silent']:
            print("limiting members list to %s and its dependencies, %s/%s documents will be run" % (doc.key(), len(new_members), len(self.members)))
        self.members = new_members
        self.depends = new_depends
        # Report dependencies for the documents that will actually run.
        total_dependencies = sum(len(d.inputs) for d in new_members.values())

    num_members = len(self.members)
    if num_members > 0:
        dep_ratio = float(total_dependencies)/num_members
    else:
        dep_ratio = None

    if not self.args['silent']:
        print("sorting %s documents into run order, there are %s total dependencies" % (num_members, total_dependencies))
        if dep_ratio:
            print("ratio of dependencies to documents is %0.1f" % (dep_ratio))
            if dep_ratio > 10:
                print("if you are experiencing performance problems:")
                print("call dexy with -dryrun and inspect logs/batch-XXXX.json to debug dependencies")
                print("consider using -strictinherit or reducing your use of 'allinputs' ")

    try:
        self.log.debug("Beginning topological sort...")
        topsort_ordering = topsort(self.depends)
        self.log.debug("Topological sort completed successfully.")
    except CycleError as e:
        print("There are circular dependencies!")
        answer, num_parents, children = e.args
        member_keys = list(self.members.keys())
        for child, parents in children.items():
            for parent in parents:
                print("%s depends on %s" % (member_keys[parent], member_keys[child]))
        raise dexy.commands.UserFeedback(e.message)

    # Documents that take part in no dependency are absent from the sort
    # result; append them in config order so the run order is stable.
    sorted_positions = frozenset(topsort_ordering)
    docs_without_dependencies = [
        i for i in range(len(self.members)) if i not in sorted_positions]
    self.ordering = topsort_ordering + docs_without_dependencies

    member_docs = list(self.members.values())
    for i in self.ordering:
        self.docs.append(member_docs[i])