Exemplo n.º 1
0
    def process_config(self):
        """
        Processes a populated config dict, identifies files to be processed,
        creates Document objects for each, links dependencies and finally does
        topological sort to establish order of batch run.
        """

        # Define the parse_doc nested function which we will call recursively.
        def parse_doc(path, input_directive, args = {}):
            # If a specification is nested in a dependency, then input_directive
            # may be a dict. If so, split it into parts before continuing.
            try:
                a, b = input_directive.popitem()
                input_directive = a
                args = b
            except AttributeError:
                pass

            tokens = input_directive.split("|")
            if "/" in tokens[0]:
                raise dexy.commands.UserFeedback("paths not allowed in tokens: %s" % tokens[0])
            if path == '.':
                glob_string = tokens[0]
            else:
                glob_string = os.path.join(re.sub("^\./", "", path), tokens[0])
            filters = tokens[1:]

            docs = []

            # virtual document
            if re.search("@", glob_string):
                virtual = True
                dangerous = any(k in ['url', 'repo', 'path'] for k in args)
                if dangerous and not self.args['danger']:
                    msg = "You are attempting to access a remote file %s." % glob_string
                    msg += " You must specify -danger option to do this.\n"
                    raise dexy.commands.UserFeedback(msg)
                glob_string = glob_string.replace("@", "")
            else:
                virtual = False

            regex = fnmatch.translate(glob_string).replace(".*", "(.*)")
            matcher = re.compile(regex)

            files = glob.glob(glob_string)

            nofiles = len(files) == 0

            if nofiles and virtual:
                files = [glob_string]

            for f in files:
                create = True
                if not virtual:
                    if os.path.isdir(f):
                        create = False

                if args.has_key('disabled'):
                    if args['disabled']:
                        create = False
                        self.log.warn("document %s|%s disabled" % (f, "|".join(filters)))

                inputs = []
                if args.has_key('inputs'):
                    if isinstance(args['inputs'], str) or isinstance(args['inputs'], unicode):
                        raise dexy.commands.UserFeedback("inputs for %s should be an array" % f)
                    for i in args['inputs']:
                        # Create document objects for input patterns (just in this directory)
                        for doc in parse_doc(path, i):
                            inputs.append(doc.key())


                m = matcher.match(f)
                if m and len(m.groups()) > 0:
                    rootname = matcher.match(f).group(1)

                # The 'ifinput' directive says that if an input exists matching
                # the specified pattern, we should create this document and it
                # will depend on the specified input.
                if args.has_key('ifinput'):
                    if isinstance(args['ifinput'], str) or isinstance(args['ifinput'], unicode):
                        ifinputs = [args['ifinput']]
                    else:
                        self.log.debug("treating input %s as iterable. class: %s" % (
                            args['ifinput'], args['ifinput'].__class__.__name__))
                        ifinputs = args['ifinput']

                    for s in ifinputs:
                        self.log.debug("evaluating ifinput %s" % s)
                        ifinput = s.replace("%", rootname)
                        self.log.debug("evaluating ifinput %s" % ifinput)
                        input_docs = parse_doc(path, ifinput, {})
                        for input_doc in input_docs:
                            inputs.append(input_doc.key())

                    if len(input_docs) == 0:
                        create = False

                if args.has_key('ifnoinput'):
                    ifinput = args['ifnoinput'].replace("%", rootname)
                    input_docs = parse_doc(path, ifinput, {})

                    if len(input_docs) > 0:
                        create = False

                if args.has_key('except'):
                    try:
                        except_re = re.compile(args['except'])
                    except sre_constants.error as e:
                        raise dexy.commands.UserFeedback("""You passed 'except' value of %s.
Please pass a valid Python-style regular expression for
'except', NOT a glob-style matcher. Error message from
re.compile: %s""" % (args['except'], e))
                    if re.match(except_re, f):
                        self.log.warn("skipping %s for %s as it matches except pattern %s" % (
                                f,
                                input_directive,
                                args['except']
                                ))
                        create = False

                if create:
                    doc = dexy.document.Document()
                    doc.set_controller(self)

                    # Filters can either be included in the name...
                    doc.set_name_and_filters(f, filters)
                    # ...or they may be listed explicitly.
                    if args.has_key('filters'):
                        doc.filters += args['filters']

                    if args.has_key('loglevel'):
                        doc.loglevelname = args['loglevel']
                    doc.setup_log() # After name has been set
                    doc.virtual = virtual

                    key = doc.key()
                    self.log.debug("creating doc %s for glob %s" % (key, glob_string))

                    if self.members.has_key(key):
                        doc = self.members[key]

                    if args.has_key('priority'):
                        doc.priority = args['priority']
                        del args['priority']

                    doc.args.update(args)

                    if args.has_key('allinputs'):
                        doc.use_all_inputs = args['allinputs']

                    if args.has_key('inputs'):
                        doc.input_args = copy.copy(args['inputs'])
                        doc.input_keys = []

                    for i in inputs:
                        doc.add_input_key(i)

                    self.members[key] = doc
                    docs.append(doc) # docs is a local list of docs

            return docs # end of parse_doc nested function

        def get_pos(member):
            key = member.key()
            return self.members.keys().index(key)

        def depend(parent, child):
            self.depends.append((get_pos(child), get_pos(parent)))

        # The real processing starts here.
        self.members = OrderedDict()
        self.depends = []

        self.batch_id = self.db.next_batch_id()
        if not self.args['silent']:
            print "batch id is", self.batch_id

        for path, config in self.config.iteritems():
            ### @export "features-global-args-1"
            if config.has_key("$globals"):
                global_args = config["$globals"]
            else:
                global_args = {}

            if config.has_key("$variables"):
                global_variables = config["$variables"]
            else:
                global_variables = {}

            if self.args.has_key('globals'):
                global_args.update(self.args['globals'])

            for k, v in config.iteritems():
                local_args = global_args.copy()
                local_args.update(v)
                local_args['$variables'] = global_variables
                for kg in global_args.keys():
                    if local_args.has_key(kg):
                        if isinstance(local_args[kg], dict):
                            local_args[kg].update(global_args[kg])
                parse_doc(path, k, local_args)
            ### @end

        # Determine dependencies
        total_dependencies = 0
        self.log.debug("Finalizing dependencies between documents...")
        for doc in self.members.values():
            doc.finalize_inputs(self.members)
            total_dependencies += len(doc.inputs)
            for input_doc in doc.inputs:
                depend(doc, input_doc)

            self.log.debug("finalized dependencies for %s" % doc.key())
            if len(doc.inputs) > 10:
                self.log.debug("%s inputs added" % len(doc.inputs))
            elif len(doc.inputs) == 0:
                self.log.debug("no inputs added")
            else:
                self.log.debug("inputs added: %s" % ", ".join(d.key() for d in doc.inputs))

        if len(self.args['run']) > 0:
            # Only run the specified document, and its dependencies.
            new_members = OrderedDict()
            new_depends = []

            def new_get_pos(member):
                key = member.key()
                return new_members.keys().index(key)

            def new_depend(parent, child):
                new_depends.append((new_get_pos(child), new_get_pos(parent)))

            def parse_new_document(d):
                new_members[d.key()] = d
                for input_doc in d.inputs:
                    if not input_doc.key() in new_members.keys():
                        new_members[input_doc.key()] = input_doc
                    new_depend(d, input_doc)
                    parse_new_document(input_doc)

            run_key = self.args['run']
            if self.members.has_key(run_key):
                doc = self.members[run_key]
            else:
                matches = [k for k in self.members.keys() if k.startswith(run_key)]
                matches.sort(key=lambda k: len(self.members[k].inputs))
                doc = self.members[matches[-1]]
            parse_new_document(doc)

            if not self.args['silent']:
                print "limiting members list to %s and its dependencies, %s/%s documents will be run" % (doc.key(), len(new_members), len(self.members))
            self.members = new_members
            self.depends = new_depends

        num_members = len(self.members)
        if num_members > 0:
            dep_ratio = float(total_dependencies)/num_members
        else:
            dep_ratio = None

        if not self.args['silent']:
            print "sorting %s documents into run order, there are %s total dependencies" % (num_members, total_dependencies)
            if dep_ratio:
                print "ratio of dependencies to documents is %0.1f" % (dep_ratio)
                if dep_ratio > 10:
                    print "if you are experiencing performance problems:"
                    print "call dexy with -dryrun and inspect logs/batch-XXXX.json to debug dependencies"
                    print "consider using -strictinherit or reducing your use of 'allinputs' "

        try:
            self.log.debug("Beginning topological sort...")
            topsort_ordering = topsort(self.depends)
            self.log.debug("Topological sort completed successfully.")
        except CycleError as e:
            print "There are circular dependencies!"
            answer, num_parents, children = e.args
            for child, parents in children.items():
                for parent in parents:
                    print "%s depends on %s" % (self.members.keys()[parent], self.members.keys()[child])
            raise dexy.commands.UserFeedback(e.message)

        docs_without_dependencies = frozenset(range(len(self.members))) - frozenset(topsort_ordering)
        self.ordering = topsort_ordering + list(docs_without_dependencies)

        for i in self.ordering:
            key = self.members.keys()[i]
            self.docs.append(self.members[key])