예제 #1
0
    def split(self, source_file, target_file_pattern, paths_to_files):
        """
        Save selected parts of an xml file into separate target files.

        :param source_file: original xml file
        :param target_file_pattern: prefix of the target files. Every file number is expanded
            to the file name "<target_file_pattern>.<filenumber>.xml"
        :param paths_to_files: mapping from a path that selects a part of the original document
            to the set of integer file numbers it is saved to. The mapping is modified in place:
            each set of numbers is replaced by the set of target file names
        :return: self.path_to_files (assigned from paths_to_files at the start of the call)
        """
        self.path_to_files = paths_to_files

        # Validate the whole mapping before rewriting anything, so a malformed
        # entry never leaves the mapping half converted.
        for selector in paths_to_files:
            file_numbers = paths_to_files[selector]
            assert isinstance(file_numbers, set)
            assert all(isinstance(number, int) for number in file_numbers)

        # Expand every file number into the name of its target file.
        for selector in paths_to_files:
            paths_to_files[selector] = {
                "%s.%s.xml" % (target_file_pattern, number)
                for number in paths_to_files[selector]
            }

        self.walk_tree(file_path=source_file)
        self.post_actions()
        self.close()
        Logger.info("splitting %s completed" % source_file)
        return self.path_to_files
예제 #2
0
    def check_for_references(self, **kwargs):
        """
        FastXMLWalker callback function
        Checks every attribute of the current element (except the attribute named ID itself)
        for a value that occurs in self.IDs.
        For each such value the element and its ancestors are searched, innermost first, for an
        attribute named ID. If one is found and it differs from the referenced value, a reference
        from that ID to the referenced ID is added to self.Refs.
        If no element up to the root carries an ID, no reference is recorded.
        :param kwargs: FastXMLWalker kwargs; "element" is the element being visited
        :return: None
        """
        element = kwargs["element"]
        attributes = element.attrib
        for name in attributes:
            if name == "ID":
                continue
            target_id = attributes[name]
            if target_id not in self.IDs:
                continue

            # walk up to the closest element (starting with this one) that owns an ID
            source_id = None
            node = element
            while node is not None:
                if "ID" in node.attrib:
                    source_id = node.attrib["ID"]
                    break
                node = node.getparent()

            # Without an owning ID there is nothing to attach the reference to;
            # recording it would create a bogus entry under the key None.
            if source_id is None or source_id == target_id:
                continue

            self.Refs[source_id].add(target_id)
            if len(self.Refs) % 10000 == 0:
                Logger.info("%s Refs" % len(self.Refs))
예제 #3
0
    def search_stepxml(self, myfile, split_path_node_size_tuples=None):
        """
        Search for connected sets in a StepXML file and distribute them to files.

        The file is walked twice: the first pass collects all IDs and the split nodes,
        the second pass collects the direct references between IDs. Indirect references
        are resolved afterwards, the connected sets are calculated, translated from IDs
        to exact paths and handed to a NodeDistributor.

        :param myfile: Path to StepXML file
        :param split_path_node_size_tuples: list of (split node path, node restriction) tuples,
            e.g. [("//{http://www.stibosystems.com/step}Product/@ID", 10)], which is also the
            default used when the argument is None or empty
        :return: the result of NodeDistributor.distribute() for the connected sets
        """
        fx = FastXMLCallbackWalker()

        interests = {
            Interest(
                interest="//@ID",
                callback=self.add_id
            )
        }
        if not split_path_node_size_tuples:
            split_path_node_size_tuples = [("//{http://www.stibosystems.com/step}Product/@ID", 10)]
        for splitnode_path, node_restriction in split_path_node_size_tuples:
            interests.add(SplitPath(interest=splitnode_path, callback=self.add_split_node, node_restriction=node_restriction))

        fx.register_interests(interests)
        fx.walk_tree(myfile)
        Logger.debug("IDs: %s" % self.IDs)
        Logger.debug("split_nodes: %s" % self.SplitNodes)
        # SplitNodes is keyed by split path; count the nodes of all paths together,
        # the same measure the add_split_node progress log reports.
        split_node_count = sum(len(self.SplitNodes[x]) for x in self.SplitNodes)
        Logger.info("%s IDs found, %s SplitNodes found" % (len(self.IDs), split_node_count))

        # second pass: the IDs are known now, so references to them can be detected
        fx2 = FastXMLCallbackWalker()
        fx2.register_event_callback("start", self.check_for_references)
        fx2.walk_tree(myfile)

        Logger.debug("direct: %s" % self.Refs)
        Logger.info("%s direct dependencies found" % (len(self.Refs)))

        idr = IndirectIDResovler(self.Refs, self.SplitNodes)
        idr.resolve_indirect()
        Logger.debug("indirect: %s" % idr.refs)
        Logger.info("%s indirect dependencies found" % (len(idr.refs)))

        connected_sets = self.calc_connected_sets(self.Refs, self.SplitNodes)
        Logger.debug("connected_sets: %s" % connected_sets)
        Logger.info("connected sets calculation completed")

        # translate every connected set of IDs into the set of exact paths of those IDs
        connected_sets2 = {}
        for path in connected_sets:
            exact_path_sets = []
            for connected_set_path in connected_sets[path]:
                my_set = set()
                for item in connected_set_path:
                    my_set.update(self.IDs2Exact[item])
                exact_path_sets.append(my_set)
            connected_sets2[path] = exact_path_sets
        nd = NodeDistributor(connected_sets2)
        distribution_to_files = nd.distribute()

        Logger.debug("distribution to files: %s" % distribution_to_files)
        Logger.info("distribution to files completed")

        return distribution_to_files
예제 #4
0
 def add_split_node(self, **kwargs):
     """
     FastXMLWalker callback function
     Meant to be registered for split nodes.
     Appends the exact path reported by the walker (kwargs["walker"].exact_path) to
     self.SplitNodes[<interest>] and logs the overall number of split nodes whenever
     that number is a multiple of 10000.
     :param kwargs: FastXMLWalker kwargs; "interest" and "walker" are read
     :return: None
     """
     collected = self.SplitNodes[kwargs["interest"]]
     collected.append(kwargs["walker"].exact_path)
     # progress log over the split nodes of all interests together
     total = sum(len(self.SplitNodes[key]) for key in self.SplitNodes)
     if total % 10000 == 0:
         Logger.info("%s SplitNodes" % total)
예제 #5
0
 def add_id(self, **kwargs):
     """
     FastXMLWalker callback function
     Meant to be registered for nodes that carry an ID attribute.
     Adds the value of that attribute to self.IDs; a progress message is logged
     each time the number of known IDs reaches a multiple of 10000.
     :param kwargs: FastXMLWalker kwargs; "element" is read
     :return: None
     """
     new_id = kwargs["element"].attrib["ID"]
     if new_id in self.IDs:
         # already known, nothing to add and nothing to report
         return
     self.IDs.add(new_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs" % known)
예제 #6
0
 def add_split_node(self, **kwargs):
     """
     FastXMLWalker callback function
     Meant to be registered for split nodes.
     Appends the ID attribute of the node to self.SplitNodes[<interest>] and adds the
     walker's exact path to self.IDs2Exact[<ID>]. Logs the overall number of split
     nodes whenever that number is a multiple of 10000.
     :param kwargs: FastXMLWalker kwargs; "element", "interest" and "walker" are read
     :return: None
     """
     node_id = kwargs["element"].attrib["ID"]
     self.SplitNodes[kwargs['interest']].append(node_id)
     current_walker = kwargs["walker"]
     self.IDs2Exact[node_id].add(current_walker.exact_path)
     # progress log over the split nodes of all interests together
     total = sum(len(self.SplitNodes[key]) for key in self.SplitNodes)
     if total % 10000 == 0:
         Logger.info("%s SplitNodes identified" % total)
예제 #7
0
 def add_split_node_id(self, **kwargs):
     """
     FastXMLWalker callback function
     Meant to be registered for the id node of a split node.
     Forwards the event to self.add_id, then appends the text of the id node to
     self.ExactPathIDs2SplitNodes under the exact path of its parent (the walker's
     exact path with the last "/" segment removed) and makes sure the id is in self.IDs.
     :param kwargs: FastXMLWalker kwargs; "element" and "walker" are read
     :return: None
     """
     node_id = kwargs["element"].text
     self.add_id(**kwargs)
     # the parent of the id node is the split node itself
     split_node_path = kwargs["walker"].exact_path.rsplit("/", 1)[0]
     self.ExactPathIDs2SplitNodes[split_node_path].append(node_id)
     # NOTE(review): self.add_id presumably registers the id already, which would make
     # the remainder redundant - confirm against the add_id of this class before removing.
     if node_id in self.IDs:
         return
     self.IDs.add(node_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs identified" % known)
예제 #8
0
 def add_id(self, **kwargs):
     """
     FastXMLWalker callback function
     Meant to be registered for nodes whose text is an ID.
     Adds the exact path of the node's parent (the walker's exact path with the last
     "/" segment removed) to self.IDs2ExactPaths[<ID>] and adds the ID to self.IDs;
     a progress message is logged each time the number of known IDs reaches a
     multiple of 10000.
     :param kwargs: FastXMLWalker kwargs; "element" and "walker" are read
     :return: None
     """
     new_id = kwargs["element"].text
     parent_path = kwargs["walker"].exact_path.rsplit("/", 1)[0]
     self.IDs2ExactPaths[new_id].add(parent_path)
     if new_id in self.IDs:
         # the id itself is already known; only its additional path was recorded
         return
     self.IDs.add(new_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs" % known)
예제 #9
0
    def search_genif2(self, genif_file, split_path_node_restriction_tuples=None):
        """
        Search connected sets in genif2 files and distribute them to files.

        One walk over the file collects the IDs, the split nodes and the source/target
        pairs of the relations. Afterwards the split node IDs are calculated, indirect
        references are resolved, the connected sets are calculated and handed to a
        NodeDistributor.

        :param genif_file: path to a genif2 file
        :param split_path_node_restriction_tuples: list of (split node path, node restriction)
            tuples. When None or empty, the msx item path (restriction 1) and the msx asset
            path (restriction 2) are used
        :return: the result of NodeDistributor.distribute() for the connected sets
        """
        fx = FastXMLCallbackWalker()
        if not split_path_node_restriction_tuples:
            split_path_node_restriction_tuples = [
                ("/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}item", 1),
                ("/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}asset", 2),
            ]
        interests = {
            Interest(
                interest="/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}relation/{http://www.media-saturn.com/msx}source/{http://www.media-saturn.com/msx}uniqueID",
                callback=self.add_source
            ),
            Interest(
                interest="/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}relation/{http://www.media-saturn.com/msx}target/{http://www.media-saturn.com/msx}uniqueID",
                callback=self.add_target
            ),
            # fired on the end event of a relation element
            Interest(
                interest="//{http://www.media-saturn.com/msx}relation",
                callback=self.relation_to_ref,
                event='end'
            ),
            Interest(
                interest="//{http://www.media-saturn.com/msx}uniqueID",
                callback=self.add_id
            ),
        }
        # every split path is watched twice: the split node itself and its uniqueID child
        for split_path_node_restriction_tuple in split_path_node_restriction_tuples:
            split_path = split_path_node_restriction_tuple[0]
            node_restriction = split_path_node_restriction_tuple[1]
            interests.add(
                SplitPath(
                    interest=split_path,
                    callback=self.add_split_node,
                    node_restriction=node_restriction
                )
            )
            interests.add(
                Interest(
                    interest="%s/{http://www.media-saturn.com/msx}uniqueID" % split_path,
                    callback=self.add_split_node_id,
                )
            )
        fx.register_interests(interests)
        for _uuid in fx._relative_interests_trees:
            Logger.debug(RenderTree(fx._relative_interests_trees[_uuid].interest_tree))
        fx.walk_tree(genif_file)
        Logger.debug("ids: %s" % self.IDs)
        Logger.debug("split_nodes: %s" % self.SplitNodes)
        split_node_count = sum(len(self.SplitNodes[x]) for x in self.SplitNodes)
        Logger.info("%s IDs, %s split nodes, %s direct references identified" % (len(self.IDs), split_node_count, len(self.Refs)))

        Logger.debug("IDsSplitNodes %s" % self.ExactPathIDs2SplitNodes)
        Logger.debug("direct: %s" % self.Refs)
        self.calc_splitnode_ids()

        idr = IndirectIDResovler(self.Refs, self.SplitNodes)
        idr.resolve_indirect()
        Logger.debug("indirect: %s" % idr.refs)
        Logger.info("indirect reference calculation completed")

        connected_sets = self.calc_connected_sets(self.SplitNodes)

        Logger.debug("connected_sets: %s" % connected_sets)
        Logger.info("connected set calculation completed")

        nd = NodeDistributor(connected_sets)
        distribution_to_files = nd.distribute()

        Logger.debug("distribution to files: %s" % distribution_to_files)
        Logger.info("distribution to files completed")

        return distribution_to_files