class GrowingTree(object):
    def __init__(self, width, height, strategy):
        self.width = width
        self.height = height
        self.strategy = strategy
        self.graph = Graph(width, height)

    def run(self, verbose=False):
        cells = [(randrange(self.width), randrange(self.height))]
        while len(cells) > 0:
            if verbose:
                os.system('clear')
                print self.graph.__unicode__()
                time.sleep(0.01)
            index = self.strategy(len(cells))
            x, y = cells[index]
            dirs = [D.N, D.S, D.E, D.W]
            shuffle(dirs)
            for dir in dirs:
                # find neighbour
                nx, ny = x + D.DX[dir], y + D.DY[dir]
                # in bounds and neighbour unvisited?
                if 0 <= nx < self.width and 0 <= ny < self.height and self.graph[ny, nx] == 0:
                    self.graph[y, x] |= dir
                    self.graph[ny, nx] |= D.OPP[dir]
                    index = -1
                    cells.append((nx, ny))
                    break  # carve to one unvisited neighbour, then grow from it
            if index >= 0:
                cells.pop(index)
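# Hedged usage sketch for GrowingTree: `strategy` receives the number of
# active cells and returns the index of the cell to expand next. Always
# picking the newest cell gives recursive-backtracker-style mazes, while a
# random pick grows more like Prim's algorithm. Names are illustrative only.
#
#   newest = lambda n: n - 1             # depth-first texture
#   prim_like = lambda n: randrange(n)   # random-growth texture
#   GrowingTree(20, 10, newest).run(verbose=True)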
def _RDFGraph(self):
    # expensive to recompute
    graph = Graph()
    for URL, obj in self._pypeObjects.iteritems():
        for s, p, o in obj._RDFGraph:
            graph.add((s, p, o))
    return graph
def main(args):
    with io.open(args.file) as f:
        edges = list(parse_graph(f))
    g = Graph(edges)
    scores = g.page_rank()
    print(">" * 80)
    for node, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
        print("{} ({})".format(node, score))
    print("<" * 80)
def test_my2(self):
    edges = [
        ["A", "B"],
        ["B", "C"],
        ["C", "A"],
        ["C", "B"],
        ["C", "D"],
    ]
    g = Graph(edges)
    # scores is expected to be {node1: score1, node2: score2, ...}
    scores = g.page_rank(damping=0.85, limit=1.0e-8)
    sorted_nodes = [node for node, _ in
                    sorted(scores.items(), key=lambda x: x[1], reverse=True)]
    self.assertSequenceEqual(sorted_nodes, ["C", "B", "A", "D"])
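# A minimal power-iteration sketch of the page_rank() exercised above, written
# against a plain {node: [out-neighbours]} dict; the real Graph class may
# differ (e.g. in how it redistributes the mass of dangling nodes, which this
# sketch simply drops). `damping` is the teleportation factor and `limit` the
# convergence threshold.
def page_rank_sketch(adjacency, damping=0.85, limit=1.0e-8):
    nodes = list(adjacency)
    n = len(nodes)
    scores = {node: 1.0 / n for node in nodes}
    while True:
        new_scores = {}
        for node in nodes:
            incoming = sum(scores[src] / len(adjacency[src])
                           for src in nodes if node in adjacency[src])
            new_scores[node] = (1.0 - damping) / n + damping * incoming
        if max(abs(new_scores[v] - scores[v]) for v in nodes) < limit:
            return new_scores
        scores = new_scores

# e.g. page_rank_sketch({"A": ["B"], "B": ["C"], "C": ["A", "B", "D"], "D": []})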
def setReferenceRDFGraph(self, fn):
    self._referenceRDFGraph = Graph()
    self._referenceRDFGraph.load(fn)
    refMD5s = self._referenceRDFGraph.subject_objects(pypeNS["codeMD5digest"])
    for URL, md5digest in refMD5s:
        obj = self._pypeObjects[str(URL)]
        obj.setReferenceMD5(md5digest)
def depthFirstSearch(graph: com.Graph, currentNode: str, endNode: str,
                     visited: Set[str], currentPath: List[str], allPaths: list):
    if currentNode in visited:
        return
    if not isBigCave(currentNode):
        visited.add(currentNode)
    currentPath.append(currentNode)
    if currentNode == endNode:
        allPaths.append(currentPath)
        return
    edges = graph.direct_connected_weights_and_edges(currentNode)
    for node in edges:
        if node not in visited:
            depthFirstSearch(graph, node, endNode, set(visited),
                             list(currentPath), allPaths)
def depthFirstSearchAllowRevisited(graph: com.Graph, currentNode: str,
                                   endNode: str, visited: Set[str],
                                   visitedTwice: Set[str],
                                   currentPath: List[str], allPaths: list):
    if not canVisit(currentNode, visited, visitedTwice, True):
        return
    currentPath.append(currentNode)
    if currentNode == endNode:
        allPaths.append(currentPath)
        return
    edges = graph.direct_connected_weights_and_edges(currentNode)
    for node in edges:
        if canVisit(node, visited, visitedTwice):
            depthFirstSearchAllowRevisited(graph, node, endNode, set(visited),
                                           set(visitedTwice),
                                           list(currentPath), allPaths)
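# The helpers above (isBigCave, canVisit) are defined elsewhere; this is a
# hedged guess at their contracts, AoC-2021-day-12 style: big caves
# (upper-case) may be revisited freely, a small cave once, and at most one
# small cave (other than "start"/"end") twice. The trailing True passed for
# the current node is read here as "record the visit as a side effect".
def isBigCave(node):
    return node.isupper()

def canVisit(node, visited, visitedTwice, record=False):
    if isBigCave(node):
        return True
    if node not in visited:
        if record:
            visited.add(node)
        return True
    if not visitedTwice and node not in ("start", "end"):
        if record:
            visitedTwice.add(node)
        return True
    return False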
def findShortest(graph: com.Graph, currentNode, ownedKeys: set, allKeys: set,
                 currentSteps: int):
    pathQueue = deque()
    alreadyHit = {}
    alreadyHit[(currentNode, frozenset(ownedKeys))] = currentSteps
    keysInOrder = list()
    pathQueue.append((currentSteps, currentNode, ownedKeys, keysInOrder))
    allKeysFrozen = frozenset(allKeys)
    shortestSoFar = impossible
    shortestPath = None
    while pathQueue:
        currentSteps, currentNode, ownedKeys, keysInOrder = pathQueue.popleft()
        ownedKeys = ownedKeys.copy()
        keysInOrder = keysInOrder.copy()
        if isKey(currentNode):
            ownedKeys.add(currentNode)
            keysInOrder.append(currentNode)
        currentOwnedKeys = frozenset(ownedKeys)
        # compare the key sets directly; comparing hashes risks collisions
        if currentOwnedKeys == allKeysFrozen:
            if shortestSoFar >= currentSteps:
                shortestSoFar = currentSteps
                shortestPath = keysInOrder
            continue
        for weight, edge in graph.direct_connected_weights_and_edges(currentNode):
            newSteps = currentSteps + weight
            if isDoor(edge) and edge.lower() not in ownedKeys:
                continue
            # explicit None check so a stored distance of 0 still counts
            previous = alreadyHit.get((edge, currentOwnedKeys))
            if previous is not None and previous < newSteps:
                continue
            alreadyHit[(edge, currentOwnedKeys)] = newSteps
            pathQueue.append((newSteps, edge, ownedKeys, keysInOrder))
    return shortestSoFar, shortestPath
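# Hedged sketch of the remaining names used by findShortest: single-character
# cells (AoC-2019-day-18 style) where lower-case is a key and upper-case a
# door, and `impossible` is a sentinel above any reachable step count.
impossible = float("inf")

def isKey(node):
    return len(node) == 1 and node.islower()

def isDoor(node):
    return len(node) == 1 and node.isupper()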
# -*- coding:utf-8 -*-
"""
BGM1.py: Bipartite Graph Matching with 1-star local structure
Written by Ding Rui
Latest Version: 2020/5/24
"""
from common import ArgParse, Graph, CostMatrix, PrintGED, Add1Star, SolveLSAP

dot_sub, dot_ins, dot_del, edge_sub, edge_ins, edge_del, root_path, void, inf, \
    g1, g2 = ArgParse()

g1 = Graph(root_path + '/' + g1)
g2 = Graph(root_path + '/' + g2)

_, col, _ = SolveLSAP(CostMatrix(g1, g2, Add1Star))
answer = [(int(col[i]) if col[i] < g2.dots else void) for i in range(g1.dots)]
PrintGED(g1, g2, tuple(answer))
#graph.add_edges( 4, 15, 9)
#graph.add_edges( 5, 14, 11)
#graph.add_edges( 6, 15, 9, 1, 12, 7, 3)
#graph.add_edges( 7, 6, 3, 10, 8, 13, 12)
#graph.add_edges( 8, 2, 10, 7, 13)
#graph.add_edges( 9, 4, 15, 6, 1)
#graph.add_edges(10, 3, 7, 8, 2)
#graph.add_edges(11, 5, 14, 12, 13)
#graph.add_edges(12, 1, 6, 7, 13, 11, 14)
#graph.add_edges(13, 11, 12, 7, 8)
#graph.add_edges(14, 5, 11, 12, 1)
#graph.add_edges(15, 4, 9, 6, 3)

if __name__ == '__main__':
    total = 40
    graph = Graph(nodes=range(1, 16))
    graph.add_edges( 1, )
    graph.add_edges( 2, )
    graph.add_edges( 3, )
    graph.add_edges( 4, )
    graph.add_edges( 5, )
    graph.add_edges( 6, )
    graph.add_edges( 7, )
    graph.add_edges( 8, )
    graph.add_edges( 9, )
    graph.add_edges(10, )
    graph.add_edges(11, )
    graph.add_edges(12, )
    graph.add_edges(13, )
    graph.add_edges(14, )
    graph.add_edges(15, )
def solve():
    matrix = [map(int, line.strip().split(','))
              for line in open('resources/p081_matrix.txt')]
    g = Graph()
    g.add_node('source')
    g.add_node('target')
    for r in xrange(len(matrix)):
        for c in xrange(len(matrix[r])):
            g.add_node((r, c))
    for r in xrange(len(matrix)):
        for c in xrange(len(matrix[r])):
            if r > 0:
                g.add_edge((r-1, c), (r, c), matrix[r][c])
            if c > 0:
                g.add_edge((r, c-1), (r, c), matrix[r][c])
    g.add_edge('source', (0, 0), matrix[0][0])
    g.add_edge((len(matrix)-1, len(matrix[0])-1), 'target', 0)
    costs, _ = dijkstra(g, 'source')
    return costs['target']
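# A plausible shape for the dijkstra() helper used above, assuming a
# (hypothetical) Graph.neighbours(node) -> [(neighbour, weight), ...]
# accessor; the call site only requires that it return a
# (costs, predecessors) pair.
import heapq

def dijkstra_sketch(graph, source):
    costs = {source: 0}
    prev = {}
    heap = [(0, source)]
    while heap:
        cost, node = heapq.heappop(heap)
        if cost > costs.get(node, float('inf')):
            continue  # stale heap entry; a shorter route was already found
        for neighbour, weight in graph.neighbours(node):
            candidate = cost + weight
            if candidate < costs.get(neighbour, float('inf')):
                costs[neighbour] = candidate
                prev[neighbour] = node
                heapq.heappush(heap, (candidate, neighbour))
    return costs, prev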
def parse_models(self, parse_stateful=False):
    """Method to query and read data from database.

    Method to query database and read models into Graph objects.

    Args:
        parse_stateful (bool): Boolean to indicate whether graphs with a
            stateful partitioned call should be parsed; these graphs do not
            contain a graph structure or tensors. Defaults to False.

    Returns:
        List of Graph objects corresponding to the graph objects the models
        in the spanner database have been parsed into.
    """
    model_graphs = list()

    # Query to get all models from Models table
    with self.database.snapshot() as snapshot:
        qresult_models = snapshot.execute_sql(
            "SELECT model_name, category, sub_category, source, num_inputs"
            " FROM Models"
        )

    for row in qresult_models:
        # Checking num_inputs for presence of graph structure
        if row[4] == 0 and not parse_stateful:
            continue

        # Extracting model attributes
        model_name = row[0]
        category = row[1]
        sub_category = row[2]
        source = row[3]

        nodes = list()
        edges = list()
        start_node_indices = list()
        adj_list = dict()

        # Querying Operators of model_name
        with self.database.snapshot() as snapshot:
            qresult_operators = snapshot.execute_sql(
                "SELECT * from Models JOIN Operators"
                " ON Models.model_name = Operators.model_name"
                " WHERE Models.model_name = '" + model_name + "'"
                " ORDER BY operator_id"
            )

        # Dictionary to hold which field is in which index of query results
        field_to_index = dict()
        # Boolean to check if field_to_index needs to be populated
        populate_dicts = True

        # Extracting Node attributes
        for row in qresult_operators:
            if populate_dicts:
                for index in range(len(qresult_operators.metadata.row_type.fields)):
                    field_name = qresult_operators.metadata.row_type.fields[index].name
                    field_to_index[field_name] = index
                populate_dicts = False

            new_node = Node.Node(None, None)
            for attr in vars(new_node).keys():
                if attr in field_to_index:
                    setattr(new_node, attr, row[field_to_index[attr]])
            nodes.append(new_node)

            # Populating start_node_indices using the is_input field
            if row[field_to_index['is_input']]:
                start_node_indices.append(len(nodes) - 1)

        # Querying Tensors of model_name
        with self.database.snapshot() as snapshot:
            qresult_tensors = snapshot.execute_sql(
                "SELECT * from Models JOIN Tensors"
                " ON Models.model_name = Tensors.model_name"
                " WHERE Models.model_name = '" + model_name + "'"
                " ORDER BY tensor_id"
            )

        # Dictionary to hold which field is in which index of query results
        field_to_index.clear()
        # Boolean to check if field_to_index needs to be populated
        populate_dicts = True

        # Extracting Edge attributes
        for row in qresult_tensors:
            if populate_dicts:
                for index in range(len(qresult_tensors.metadata.row_type.fields)):
                    field_name = qresult_tensors.metadata.row_type.fields[index].name
                    field_to_index[field_name] = index
                populate_dicts = False

            new_edge = Edge.Edge(None, None)
            for attr in vars(new_edge).keys():
                if attr in field_to_index:
                    setattr(new_edge, attr, row[field_to_index[attr]])
            edges.append(new_edge)

            to_operator_ids = row[field_to_index['to_operator_ids']]
            from_operator_ids = row[field_to_index['from_operator_ids']]
            edge_index = len(edges) - 1

            for src_node_index in from_operator_ids:
                src_node_index -= 1
                for dest_node_index in to_operator_ids:
                    dest_node_index -= 1
                    if src_node_index not in adj_list:
                        adj_list.update({src_node_index: []})
                    adj_list[src_node_index].append([edge_index, dest_node_index])

        new_graph = Graph.Graph(nodes, start_node_indices, edges, adj_list,
                                model_name, category, sub_category)
        new_graph.source = source
        model_graphs.append(new_graph)

    return model_graphs
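# Note: the concatenated WHERE clauses above work, but Cloud Spanner's
# execute_sql also accepts bound parameters, which avoids quoting/injection
# problems. A hedged sketch of the same Operators query in that form (same
# snapshot object as above):
#
#   from google.cloud.spanner_v1 import param_types
#
#   qresult_operators = snapshot.execute_sql(
#       "SELECT * FROM Models JOIN Operators"
#       " ON Models.model_name = Operators.model_name"
#       " WHERE Models.model_name = @model_name"
#       " ORDER BY operator_id",
#       params={"model_name": model_name},
#       param_types={"model_name": param_types.STRING},
#   )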
                stack.append((edge, iter(graph.get_edges(edge))))
                visitor.send((Event.TreeEdge, node, edge))
                visitor.send((Event.DiscoverVertex, edge))
            elif color[edge] == Color.Gray:
                visitor.send((Event.BackEdge, node, edge))
        except StopIteration:
            node, _ = stack.pop()
            color[node] = Color.Black
            visitor.send((Event.FinishVertex, node))


if __name__ == "__main__":
    from common import Graph

    def print_visitor(graph):
        while True:
            message = yield
            print Event.get_name(message[0]),
            if Event.is_vertex_event(message[0]):
                print "\t[{}]".format(message[1])
            else:
                print "\t[{}]->[{}]".format(message[1], message[2])

    graph = Graph(['a', 'b', 'c', 'd', 'e', 'f'])
    graph.add_edges('a', 'b', 'c')
    graph.add_edges('b', 'c', 'e')
    graph.add_edges('c', 'd', 'f')
    graph.add_edges('d', 'e')
    graph.add_edges('e', 'f')

    visitor = print_visitor(graph)
    visitor.next()
    graph_dfs_visit(graph, 'a', visitor)
    edges1 = 0
    edges2 = 0
    for val in dict1.values():
        if val > 0:
            edges1 += val
        else:
            edges2 -= val
    # Edge substitution is never worse than edge deletion plus edge insertion
    if edges1 > edges2:
        result += (edges2 * edge_sub + (edges1 - edges2) * edge_del)
    else:
        result += (edges1 * edge_sub + (edges2 - edges1) * edge_ins)
    return result


graph1 = Graph(root_path + '/' + graph1)
graph2 = Graph(root_path + '/' + graph2)

queue = PriorityQueue()
for i in range(graph2.dots):
    queue.put(Partial(graph1, graph2, 0, tuple(), i))
queue.put(Partial(graph1, graph2, 0, tuple(), void))

while True:
    partial = queue.get()
    if len(partial.part_map) == graph1.dots:
        PrintGED(graph1, graph2, partial.part_map)
        break
    else:
        left = set(range(graph2.dots)) - set(partial.part_map)
        for i in left:
class PypeWorkflow(PypeObject):
    """
    Representing a PypeWorkflow. PypeTask and PypeDataObjects can be added
    into the workflow and executed through the instance methods.

    >>> import os, time
    >>> from pypeflow.data import PypeLocalFile, makePypeLocalFile, fn
    >>> from pypeflow.task import *
    >>> try:
    ...     os.makedirs("/tmp/pypetest")
    ...     _ = os.system("rm -f /tmp/pypetest/*")
    ... except Exception:
    ...     pass
    >>> time.sleep(1)
    >>> fin = makePypeLocalFile("/tmp/pypetest/testfile_in", readOnly=False)
    >>> fout = makePypeLocalFile("/tmp/pypetest/testfile_out", readOnly=False)
    >>> @PypeTask(outputDataObjs={"test_out":fout},
    ...           inputDataObjs={"test_in":fin},
    ...           parameters={"a":'I am "a"'}, **{"b":'I am "b"'})
    ... def test(self):
    ...     print test.test_in.localFileName
    ...     print test.test_out.localFileName
    ...     os.system( "touch %s" % fn(test.test_out) )
    ...     pass
    >>> os.system( "touch %s" % (fn(fin)) )
    0
    >>> from pypeflow.controller import PypeWorkflow
    >>> wf = PypeWorkflow()
    >>> wf.addTask(test)
    >>> def finalize(self):
    ...     def f():
    ...         print "in finalize:", self._status
    ...     return f
    >>> test.finalize = finalize(test)  # For testing only. Please don't do this in your code. The PypeTask.finalized() is intended to be overridden by subclasses.
    >>> wf.refreshTargets( objs = [fout] )
    /tmp/pypetest/testfile_in
    /tmp/pypetest/testfile_out
    in finalize: done
    True
    """

    supportedURLScheme = ["workflow"]

    def __init__(self, URL=None, **attributes):
        if URL == None:
            URL = "workflow://" + __file__ + "/%d" % id(self)
        self._pypeObjects = {}
        PypeObject.__init__(self, URL, **attributes)
        self._referenceRDFGraph = None  # place holder for a reference RDF

    def addObject(self, obj):
        self.addObjects([obj])

    def addObjects(self, objs):
        """
        Add data objects into the workflow. One can also add task objects to
        the workflow using this method for a non-threaded workflow.
        """
        for obj in objs:
            if obj.URL in self._pypeObjects:
                if id(self._pypeObjects[obj.URL]) != id(obj):
                    raise PypeError, "Add different objects with the same URL %s" % obj.URL
                else:
                    continue
            self._pypeObjects[obj.URL] = obj

    def addTask(self, taskObj):
        self.addTasks([taskObj])

    def addTasks(self, taskObjs):
        """
        Add tasks into the workflow. The dependent input and output data
        objects are added automatically too. It sets the message queue used
        for communicating between the task thread and the main thread. One
        has to use addTasks() or addTask() to add task objects to a threaded
        workflow.
        """
        for taskObj in taskObjs:
            if isinstance(taskObj, PypeTaskCollection):
                for subTaskObj in taskObj.getTasks() + taskObj.getScatterGatherTasks():
                    self.addObjects(subTaskObj.inputDataObjs.values())
                    self.addObjects(subTaskObj.outputDataObjs.values())
                    self.addObjects(subTaskObj.mutableDataObjs.values())
                    self.addObject(subTaskObj)
            else:
                for dObj in taskObj.inputDataObjs.values() +\
                            taskObj.outputDataObjs.values() +\
                            taskObj.mutableDataObjs.values():
                    if isinstance(dObj, PypeSplittableLocalFile):
                        self.addObjects([dObj._completeFile])
                    self.addObjects([dObj])
                self.addObject(taskObj)

    def removeTask(self, taskObj):
        self.removeTasks([taskObj])

    def removeTasks(self, taskObjs):
        """
        Remove tasks from the workflow.
        """
        self.removeObjects(taskObjs)

    def removeObjects(self, objs):
        """
        Remove objects from the workflow. If an object cannot be found, a
        PypeError is raised.
        """
        for obj in objs:
            if obj.URL in self._pypeObjects:
                del self._pypeObjects[obj.URL]
            else:
                raise PypeError, "Unable to remove %s from the graph. (Object not found)" % obj.URL

    def updateURL(self, oldURL, newURL):
        obj = self._pypeObjects[oldURL]
        obj._updateURL(newURL)
        self._pypeObjects[newURL] = obj
        del self._pypeObjects[oldURL]

    @property
    def _RDFGraph(self):
        # expensive to recompute
        graph = Graph()
        for URL, obj in self._pypeObjects.iteritems():
            for s, p, o in obj._RDFGraph:
                graph.add((s, p, o))
        return graph

    def setReferenceRDFGraph(self, fn):
        self._referenceRDFGraph = Graph()
        self._referenceRDFGraph.load(fn)
        refMD5s = self._referenceRDFGraph.subject_objects(pypeNS["codeMD5digest"])
        for URL, md5digest in refMD5s:
            obj = self._pypeObjects[str(URL)]
            obj.setReferenceMD5(md5digest)

    def _graphvizDot(self, shortName=False):
        graph = self._RDFGraph
        dotStr = StringIO()
        shapeMap = {"file": "box", "state": "box", "task": "component"}
        colorMap = {"file": "yellow", "state": "cyan", "task": "green"}
        dotStr.write('digraph "%s" {\n rankdir=LR;' % self.URL)

        for URL in self._pypeObjects.keys():
            URLParseResult = urlparse(URL)
            if URLParseResult.scheme not in shapeMap:
                continue
            else:
                shape = shapeMap[URLParseResult.scheme]
                color = colorMap[URLParseResult.scheme]
                s = URL
                if shortName == True:
                    s = URLParseResult.scheme + "://..." + URLParseResult.path.split("/")[-1]
                dotStr.write('"%s" [shape=%s, fillcolor=%s, style=filled];\n' % (s, shape, color))

        for row in graph.query('SELECT ?s ?o WHERE {?s pype:prereq ?o . }',
                               initNs=dict(pype=pypeNS)):
            s, o = row
            if shortName == True:
                s = urlparse(s).scheme + "://..." + urlparse(s).path.split("/")[-1]
                o = urlparse(o).scheme + "://..." + urlparse(o).path.split("/")[-1]
            dotStr.write('"%s" -> "%s";\n' % (o, s))

        for row in graph.query('SELECT ?s ?o WHERE {?s pype:hasMutable ?o . }',
                               initNs=dict(pype=pypeNS)):
            s, o = row
            if shortName == True:
                s = urlparse(s).scheme + "://..." + urlparse(s).path.split("/")[-1]
                o = urlparse(o).scheme + "://..." + urlparse(o).path.split("/")[-1]
            dotStr.write('"%s" -- "%s" [arrowhead=both, style=dashed ];\n' % (s, o))

        dotStr.write("}")
        return dotStr.getvalue()

    @property
    def graphvizDot(self):
        return self._graphvizDot()

    @property
    def graphvizShortNameDot(self):
        return self._graphvizDot(shortName=True)

    @property
    def makeFileStr(self):
        """
        Generate a string that has the execution-dependency information in a
        "Makefile"-like format. It can be written into a "Makefile" and
        executed by "make".
        """
        for URL in self._pypeObjects.keys():
            URLParseResult = urlparse(URL)
            if URLParseResult.scheme != "task":
                continue
            taskObj = self._pypeObjects[URL]
            if not hasattr(taskObj, "script"):
                raise TaskTypeError("can not convert non shell script based workflow to a makefile")

        makeStr = StringIO()
        for URL in self._pypeObjects.keys():
            URLParseResult = urlparse(URL)
            if URLParseResult.scheme != "task":
                continue
            taskObj = self._pypeObjects[URL]
            inputFiles = taskObj.inputDataObjs
            outputFiles = taskObj.outputDataObjs
            #for oStr in [o.localFileName for o in outputFiles.values()]:
            if 1:
                oStr = " ".join([o.localFileName for o in outputFiles.values()])
                iStr = " ".join([i.localFileName for i in inputFiles.values()])
                makeStr.write("%s:%s\n" % (oStr, iStr))
                makeStr.write("\t%s\n\n" % taskObj.script)
        makeStr.write("all: %s" % " ".join([o.localFileName for o in outputFiles.values()]))
        return makeStr.getvalue()

    @staticmethod
    def getSortedURLs(rdfGraph, objs):
        if len(objs) != 0:
            connectedPypeNodes = set()
            for obj in objs:
                if isinstance(obj, PypeSplittableLocalFile):
                    obj = obj._completeFile
                for x in rdfGraph.transitive_objects(URIRef(obj.URL), pypeNS["prereq"]):
                    connectedPypeNodes.add(x)
            tSortedURLs = PypeGraph(rdfGraph, connectedPypeNodes).tSort()
        else:
            tSortedURLs = PypeGraph(rdfGraph).tSort()
        return tSortedURLs

    def refreshTargets(self, objs=[], callback=(None, None, None)):
        """
        Execute the DAG to reach all objects in the "objs" argument.
        """
        tSortedURLs = self.getSortedURLs(self._RDFGraph, objs)
        for URL in tSortedURLs:
            obj = self._pypeObjects[URL]
            if not isinstance(obj, PypeTaskBase):
                continue
            else:
                obj()
                obj.finalize()
        self._runCallback(callback)
        return True

    def _runCallback(self, callback=(None, None, None)):
        if callback[0] != None and callable(callback[0]):
            argv = []
            kwargv = {}
            if callback[1] != None and isinstance(callback[1], type(list())):
                argv = callback[1]
            else:
                raise TaskExecutionError("callback argument type error")
            if callback[2] != None and isinstance(callback[2], type(dict())):
                kwargv = callback[2]
            else:
                raise TaskExecutionError("callback argument type error")
            callback[0](*argv, **kwargv)
        elif callback[0] != None:
            raise TaskExecutionError("callback is not callable")

    @property
    def dataObjects(self):
        return [o for o in self._pypeObjects.values()
                if isinstance(o, PypeDataObjectBase)]

    @property
    def tasks(self):
        return [o for o in self._pypeObjects.values()
                if isinstance(o, PypeTaskBase)]

    @property
    def inputDataObjects(self):
        graph = self._RDFGraph
        inputObjs = []
        for obj in self.dataObjects:
            r = graph.query('SELECT ?o WHERE {<%s> pype:prereq ?o . }' % obj.URL,
                            initNs=dict(pype=pypeNS))
            if len(r) == 0:
                inputObjs.append(obj)
        return inputObjs

    @property
    def outputDataObjects(self):
        graph = self._RDFGraph
        outputObjs = []
        for obj in self.dataObjects:
            r = graph.query('SELECT ?s WHERE {?s pype:prereq <%s> . }' % obj.URL,
                            initNs=dict(pype=pypeNS))
            if len(r) == 0:
                outputObjs.append(obj)
        return outputObjs
""" distance = {n: -sys.maxint for n in graph.nodes()} queue, distance[graph.head] = [graph.head], 0 while any(queue): head = queue.pop() queue = list(graph.siblings(head)) + queue if not any(graph.siblings(head)): continue for sibling in graph.siblings(head): path = distance[head] + graph.edge(head, sibling) distance[sibling] = max(distance[sibling], path) return distance # ------------------------------------------------------------ # longest/shortest paths tests # ------------------------------------------------------------ if __name__ == "__main__": graph = Graph("s") graph.add_edge("s", "a", 1) graph.add_edge("s", "c", 2) graph.add_edge("c", "d", 3) graph.add_edge("c", "a", 4) graph.add_edge("a", "b", 6) graph.add_edge("b", "d", 1) graph.add_edge("b", "e", 2) graph.add_edge("d", "e", 1) print shortest_paths(graph) print longest_paths(graph)
def _RDFGraph(self):
    graph = Graph()
    for k, v in self.__dict__.iteritems():
        if k == "URL":
            continue
        if k[0] == "_":
            continue
        if k in ["inputDataObjs", "outputDataObjs", "mutableDataObjs", "parameters"]:
            if k == "inputDataObjs":
                for ft, f in v.iteritems():
                    graph.add((URIRef(self.URL), pypeNS["prereq"], URIRef(f.URL)))
            elif k == "outputDataObjs":
                for ft, f in v.iteritems():
                    graph.add((URIRef(f.URL), pypeNS["prereq"], URIRef(self.URL)))
            elif k == "mutableDataObjs":
                for ft, f in v.iteritems():
                    graph.add((URIRef(self.URL), pypeNS["hasMutable"], URIRef(f.URL)))
            elif k == "parameters":
                graph.add((URIRef(self.URL), pypeNS["hasParameters"], Literal(json.dumps(v))))
            continue
        if k in self.inputDataObjs:
            graph.add((URIRef(self.URL), pypeNS["inputDataObject"], URIRef(v.URL)))
            continue
        if k in self.outputDataObjs:
            graph.add((URIRef(self.URL), pypeNS["outputDataObject"], URIRef(v.URL)))
            continue
        if k in self.mutableDataObjs:
            graph.add((URIRef(self.URL), pypeNS["mutableDataObject"], URIRef(v.URL)))
            continue
        if hasattr(v, "URL"):
            graph.add((URIRef(self.URL), pypeNS[k], URIRef(v.URL)))
    graph.add((URIRef(self.URL), pypeNS["codeMD5digest"], Literal(self._codeMD5digest)))
    graph.add((URIRef(self.URL), pypeNS["parameterMD5digest"], Literal(self._paramMD5digest)))
    return graph
def parse_graph(self, file_path, model_name, category, sub_category):
    """Method to parse file and create a corresponding Graph object.

    Reads a tflite file into a tflite/Model object and then extracts
    operators, tensors, graph structure and metadata and stores them into
    Graph, Node and Edge objects. Nodes are operations and edges are tensors.

    Args:
        file_path (str): Path of the file to parse.
        model_name (str): Unique model name of the model being parsed.
        category (str): Problem category of the model.
        sub_category (str): Problem sub category of the model.

    Returns:
        The Graph object created for the file.
    """
    model = self.parse(file_path)

    nodes = list()
    edges = list()
    adj_list = dict()
    start_node_indices = list()

    # Global list of opcodes in the model, referenced by Operators
    opcodes = list()
    for opcode_index in range(model.OperatorCodesLength()):
        opcodes.append(model.OperatorCodes(opcode_index))

    # Only considering the main model
    subgraph = model.Subgraphs(0)

    # Dictionaries to store origin and destination nodes for each edge
    to_nodes = dict()
    from_nodes = dict()

    for tensor_index in range(subgraph.TensorsLength()):
        tensor = subgraph.Tensors(tensor_index)
        # Converting tensor to an Edge object
        new_edge = self._TENSOR_TO_EDGE.convert(tensor)
        edges.append(new_edge)

    # Populating to_nodes, from_nodes
    # Add proxy nodes for Input and Output of the model
    for input_index in range(subgraph.InputsLength()):
        new_node = Node.Node(label="Input_Placeholder",
                             operator_type="Input_Placeholder")
        nodes.append(new_node)
        node_index = len(nodes) - 1
        start_node_indices.append(node_index)

        edge_index = subgraph.Inputs(input_index)
        if edge_index not in from_nodes:
            from_nodes.update({edge_index: []})
        from_nodes[edge_index].append(node_index)

    for operator_index in range(subgraph.OperatorsLength()):
        operator = subgraph.Operators(operator_index)
        builtin_opcode = opcodes[operator.OpcodeIndex()].BuiltinCode()
        opname = self._builtin_optype[builtin_opcode]
        new_node = self._OP_TO_NODE.convert(operator, opname)

        # Condition to extract Conv 2D filter sizes and input and output
        # channels, as these are contained in tensors and not in operators
        if new_node.label == "CONV_2D":
            weight_tensor = subgraph.Tensors(operator.Inputs(1))
            new_node.filter_height = weight_tensor.Shape(1)
            new_node.filter_width = weight_tensor.Shape(2)

        nodes.append(new_node)
        node_index = len(nodes) - 1

        for input_index in range(operator.InputsLength()):
            edge_index = operator.Inputs(input_index)
            if edge_index not in to_nodes:
                to_nodes.update({edge_index: list()})
            to_nodes[edge_index].append(node_index)

        for output_index in range(operator.OutputsLength()):
            edge_index = operator.Outputs(output_index)
            if edge_index not in from_nodes:
                from_nodes.update({edge_index: list()})
            from_nodes[edge_index].append(node_index)

    for output_index in range(subgraph.OutputsLength()):
        new_node = Node.Node(label="Output_Placeholder",
                             operator_type="Output_Placeholder")
        nodes.append(new_node)
        node_index = len(nodes) - 1

        edge_index = subgraph.Outputs(output_index)
        if edge_index not in to_nodes:
            to_nodes.update({edge_index: []})
        to_nodes[edge_index].append(node_index)

    # Constructing adjacency list from to_nodes, from_nodes
    for edge_index in range(len(edges)):
        if edge_index not in from_nodes or edge_index not in to_nodes:
            continue
        for node1_index in from_nodes[edge_index]:
            for node2_index in to_nodes[edge_index]:
                if node1_index not in adj_list:
                    adj_list.update({node1_index: list()})
                adj_list[node1_index].append([edge_index, node2_index])

    graph = Graph.Graph(nodes, start_node_indices, edges, adj_list,
                        model_name, category, sub_category)
    # Removing nodes which are not reachable from input
    graph.process_nodes()
    graph.source = "TFLite"
    return graph
def parse_graph(self, file_path, model_name, category, sub_category,
                is_saved_model, input_operation_names):
    """Method to parse file and create a corresponding Graph object.

    Reads a GraphDef from a SavedModel or FrozenGraph file and extracts
    operations, tensors, graph structure and metadata and stores them into
    Graph, Node and Edge objects. Nodes are operations and edges are tensors.
    If the graph contains a 'StatefulPartitionedCall' operation, all
    operations are extracted and pushed into the database without tensor
    information or graph structure.

    Args:
        file_path (str): Path of the file to parse.
        model_name (str): Unique model name of the model being parsed.
        category (str): Problem category of the model.
        sub_category (str): Problem sub category of the model.
        is_saved_model (str, optional): "True" if file is in SavedModel
            format, defaults to "True".
        input_operation_names (list of str, optional): Names of the
            operations that are inputs to the model, defaults to [].

    Returns:
        The Graph object created for the file.
    """
    if is_saved_model == "True":
        saved_model = tf.core.protobuf.saved_model_pb2.SavedModel()
        with tf.io.gfile.GFile(file_path, "rb") as f:
            saved_model.ParseFromString(f.read())
        meta_graph = saved_model.meta_graphs[0]
        graph_def = meta_graph.graph_def
    else:
        with tf.io.gfile.GFile(file_path, "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name="")

    # Dictionaries to store origin and destination nodes for each edge
    to_nodes = dict()
    from_nodes = dict()

    edges = list()
    nodes = list()
    start_node_indices = list()
    tensor_to_index = dict()

    # Loop to populate to_nodes and from_nodes
    for operation in graph.get_operations():
        # If the graph contains a StatefulPartitionedCall operation, only
        # extracting the operations and returning an empty graph
        if operation.node_def.op == "StatefulPartitionedCall":
            print(
                "Graphs with operation 'StatefulPartitionedCall' are "
                "not fully supported for parsing, graph or tensor "
                "information not supported, only operators will be "
                "loaded into database.")

            # List of operations not to be considered, not of semantic use
            NODES_DISCARDED = [
                "Const", "VarHandleOp", "StatefulPartitionedCall",
                "NoOp", "Identity"
            ]

            nodes.clear()
            # Looping over all ops in the graph
            for node_def in graph_def.node:
                op = node_def.op
                if op in NODES_DISCARDED or "VariableOp" in op:
                    continue
                new_node = self._OP_TO_NODE.convert(None, node_def)
                nodes.append(new_node)

            # Looping over operations that occur within functions
            for func in graph_def.library.function:
                for node_def in func.node_def:
                    op = node_def.op
                    if op in NODES_DISCARDED or "VariableOp" in op:
                        continue
                    new_node = self._OP_TO_NODE.convert(None, node_def)
                    nodes.append(new_node)

            # Discarding unwanted nodes; filtering into a new list instead
            # of pop()-ing during iteration, which would skip elements
            nodes = [
                node for node in nodes
                if not (node.operator_type in NODES_DISCARDED
                        or "VariableOp" in node.operator_type)
            ]

            new_graph = Graph.Graph(nodes, [], [], {}, model_name,
                                    category, sub_category)
            new_graph.source = "TF"
            return new_graph

        if operation.node_def.op == "Const":
            continue

        # Converting operation to nodes
        new_node = self._OP_TO_NODE.convert(operation, operation.node_def)
        node_index = len(nodes)
        nodes.append(new_node)

        # Add input_operation_names to start_node_indices
        if operation.name in input_operation_names:
            start_node_indices.append(node_index)

        # Input node, also the start node to the graph
        if operation.node_def.op == "Placeholder":
            new_node.label = "Input_Placeholder"
            start_node_indices.append(node_index)

        # Populating from_nodes and to_nodes
        for in_tensor in list(operation.inputs):
            if in_tensor not in tensor_to_index:
                tensor_to_index[in_tensor] = len(edges)
                new_edge = self._TENSOR_TO_EDGE.convert(in_tensor)
                edges.append(new_edge)
            edge_index = tensor_to_index[in_tensor]
            if edge_index not in to_nodes:
                to_nodes.update({edge_index: []})
            to_nodes[edge_index].append(node_index)

        for out_tensor in list(operation.outputs):
            if out_tensor not in tensor_to_index:
                tensor_to_index[out_tensor] = len(edges)
                new_edge = self._TENSOR_TO_EDGE.convert(out_tensor)
                edges.append(new_edge)
            edge_index = tensor_to_index[out_tensor]
            if edge_index not in from_nodes:
                from_nodes.update({edge_index: []})
            from_nodes[edge_index].append(node_index)

    # Creating an adjacency list using from_nodes and to_nodes
    adj_list = dict()
    for edge_index in range(len(edges)):
        if edge_index not in from_nodes or edge_index not in to_nodes:
            continue
        for node1_index in from_nodes[edge_index]:
            for node2_index in to_nodes[edge_index]:
                if node1_index not in adj_list:
                    adj_list.update({node1_index: list()})
                adj_list[node1_index].append([edge_index, node2_index])

    if len(start_node_indices) == 0:
        print("Graph contains no input placeholders, cannot parse graph.")
        return None

    graph = Graph.Graph(nodes, start_node_indices, edges, adj_list,
                        model_name, category, sub_category)
    # Removing nodes which are not reachable from input
    graph.process_nodes()
    graph.source = "TF"
    return graph
def longest_increasing_subsequence_path(coll):
    graph = Graph(min(coll))
    for i, a in enumerate(coll):
        for b in coll[i:]:
            if a < b:
                graph.add_edge(a, b, 1)
    return longest_paths(graph)
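# Hedged usage sketch for the function above, assuming the Graph/longest_paths
# pair from this module: distances count edges along increasing chains that
# start at min(coll), so the LIS length is the largest distance plus one.
# Only meaningful when elements are distinct (nodes are keyed by value).
#
#   distances = longest_increasing_subsequence_path([3, 1, 4, 5, 9, 2, 6])
#   lis_length = max(distances.values()) + 1   # -> 4 (e.g. 1, 4, 5, 9)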