def test_ExportArchiFolderModels(cleandir):
    """Export all models in the 'Scenarios' folder and verify the CSV and
    pickle outputs are written."""
    # cleandir is a pytest fixture; only invoked directly when run as a script
    if __name__ == u"__main__":
        cleandir()
    assert (os.path.isfile(fileArchimateTest) is True)
    al = ArchiLib(fileArchimateTest)
    folder = u"Scenarios"
    logger.info(u"Exporting Folder : %s" % folder)
    listMTE = al.getModelsInFolder(folder)
    assert (listMTE is not None)
    logger.info(u"len(listMTE) = %d" % len(listMTE))
    # The test model is expected to contain exactly two models in this folder
    assert (len(listMTE) == 2)
    concepts = Concepts(u"Export", u"Pickle")
    for ModelToExport in listMTE:
        logger.info(u" Model : %s" % ModelToExport)
        d = concepts.addConceptKeyType(ModelToExport, u"Model")
        # NOTE(review): 'd' is not passed to recurseModel here (unlike the
        # exportArchiFolderModels method) - confirm recurseModel's signature.
        al.recurseModel(ModelToExport)
    al.outputCSVtoFile(concepts, fileExport=fileCSVExport)
    assert (os.path.isfile(fileCSVExport) is True)
    Concepts.saveConcepts(concepts, fileConceptsExport)
    logger.info(u"Save Concepts : %s" % fileConceptsExport)
    assert (os.path.isfile(fileConceptsExport) is True)
def findSimilarties(self):
    """Compute pairwise similarity between documents and topics, saving the
    resulting similarity concepts to conceptsSimilarityFile.

    Returns the populated conceptsSimilarity tree.
    """
    logger.info(u"Compute Similarity")
    self.conceptsSimilarity = Concepts(u"ConceptsSimilarity", u"Similarities")
    # Compute similarity between documents / concepts
    similarityThreshold = self.similarity
    for document in self.documentsList:
        # list.index gives the position of this document in the corpus
        indexNum = self.documentsList.index(document)
        self.df = self.concepts.getConcepts().keys()
        logger.info(u"++conceptsDoc %s" % (self.df[indexNum]))
        logger.info(u" documentsList[" + str(indexNum) + u"]=" + u"".join(x + u" " for x in document))
        # Show common topics: normalize quotes on both sides before comparing
        d = [unicode(x).strip().replace(u"'", u"") for x in document]
        e = [unicode(y).strip().replace(u"\"", u"") for y in self.listTopics]
        s1 = set(e)
        s2 = set(d)
        common = s1 & s2
        lc = [x for x in common]
        logger.info(u" Common Topics : %s{%s}" % (lc, self.al.dictName[document][ARCHI_TYPE]))
        # Delegate the actual similarity scoring for this document
        self.doComputation(indexNum, similarityThreshold, tfAddWords=True)
    Concepts.saveConcepts(self.conceptsSimilarity, conceptsSimilarityFile)
    logger.info(u"Saved Concepts : %s" % conceptsSimilarityFile)
    return self.conceptsSimilarity
def exportNeo4JToConcepts(self, concepts, fileNodes=u"nodes.p"):
    """Harvest nodes and relations from Neo4J into the given concepts tree,
    save it to fileNodes, and return it."""

    def harvest(query, expectedLen):
        # Run one cypher query and record each well-formed row as a concept.
        rows, _meta = self.cypherQuery(query)
        for row in rows:
            if len(row) == expectedLen:
                logger.info(u"%s[%s]" % (row[0], row[1]))
                concepts.addConceptKeyType(row[0], row[1])
            else:
                logger.warn(u"Not a standard node : %d : %s" % (len(row), row))

    # All nodes: rows of (name, label)
    harvest(u"Match n return n", 2)
    # Match r relations: rows of (n, r, m) fields
    harvest(u"match n-[r]-m return n, r, m", 6)

    Concepts.saveConcepts(concepts, fileNodes)
    return concepts
def requirementAnalysis(fileArchimate=None):
    """Extract noun words from every archimate:Requirement sentence, save them
    as a concepts file, and build chunks from the result.

    :param fileArchimate: path to the Archimate model; falls back to the
        author's default model when None.
    """
    if fileArchimate is None:
        fileArchimate = u"/Users/morrj140/Documents/SolutionEngineering/Archimate Models/DVC v38.archimate"

    al = ArchiLib(fileArchimate)
    conceptsFile = fileConceptsRequirements

    searchTypes = [u"archimate:Requirement"]
    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")

    # enumerate replaces the hand-maintained counter
    for n, sentence in enumerate(nl, start=1):
        logger.debug(u"%s" % sentence)
        c = concepts.addConceptKeyType(u"Document" + str(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + str(n))

        # The original guard was 'if True and sentence is not None' - the
        # 'True and' was dead code and has been removed.
        if sentence is not None:
            # Drop stopwords before POS tagging
            cleanSentence = ' '.join([word for word in sentence.split(u" ") if word not in stop])
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                # Keep only multi-character nouns (POS tags starting with N)
                if len(word) > 1 and pos[0] == u"N":
                    e = d.addConceptKeyType(word, u"Word")
                    f = e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    chunks = Chunks(concepts)
    chunks.createChunks()
def collectDependancyAnalysisNodes(self):
    """Collect BusinessProcess->BusinessProcess flow edges as (target, source)
    pairs, record them as concepts, and run the dependency analysis.

    Returns (self.concepts, listTSort) where listTSort is the edge list.
    """
    listTSort = list()

    for x in self.al.dictEdges.keys():
        logger.debug(u"[%s]=%s" % (self.al.dictEdges[x][u"id"], x))

        if u"source" in self.al.dictEdges[x]:
            source = self.al.dictEdges[x][u"source"]
            target = self.al.dictEdges[x][u"target"]

            logger.debug(u" Rel : %s" % (self.al.dictEdges[x][ARCHI_TYPE]))

            # BUG FIX: the original tested membership against the bare string
            # (u"archimate:FlowRelationship") - parentheses without a comma do
            # not make a tuple, so 'in' performed a substring test. A real
            # one-element tuple makes the intent explicit and exact.
            if self.al.dictEdges[x][ARCHI_TYPE] in (u"archimate:FlowRelationship",):
                if (self.al.dictNodes[source][ARCHI_TYPE] == u"archimate:BusinessProcess") and \
                        self.al.dictNodes[target][ARCHI_TYPE] == u"archimate:BusinessProcess":

                    sourceName = self.al.getNodeName(source)
                    targetName = self.al.getNodeName(target)

                    # Skip processes whose names start with a digit
                    # (already batch-numbered in a prior run)
                    if sourceName[0].isdigit() or targetName[0].isdigit():
                        continue

                    logger.debug(u" %s:%s" % (sourceName, targetName))

                    l = list()

                    sc = self.findConcept(self.concepts, sourceName)
                    if sc is None:
                        logger.debug(u"New Target - %s" % sourceName)
                        sc = self.concepts.addConceptKeyType(self.al.getNodeName(source), u"Source")
                        self.getWords(sourceName, sc)
                    else:
                        logger.debug(u"Prior Target %s" % sourceName)

                    tc = self.findConcept(self.concepts, targetName)
                    if tc is None:
                        logger.debug(u"New Target %s" % targetName)
                        tc = sc.addConceptKeyType(self.al.getNodeName(target), u"Target")
                        # NOTE(review): this indexes words from sourceName for
                        # the target concept - looks like it should be
                        # targetName; left unchanged pending confirmation.
                        self.getWords(sourceName, tc)
                    else:
                        logger.debug(u"Prior Target %s" % targetName)
                        sc.addConcept(tc)

                    l.append(target)
                    l.append(source)
                    listTSort.append(l)

    logger.debug(u"Edges = %s" % listTSort)

    Concepts.saveConcepts(self.concepts, fileConceptsTraversal)

    self.dependancyAnalysis(listTSort)

    return self.concepts, listTSort
def test_ExportArchi(cleandir):
    """Export every source--relation--target triple of the test model into a
    Concepts pickle and verify the expected node/edge counts."""
    if __name__ == u"__main__":
        cleandir()
    logger.info(u"Using : %s" % fileArchimateTest)
    assert (os.path.isfile(fileArchimateTest) is True)
    al = None
    concepts = None
    al = ArchiLib(fileArchimateTest)
    assert (al is not None)
    concepts = Concepts(u"Node", u"Nodes")
    assert (concepts is not None)
    logger.info(u"Found %d Nodes" % len(al.dictNodes))
    logger.info(u"Found %d Edges" % len(al.dictEdges))
    # Fixed counts for the checked-in test model
    assert (len(al.dictNodes) == 45)
    assert (len(al.dictEdges) == 36)
    count = 0
    listTSort = list()
    for x in al.dictEdges.keys():
        logger.info(u"[%s]=%s" % (al.dictEdges[x][u"id"], x))
        if u"source" in al.dictEdges[x]:
            source = al.dictEdges[x][u"source"]
            target = al.dictEdges[x][u"target"]
            logger.info(u" Rel : %s" % (al.dictEdges[x][ARCHI_TYPE]))
            sourceName = al.getNodeName(source)
            targetName = al.getNodeName(target)
            # [10:] strips the "archimate:" prefix from the type name
            logger.info(u" %s--%s--%s" % (sourceName, al.dictEdges[x][ARCHI_TYPE][10:], targetName))
            sc = concepts.addConceptKeyType(sourceName, al.dictNodes[source][ARCHI_TYPE][10:])
            tc = sc.addConceptKeyType(targetName, al.dictNodes[target][ARCHI_TYPE][10:])
    Concepts.saveConcepts(concepts, fileConceptsExport)
    # 17 distinct source nodes expected in the export
    assert (len(concepts.cd) == 17)
    assert (os.path.isfile(fileConceptsExport) is True)
    assert (concepts.typeName == u"Nodes")
def saveTopics(self, topics):
    """Persist (topic, count) pairs as a Concepts tree.

    Each entry's first element becomes a 'Topic' concept; its second element
    is stored on the concept as .count. Returns the saved tree.
    """
    wordConcepts = Concepts(u"TopicConcepts", u"Topics")

    for entry in topics:
        logger.debug(u"Topic:" + entry[0])
        node = wordConcepts.addConceptKeyType(entry[0], u"Topic")
        node.count = entry[1]

    Concepts.saveConcepts(wordConcepts, self.topicsFile)
    return wordConcepts
def _saveConcepts(self):
    """Write both concept collections to their pickle files, logging paths."""
    for conceptSet, path in ((self.documentsConcepts, self.documentsConceptsFile),
                             (self.wordsConcepts, self.wordsConceptsFile)):
        logger.info(u"Saving %s" % path)
        Concepts.saveConcepts(conceptSet, path)

    # '%' binds tighter than '+': the format applies to os.getcwd() and the
    # remaining pieces are concatenated onto the message afterwards.
    logger.info(u"Saving Documents %s" % os.getcwd() + os.sep + self.documentsConceptsFile)
    logger.info(u"Saving Words%s" % os.getcwd() + os.sep + self.wordsConceptsFile)
def __init__(self, fileArchimate):
    """Load the Archimate model and prepare an empty BusinessProcess
    concept tree for the analysis."""
    self.fileArchimate = fileArchimate
    logger.info(u"Using : %s" % self.fileArchimate)
    self.al = ArchiLib(fileArchimate)
    self.concepts = Concepts(u"BusinessProcess", u"archimate:BusinessProcess")
def exportArchi(self):
    """Export all source--relation--target triples of the model into a
    concepts tree and save it to self.fileConceptsExport.

    Each relation concept gets a unique 'type-hash' key so parallel edges
    between the same node pair do not collide. Returns the concepts tree.
    """
    concepts = Concepts(u"Node", u"Nodes")

    logger.info(u"Found %d Nodes" % len(self.al.dictNodes))
    logger.info(u"Found %d Edges" % len(self.al.dictEdges))

    for x in self.al.dictEdges.keys():
        logger.debug(u"Edge [%s]=%s" % (self.al.dictEdges[x], x))

        # 'in' replaces the deprecated dict.has_key (removed in Python 3)
        if u"source" in self.al.dictEdges[x] and u"target" in self.al.dictEdges[x]:
            typeEdge = self.al.dictEdges[x][ARCHI_TYPE]
            logger.debug(u"Edge : %s" % typeEdge)

            source = self.al.dictEdges[x][u"source"]
            logger.debug(u"Source : %s" % source)

            target = self.al.dictEdges[x][u"target"]
            logger.debug(u"Target : %s" % target)

            logger.debug(u" Rel : %s" % (self.al.dictEdges[x][ARCHI_TYPE]))

            sourceName = self.al.getNodeName(source)
            targetName = self.al.getNodeName(target)

            # [10:] strips the "archimate:" prefix from type names
            logger.debug(u" %s--%s--%s" % (sourceName, self.al.dictEdges[x][ARCHI_TYPE][10:], targetName))

            if source in self.al.dictNodes:
                sc = concepts.addConceptKeyType(sourceName, self.al.dictNodes[source][ARCHI_TYPE][10:])

                # Hash the cleaned "(source,target)" pair to build a unique
                # relation key of the form "<type>-<sha224>"
                nameEdge = u"(" + sourceName + u"," + targetName + u")"
                logger.debug(u"nameEdge : %s[%d]" % (nameEdge, len(nameEdge)))
                logger.debug(u"typeEdge : %s" % typeEdge[10:])

                ne = str(self.al.cleanString(nameEdge))
                hl = hashlib.sha224(str(ne)).hexdigest()
                logger.debug(u"hash : %s" % hl)

                nh = u"%s-%s" % (typeEdge[10:], hl)
                rc = sc.addConceptKeyType(nh, typeEdge[10:])

                if target in self.al.dictNodes:
                    tc = rc.addConceptKeyType(targetName, self.al.dictNodes[target][ARCHI_TYPE][10:])

    Concepts.saveConcepts(concepts, self.fileConceptsExport)

    return concepts
def __init__(self, concepts=None):
    """Use the given concepts tree, or load one from self.conceptFile when
    none is supplied, and prepare an empty chunk tree."""
    if concepts is None:
        logger.info(u"Loading : %s" % self.conceptFile)
        self.concepts = Concepts.loadConcepts(self.conceptFile)
    else:
        logger.info(u"Using : %s" % concepts.name)
        self.concepts = concepts
    self.chunkConcepts = Concepts(u"Chunk", u"Chunks")
def test_ExportArchi(cleandir):
    """Duplicate of the earlier test_ExportArchi: export every
    source--relation--target triple of the test model and verify counts."""
    if __name__ == u"__main__":
        cleandir()
    logger.info(u"Using : %s" % fileArchimateTest)
    assert (os.path.isfile(fileArchimateTest) is True)
    al = None
    concepts = None
    al = ArchiLib(fileArchimateTest)
    assert (al is not None)
    concepts = Concepts(u"Node", u"Nodes")
    assert (concepts is not None)
    logger.info(u"Found %d Nodes" % len(al.dictNodes))
    logger.info(u"Found %d Edges" % len(al.dictEdges))
    # Fixed counts for the checked-in test model
    assert (len(al.dictNodes) == 45)
    assert (len(al.dictEdges) == 36)
    count = 0
    listTSort = list()
    for x in al.dictEdges.keys():
        logger.info(u"[%s]=%s" % (al.dictEdges[x][u"id"], x))
        if u"source" in al.dictEdges[x]:
            source = al.dictEdges[x][u"source"]
            target = al.dictEdges[x][u"target"]
            logger.info(u" Rel : %s" % (al.dictEdges[x][ARCHI_TYPE]))
            sourceName = al.getNodeName(source)
            targetName = al.getNodeName(target)
            # [10:] strips the "archimate:" prefix from the type name
            logger.info(u" %s--%s--%s" % (sourceName, al.dictEdges[x][ARCHI_TYPE][10:], targetName))
            sc = concepts.addConceptKeyType(sourceName, al.dictNodes[source][ARCHI_TYPE][10:])
            tc = sc.addConceptKeyType(targetName, al.dictNodes[target][ARCHI_TYPE][10:])
    Concepts.saveConcepts(concepts, fileConceptsExport)
    assert(len(concepts.cd) == 17)
    assert (os.path.isfile(fileConceptsExport) is True)
    assert(concepts.typeName == u"Nodes")
def PPTXCrawl(filePPTX):
    """Crawl a PowerPoint file, log the discovered concepts, and persist
    them to fileConceptsPPTX."""
    logger.info(u"Using : %s" % filePPTX)
    crawler = PPTXCreateArchil(filePPTX)
    harvested = crawler.crawlPPTX()
    harvested.logConcepts()
    Concepts.saveConcepts(harvested, fileConceptsPPTX)
def __init__(self, conceptFile=None):
    """Load base concepts (default 'documents.p') and create the empty
    n-gram, score, and subject concept trees."""
    # 'is None' replaces the original '== None' identity-vs-equality slip
    if conceptFile is None:
        conceptFile = u"documents.p"

    logger.info(u"Load Concepts from %s " % (conceptFile))
    self.concepts = Concepts.loadConcepts(conceptFile)
    logger.info(u"Loaded Concepts")

    self.conceptsNGram = Concepts(u"n-gram", u"NGRAM")
    self.conceptsNGramScore = Concepts(u"NGram_Score", u"Score")
    self.conceptsNGramSubject = Concepts(u"Subject", u"Subjects")
def test_PPTXCrawl(fileArchimate):
    """Crawl the input PPTX and verify the concepts pickle is written."""
    assert (os.path.isfile(filePPTXIn) is True)
    logger.info(u"Using : %s" % filePPTXIn)
    cpptx = PPTXCreateArchil(filePPTXIn, fileArchimate)
    c = cpptx.crawlPPTX()
    Concepts.saveConcepts(c, fileConceptsPPTX)
    assert (os.path.isfile(fileConceptsPPTX) is True)
def PPTXCreateArchi():
    """Crawl the input PPTX against the test model, log and save the
    resulting concepts, timing the whole run."""
    start_time = ArchiLib.startTimer()

    logger.info(u"Using : %s" % filePPTXIn)
    crawler = PPTXCreateArchil(filePPTXIn, fileArchimateTest)
    harvested = crawler.crawlPPTX()
    harvested.logConcepts()
    Concepts.saveConcepts(harvested, fileConceptsPPTX)

    ArchiLib.stopTimer(start_time)
def importConceptsIntoNeo4J(fileArchimate, gdb, ClearNeo4J=True):
    """Load the exported concepts pickle and push it into Neo4J."""
    importer = ConceptsImportNeo4J(fileArchimate, gdb, ClearNeo4J=ClearNeo4J)
    loaded = Concepts.loadConcepts(fileConceptsExport)
    importer.importNeo4J(loaded)
def importConceptsIntoArchi():
    """Import batch concepts into the Archimate model under a new
    timestamped subfolder, then write the updated XML."""
    logger.info(u"Using : %s" % fileArchimateTest)
    conceptFile = fileConceptsBatches
    logger.info(u"Loading :" + conceptFile)
    concepts = Concepts.loadConcepts(conceptFile)
    al = ArchiLib()
    # Create Subfolder with a unique, time-stamped name
    folder = u"Implementation & Migration"
    subfolder = u"Dependancy Analysis - %s" % time.strftime(u"%Y%d%m_%H%M%S")
    attrib = dict()
    attrib[u"id"] = al.getID()
    attrib[u"name"] = subfolder
    al.insertNode(u"folder", folder, attrib)
    logger.info(u"--- Insert Nodes ---")
    insertConceptNode(al, concepts, subfolder)
    logger.info(u"--- Insert Relations ---")
    insertConceptRelation(al, concepts)
    al.outputXMLtoFile(filename=u"import_concepts.archimate")
def createArchimateConcepts(fileArchimate, fileConceptsArch):
    """Serialize the Archimate model's folder tree into a concepts file."""
    logger.info(u"Using : %s" % fileArchimate)
    # NOTE(review): the concepts tree is named after the module-level
    # fileArchimateModel, not the fileArchimate parameter - confirm intent.
    concepts = Concepts(fileArchimateModel, u"Archimate")
    al = ArchiLib(fileArchimate)
    al.logTypeCounts()
    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)
    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)
def conceptsImportNeo4J(fileArchimate, gdb):
    """Load the exported concepts pickle and import it into Neo4J.

    :param fileArchimate: Archimate model path handed to the importer.
    :param gdb: Neo4J graph database endpoint.
    """
    logger.info(u"Using : %s" % fileConceptsExport)
    importConcepts = Concepts.loadConcepts(fileConceptsExport)

    # BUG FIX: the original ignored both parameters and used the test-only
    # globals fileArchimateTest / gdbTest instead.
    in4j = ConceptsImportNeo4J(fileArchimate, gdb, ClearNeo4J=True)
    in4j.importNeo4J(importConcepts)
def test_importConceptsIntoNeo4J(fileArchimate, gdb):
    """Import the exported concepts pickle into a cleared Neo4J instance."""
    assert (os.path.isfile(fileArchimate) is True)
    icnj = ConceptsImportNeo4J(fileArchimate, gdb, ClearNeo4J=True)
    assert (os.path.isfile(fileConceptsExport) is True)
    importConcepts = Concepts.loadConcepts(fileConceptsExport)
    icnj.importNeo4J(importConcepts)
def exportArchiFolderModels(self, folder):
    """Export every model in the given folder to CSV and save the concepts
    tree to self.conceptsFile."""
    logger.info(u"Exporting Folder : %s" % folder)
    listMTE = self.al.getModelsInFolder(folder)
    concepts = Concepts(u"Export", u"Pickle")
    for ModelToExport in listMTE:
        logger.info(u" Model : %s" % ModelToExport)
        d = concepts.addConceptKeyType(ModelToExport, u"Model")
        # Recurse into the model, attaching everything under concept 'd'
        self.al.recurseModel(ModelToExport, d)
    self.al.outputCSVtoFile(concepts, fileCSVExport)
    Concepts.saveConcepts(concepts, self.conceptsFile)
    logger.info(u"Save Concepts : %s" % self.conceptsFile)
def getOpenXmlText(filename, ftype):
    """Extract document properties and indexable text from an OpenXML file
    into a Concepts tree and return it.

    :param filename: path to the OpenXML document.
    :param ftype: concept type name for the root concept.
    """
    logger.info("OpenXmlText: %s" % filename)
    # Removed the original 'document = openxmldoc' line: 'openxmldoc' is not
    # defined in this scope and the variable was never used.
    doc = openxmllib.openXmlDocument(path=filename)

    c = Concepts(filename, ftype)

    logger.debug("%s\n" % (doc.allProperties))
    ap = c.addConceptKeyType("allProperties", "PROPERTIES")
    for x in doc.allProperties:
        logger.info("cp %s:%s" % (x, doc.allProperties[x]))
        ap.addConceptKeyType(doc.allProperties[x], x)

    logger.info("it %s\n" % (doc.indexableText(include_properties=True)))
    c.addConceptKeyType(doc.indexableText(include_properties=True), "TEXT")

    return c
def createTagCloud(conceptFile, topic):
    """Render a tag-cloud image for one topic from a saved concepts file,
    timing the run."""
    start_time = ArchiLib.startTimer()

    loaded = Concepts.loadConcepts(conceptFile)
    cloud = TopicCloud(loaded,
                       font_path=u"/Users/morrj140/Fonts/DroidSans.ttf",
                       imageFile=u"Topics.png")
    cloud.createTagCloud(topic)

    ArchiLib.stopTimer(start_time)
def __init__(self, fileCrawl, fileArchimate):
    """Prepare a PPTX crawler: shape/edge/text registries, the Archimate
    helper, a graph backend, and the presentation to crawl."""
    # EMU (English Metric Units) per inch; used to scale shape geometry
    self.EMU = 914400.0
    self.fileArchimate = fileArchimate
    self.path_to_presentation = fileCrawl
    self.dictNodes = dict()
    self.dictEdges = dict()
    self.dictText = dict()
    self.dictNodeXY = dict()
    self.dictTextXY = dict()
    self.al = ArchiLib(fileArchimate)
    # Alternative graph backends left for reference
    self.graph = GraphVizGraph()
    # self.graph = NetworkXGraph()
    # self.graph = PatternGraph()
    self.prs = Presentation(self.path_to_presentation)
    self.concepts = Concepts(u"Application", u"Relations")
def test_ArchimateConcepts(cleandir):
    """Serialize the test model's folder tree into a concepts file and
    verify the file is written."""
    if __name__ == u"__main__":
        cleandir()
    logger.info(u"Using : %s" % fileArchimateTest)
    assert (os.path.isfile(fileArchimateTest) is True)
    concepts = Concepts(fileConceptsArch, u"Archimate")
    al = ArchiLib(fileArchimateTest)
    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)
    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)
    assert (os.path.isfile(fileConceptsArch) is True)
def test_ExportArchiModel(cleandir):
    """Export a single named model and verify pickle and CSV outputs exist."""
    if __name__ == u"__main__":
        cleandir()
    assert (os.path.isfile(fileArchimateTest) is True)
    al = ArchiLib(fileArchimateTest)
    listMTE = list()
    listMTE.append(u"01. Market to Leads")
    concepts = Concepts(u"Export", u"Model")
    for ModelToExport in listMTE:
        # NOTE(review): 'concepts' is not passed to recurseModel, so the
        # saved pickle may be empty - confirm recurseModel's side effects.
        al.recurseModel(ModelToExport)
    Concepts.saveConcepts(concepts, fileConceptsExport)
    assert (os.path.isfile(fileConceptsExport) is True)
    al.outputCSVtoFile(concepts, fileCSVExport)
    assert (os.path.isfile(fileCSVExport) is True)
def test_CreateArchimateConcepts(cleandir, fileArchimate):
    """Create a concepts file from the model's folder tree and verify it."""
    assert (os.path.isfile(fileArchimate) is True)
    logger.info(u"Using : %s" % fileArchimate)
    concepts = Concepts(fileArchimate, u"Archimate")
    al = ArchiLib(fileArchimate)
    lc = al.logTypeCounts()
    assert (len(lc) > 0)
    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)
    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)
    assert (os.path.isfile(fileConceptsArch) is True)
def getPDFText(filename):
    """Return a list of page texts (each suffixed with '. ') extracted from
    the given PDF; document-info entries are recorded as concepts.

    :param filename: path to the PDF file.
    """
    logger.info("filename: %s" % filename)

    newparatextlist = []

    # open() replaces the Python-2-only file() builtin
    pdfDoc = PdfFileReader(open(filename, "rb"))
    pdfDict = pdfDoc.getDocumentInfo()

    c = Concepts(filename, "PDF")

    for x in pdfDict.keys():
        try:
            # x[1:] strips the leading '/' of PDF info-dictionary keys
            c.addConceptKeyType(x[1:], pdfDict[x])
        except Exception:
            # narrowed from a bare 'except:'; best-effort - skip bad entries
            logger.warn("ops...")

    for page in pdfDoc.pages:
        text = page.extractText()
        logger.info("PDF : %s" % text)
        newparatextlist.append(text + ". ")

    return newparatextlist
def test_RequirementAnalysis(cleandir, fileArchimate):
    """Extract noun words from Requirement sentences, save the concepts,
    and verify the chunking output is produced."""
    assert (os.path.isfile(filePPTXIn) is True)
    al = ArchiLib(fileArchimate)
    conceptsFile = fileConceptsRequirements
    searchTypes = list()
    searchTypes.append(u"archimate:Requirement")
    nl = al.getTypeNodes(searchTypes)
    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")
    n = 0
    for sentence in nl:
        n += 1
        logger.debug(u"%s" % sentence)
        c = concepts.addConceptKeyType(u"Document" + unicode(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + unicode(n))
        if True and sentence is not None:
            # Drop stopwords before POS tagging
            cleanSentence = ' '.join([word for word in sentence.split(" ") if word not in stop])
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                # Keep only multi-character nouns (POS tags starting with N)
                if len(word) > 1 and pos[0] == u"N":
                    e = d.addConceptKeyType(word, u"Word")
                    f = e.addConceptKeyType(pos, u"POS")
    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)
    assert (os.path.isfile(conceptsFile) is True)
    chunks = Chunks(concepts)
    chunks.createChunks()
    assert (os.path.isfile(fileConceptsChunks) is True)
def analyzeNamedEntities(self):
    """Scan model edges for data-modeling relations (access, specialisation,
    composition, aggregation) and log the source/target pairs."""
    rels = (u"archimate:AccessRelationship", u"archimate:SpecialisationRelationship",
            u"archimate:CompositionRelationship", u"archimate:AggregationRelationship")
    listType = (u"archimate:Requirement", )
    dictEntities = self.al.getTypeNodes(listType)
    concepts = Concepts(u"Entities", u"BusinessObject")
    for x in self.al.dictEdges.keys():
        try:
            logger.debug(u"[%s]=%s" % (x, self.al.dictEdges[x][ARCHI_TYPE]))
            source = self.al.dictEdges[x][u"source"]
            target = self.al.dictEdges[x][u"target"]
            logger.debug(u" Source : %s" % source)
            logger.debug(u" Target : %s" % target)
        except:
            # Edges without source/target (or missing type) are skipped
            logger.warn(u"[%s] ARCH_TYPE Exception" % (x))
            continue
        if self.al.dictEdges[x][ARCHI_TYPE] in rels:
            logger.info(u"%s -> [ %s ] -> %s" % (self.al.dictNodes[source][u"name"][:20],
                                                 self.al.dictEdges[x][ARCHI_TYPE],
                                                 self.al.dictNodes[target][u"name"][:20]))
            # NOTE(review): the inner loop reuses 'x', shadowing the edge key,
            # and 'concepts' is saved below but never populated - confirm.
            listNodes = self.al.getEdgesForNode(source, rels)
            for x in listNodes:
                logger.debug(u" %s" % (x))
    Concepts.saveConcepts(concepts, fileConceptsRelations)
def graphConcepts(graph, conceptFile):
    """Load a concepts file into the given graph backend, then trigger the
    backend-appropriate export."""
    concepts = Concepts.loadConcepts(conceptFile)

    graph.addGraphNodes(concepts)
    graph.addGraphEdges(concepts)

    # Each backend has its own export entry point; the checks are kept as
    # independent ifs to match the original behavior exactly.
    if isinstance(graph, NetworkXGraph):
        graph.saveJSON(concepts)

    if isinstance(graph, GraphVizGraph):
        graph.exportGraph()

    if isinstance(graph, PatternGraph):
        graph.exportGraph()
def createArchiFromConcepts(fileArchimate, fileConceptsImport, fileArchimateImport):
    """Import a concepts file into the Archimate model under a timestamped
    subfolder and export the updated model XML."""
    logger.info(u"Using : %s" % fileArchimate)
    logger.info(u"Loading :" + fileConceptsImport)
    ic = ConceptsImportArchi(fileArchimate, fileConceptsImport)
    concepts = Concepts.loadConcepts(fileConceptsImport)
    # Create Subfolder with a unique, time-stamped name
    folder = u"Implementation & Migration"
    subfolder = u"Dependancy Analysis - %s" % time.strftime(u"%Y%d%m_%H%M%S")
    ic.importConcepts(concepts, folder, subfolder)
    ic.exportXML(fileArchimateImport)
def analyzeNetworkX(self, concepts, fileConceptsExport=None):
    """Build a NetworkX graph from the concepts (loading them from
    fileConceptsExport when concepts is None) and rank nodes via PageRank."""
    if concepts is None and fileConceptsExport is not None:
        concepts = Concepts.loadConcepts(fileConceptsExport)
    logger.info(u" Concepts : %s[%d][%s]" % (concepts.name, len(concepts.getConcepts()), concepts.typeName))
    self.graph = NetworkXGraph()
    logger.info(u"Adding NetworkX nodes to the graph ...")
    self.addGraphNodes(concepts)
    logger.info(u"Adding NetworkX edges to the graph ...")
    self.addGraphEdges(concepts)
    # PageRank over the assembled graph drives the analysis report
    gl = nx.pagerank(self.graph.G)
    self.analyzeGraph(gl, u"PageRank")
def test_ConceptsImportArchi():
    """Round-trip: import previously exported concepts back into the test
    model under a timestamped subfolder."""
    start_time = ArchiLib.startTimer()
    logger.info(u"Using : %s" % fileArchimateTest)
    logger.info(u"Loading :" + fileConceptsExport)
    ic = ConceptsImportArchi(fileArchimateTest, fileConceptsExport)
    concepts = Concepts.loadConcepts(fileConceptsExport)
    # Create Subfolder with a unique, time-stamped name
    folder = u"Implementation & Migration"
    subfolder = u"Dependancy Analysis - %s" % time.strftime(u"%Y%d%m_%H%M%S")
    ic.importConcepts(concepts, folder, subfolder)
    ic.exportXML()
    ArchiLib.stopTimer(start_time)
def createArchimate(self, fileArchiModel, fileArchiP):
    """Build an Archimate XML model file from a pickled concepts file.

    :param fileArchiModel: output path for the generated .archimate XML.
    :param fileArchiP: input pickle of concepts to convert.
    """
    archi = Concepts.loadConcepts(fileArchiP)

    rootName = etree.QName(ARCHIMATE_NS, u'model')
    root = etree.Element(rootName, version=u"2.6.0", name=fileArchiP, id=u"02cec69f", nsmap=NS_MAP)

    xmlSheet = etree.ElementTree(root)

    self.createArchimateElements(xmlSheet, archi, root)

    output = StringIO.StringIO()
    try:
        xmlSheet.write(output, pretty_print=True)
        logger.debug(u"%s" % (output.getvalue()))

        # 'with' guarantees the model file is closed even if the write
        # fails (the original leaked the handle on exception)
        with open(fileArchiModel, u'w') as f:
            f.write(output.getvalue())
    finally:
        output.close()
class DependancyAnalysis(object):
    """Derives batches of BusinessProcess tasks that can run together, based
    on archimate:FlowRelationship edges between BusinessProcess nodes."""

    def __init__(self, fileArchimate):
        # Load the model and prepare an empty BusinessProcess concept tree
        self.fileArchimate = fileArchimate
        logger.info(u"Using : %s" % self.fileArchimate)
        self.al = ArchiLib(fileArchimate)
        self.concepts = Concepts(u"BusinessProcess", u"archimate:BusinessProcess")

    # "Batches" are sets of tasks that can be run together
    def get_task_batches(self, nodes):
        """Topologically sort Task nodes into dependency-free batches.

        Raises ValueError when the dependency graph contains a cycle.
        """
        # Build a map of node names to node instances
        name_to_instance = dict((n.name, n) for n in nodes)
        for x in name_to_instance.keys():
            logger.debug(u"name_to_instance[%s]=%s : %s" % (x, name_to_instance[x].name, name_to_instance[x].depends))
        # Build a map of node names to dependency names
        name_to_deps = dict((n.name, set(n.depends)) for n in nodes)
        for x in name_to_deps.keys():
            logger.debug(u"name_to_deps[%s]=%s" % (x, name_to_deps[x]))
        # This is where we'll store the batches
        batches = []
        n = 0
        # While there are dependencies to solve...
        while name_to_deps:
            logger.info(u"length %d" % len(name_to_deps))
            # Get all nodes with no dependencies
            ready = {name for name, deps in name_to_deps.iteritems() if not deps}
            n += 1
            logger.info(u"iteration : %d" % n)
            for x in ready:
                logger.info(u"No Dep %s" % (x))
            # If there aren't any, we have a loop in the graph
            if not ready:
                msg = u"Circular dependencies found!\n"
                msg += self.format_dependencies(name_to_deps)
                raise ValueError(msg)
            # Remove them from the dependency graph
            for name in ready:
                del name_to_deps[name]
            for deps in name_to_deps.itervalues():
                deps.difference_update(ready)
            # Add the batch to the list
            batches.append({name_to_instance[name] for name in ready})
        # Return the list of batches
        return batches

    # Format a dependency graph for printing
    def format_dependencies(self, name_to_deps):
        msg = []
        for name, deps in name_to_deps.iteritems():
            for parent in deps:
                msg.append(u"%s -> %s" % (name, parent))
        return "\n".join(msg)

    # Create and format a dependency graph for printing
    def format_nodes(self, nodes):
        return self.format_dependencies(dict((n.name, n.depends) for n in nodes))

    def findConcept(self, concepts, name, n=0):
        """Depth-limited (3 levels) recursive search for a concept by name.

        NOTE(review): a match found in a recursive call may be overwritten by
        a later sibling's None result - confirm whether first-match semantics
        are intended.
        """
        n += 1
        c = None
        if n == 3:
            return c
        for x in concepts.getConcepts().values():
            if x.name == name:
                return x
            else:
                c = self.findConcept(x, name, n)
        return c

    def getWords(self, s, concepts):
        """Attach lemmatized noun words (with POS tags) of sentence s to the
        given concept."""
        lemmatizer = WordNetLemmatizer()
        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(s)):
            # Keep only multi-character nouns (POS tags starting with N)
            if len(word) > 1 and pos[0] == u"N":
                lemmaWord = lemmatizer.lemmatize(word.lower())
                e = concepts.addConceptKeyType(lemmaWord, u"Word")
                f = e.addConceptKeyType(pos, u"POS")

    def collectDependancyAnalysisNodes(self):
        """Collect BusinessProcess->BusinessProcess flow edges as
        (target, source) pairs and run the dependency analysis.

        Returns (self.concepts, listTSort).
        """
        count = 0
        listTSort = list()
        for x in self.al.dictEdges.keys():
            logger.debug(u"[%s]=%s" % (self.al.dictEdges[x][u"id"], x))
            if u"source" in self.al.dictEdges[x]:
                source = self.al.dictEdges[x][u"source"]
                target = self.al.dictEdges[x][u"target"]
                logger.debug(u" Rel : %s" % (self.al.dictEdges[x][ARCHI_TYPE]))
                # NOTE(review): 'in' against a parenthesized string performs a
                # substring test, not tuple membership - works for this single
                # value but fragile.
                if self.al.dictEdges[x][ARCHI_TYPE] in (u"archimate:FlowRelationship"):
                    # al.countNodeType(al.dictNodes[source][ARCHI_TYPE])
                    # al.countNodeType(al.dictNodes[target][ARCHI_TYPE])
                    # al.countNodeType(al.dictEdges[x][ARCHI_TYPE])
                    if (self.al.dictNodes[source][ARCHI_TYPE] == u"archimate:BusinessProcess") and \
                            self.al.dictNodes[target][ARCHI_TYPE] == u"archimate:BusinessProcess":
                        sourceName = self.al.getNodeName(source)
                        targetName = self.al.getNodeName(target)
                        # Skip processes already batch-numbered in a prior run
                        if sourceName[0].isdigit() or targetName[0].isdigit():
                            continue
                        logger.debug(u" %s:%s" % (sourceName, targetName))
                        l = list()
                        sc = self.findConcept(self.concepts, sourceName)
                        if sc is None:
                            logger.debug(u"New Target - %s" % sourceName)
                            sc = self.concepts.addConceptKeyType(self.al.getNodeName(source), u"Source")
                            self.getWords(sourceName, sc)
                        else:
                            logger.debug(u"Prior Target %s" % sourceName)
                        tc = self.findConcept(self.concepts, targetName)
                        if tc is None:
                            logger.debug(u"New Target %s" % targetName)
                            tc = sc.addConceptKeyType(self.al.getNodeName(target), u"Target")
                            self.getWords(sourceName, tc)
                        else:
                            logger.debug(u"Prior Target %s" % targetName)
                            sc.addConcept(tc)
                        l.append(target)
                        l.append(source)
                        listTSort.append(l)
        logger.debug(u"Edges = %s" % listTSort)
        Concepts.saveConcepts(self.concepts, fileConceptsTraversal)
        self.dependancyAnalysis(listTSort)
        return self.concepts, listTSort

    def dependancyAnalysis(self, listTSort):
        """Turn the (target, source) edge list into Task nodes, batch them
        topologically, and save the batches as WorkPackage concepts.

        Returns the conceptBatches tree.
        """
        index = 0
        for x in listTSort:
            logger.debug(u"%d %s[%s] -%s-> %s[%s]" % (index, self.al.dictNodes[x[0]][u"name"], self.al.dictNodes[x[0]][ARCHI_TYPE], u"UsedBy", self.al.dictNodes[x[1]][u"name"], self.al.dictNodes[x[1]][ARCHI_TYPE]))
            index += 1
            # Track every BusinessProcess seen, even ones with no dependents
            self.al.addToNodeDict(self.al.dictNodes[x[0]][u"name"], self.al.dictBP)
            self.al.addToNodeDict(self.al.dictNodes[x[1]][u"name"], self.al.dictBP)
        logger.info(u"Topic Sort Candidates : %d" % (len(listTSort)))
        nodes = list()
        index = 0
        dictTasks = dict()
        for x in listTSort:
            sname = self.al.dictNodes[x[0]][u"name"]
            tname = self.al.dictNodes[x[1]][u"name"]
            index += 1
            logger.debug(u"%d %s -%s-> %s" % (index, sname, u"UsedBy", tname))
            # Group dependency names by task name
            if sname in dictTasks:
                ln = dictTasks[sname]
                ln.append(tname)
            else:
                ln = list()
                ln.append(tname)
                dictTasks[sname] = ln
        for x in dictTasks.keys():
            logger.debug(u"dictTasks[%s]=%s" % (x, dictTasks[x]))
            a = Task(x, dictTasks[x])
            nodes.append(a)
        for x in self.al.dictBP.keys():
            # for x in listBP:
            # Processes with no dependencies still become (empty) tasks
            if x not in dictTasks:
                logger.debug(u"Add %s" % (x))
                a = Task(x, list())
                nodes.append(a)
        self.format_nodes(nodes)
        conceptBatches = Concepts(u"Batch", u"archimate:WorkPackage")
        n = 0
        logger.info(u"Batches:")
        batches = self.get_task_batches(nodes)
        for bundle in batches:
            n += 1
            name = u"Batch %d" % n
            c = conceptBatches.addConceptKeyType(name, u"archimate:WorkPackage")
            for node in bundle:
                c.addConceptKeyType(node.name, u"archimate:BusinessProcess")
            logger.info(u"%d : %s" % (n, ", ".join(node.name.lstrip() for node in bundle)))
        Concepts.saveConcepts(conceptBatches, fileConceptsBatches)
        return conceptBatches
def dependancyAnalysis(self, listTSort):
    """Turn the (target, source) edge list into Task nodes, batch them
    topologically, and save the batches as WorkPackage concepts.

    Returns the conceptBatches tree. (Standalone duplicate of the
    DependancyAnalysis class method.)
    """
    index = 0
    for x in listTSort:
        logger.debug(u"%d %s[%s] -%s-> %s[%s]" % (index, self.al.dictNodes[x[0]][u"name"], self.al.dictNodes[x[0]][ARCHI_TYPE], u"UsedBy", self.al.dictNodes[x[1]][u"name"], self.al.dictNodes[x[1]][ARCHI_TYPE]))
        index += 1
        # Track every BusinessProcess seen, even ones with no dependents
        self.al.addToNodeDict(self.al.dictNodes[x[0]][u"name"], self.al.dictBP)
        self.al.addToNodeDict(self.al.dictNodes[x[1]][u"name"], self.al.dictBP)
    logger.info(u"Topic Sort Candidates : %d" % (len(listTSort)))
    nodes = list()
    index = 0
    dictTasks = dict()
    for x in listTSort:
        sname = self.al.dictNodes[x[0]][u"name"]
        tname = self.al.dictNodes[x[1]][u"name"]
        index += 1
        logger.debug(u"%d %s -%s-> %s" % (index, sname, u"UsedBy", tname))
        # Group dependency names by task name
        if sname in dictTasks:
            ln = dictTasks[sname]
            ln.append(tname)
        else:
            ln = list()
            ln.append(tname)
            dictTasks[sname] = ln
    for x in dictTasks.keys():
        logger.debug(u"dictTasks[%s]=%s" % (x, dictTasks[x]))
        a = Task(x, dictTasks[x])
        nodes.append(a)
    for x in self.al.dictBP.keys():
        # for x in listBP:
        # Processes with no dependencies still become (empty) tasks
        if x not in dictTasks:
            logger.debug(u"Add %s" % (x))
            a = Task(x, list())
            nodes.append(a)
    self.format_nodes(nodes)
    conceptBatches = Concepts(u"Batch", u"archimate:WorkPackage")
    n = 0
    logger.info(u"Batches:")
    batches = self.get_task_batches(nodes)
    for bundle in batches:
        n += 1
        name = u"Batch %d" % n
        c = conceptBatches.addConceptKeyType(name, u"archimate:WorkPackage")
        for node in bundle:
            c.addConceptKeyType(node.name, u"archimate:BusinessProcess")
        logger.info(u"%d : %s" % (n, ", ".join(node.name.lstrip() for node in bundle)))
    Concepts.saveConcepts(conceptBatches, fileConceptsBatches)
    return conceptBatches
class PPTXCreateArchil(object): graph = None dictNodes = None dictEdges = None dictText = None dictNodeXY = None dictTextXY = None def __init__(self, fileCrawl, fileArchimate): self.EMU = 914400.0 self.fileArchimate = fileArchimate self.path_to_presentation = fileCrawl self.dictNodes = dict() self.dictEdges = dict() self.dictText = dict() self.dictNodeXY = dict() self.dictTextXY = dict() self.al = ArchiLib(fileArchimate) self.graph = GraphVizGraph() # self.graph = NetworkXGraph() # self.graph = PatternGraph() self.prs = Presentation(self.path_to_presentation) self.concepts = Concepts(u"Application", u"Relations") def addGraphNodes(self, concepts, n=0): n += 1 for c in concepts.getConcepts().values(): logger.debug(u"%s[%d]" % (c.name, len(c.name))) if len(c.name.strip(u" ")) == 0: return if not (c.typeName in (u"Source", u"Target")): return logger.debug(u"%d : %d Node c : %s:%s" % (n, len(c.getConcepts()), c.name, c.typeName)) self.graph.addConcept(c) if len(c.getConcepts()) != 0: self.addGraphNodes(c, n) def addGraphEdges(self, concepts, n=0): n += 1 i = 1 for c in concepts.getConcepts().values(): if (c.name in (u"l", u"h", u"t", u"w")): return logger.debug(u"%d : %d Edge c : %s:%s" % (n, len(c.getConcepts()), c.name, c.typeName)) if i == 1: p = c i += 1 else: self.graph.addEdge(p, c) if len(c.getConcepts()) != 0: self.addGraphEdges(c, n) def graphConcepts(self, concepts, graph=None): logger.info(u"Adding nodes the graph ...") self.addGraphNodes(concepts) logger.info(u"Adding edges the graph ...") self.addGraphEdges(concepts) if isinstance(graph, GraphVizGraph): filename = u"example.png" graph.exportGraph(filename=filename) logger.info(u"Saved Graph - %s" % filename) if isinstance(graph, Neo4JGraph): graph.setNodeLabels() if isinstance(graph, NetworkXGraph): graph.drawGraph(u"concepts.png") filename = u"concepts.net" logger.info(u"Saving Pajek - %s" % filename) graph.saveGraphPajek(filename) graph.saveGraph(u"concepts.gml") logger.info(u"Saving Graph - %s" % 
u"concepts.gml") if isinstance(graph, PatternGraph): logger.info(u"Exporting Graph") graph.exportGraph() def findID(self, nid): try: for x in self.dictNodes.keys(): logger.debug(u" dictNodes[%s] : %s" % (self.dictNodes[x], x)) if nid in self.dictNodes[x]: logger.debug(u"Found %s in %s" % (x, self.dictNodes[x])) return x except: em = format_exc().split('\n')[-2] logger.warn(u"findID : Warning: %s" % (em)) return None def findXY(self, nid, d): ld = list() try: ld = d[nid] logger.debug(u"ld : %s" % ld) except: pass return ld def logList(self, l, n=0): n += 1 s = " " * n logger.info(u"%sn=%d" % (s, n)) for x in l: # logger.info("%sx=%s" % (s, x)) if isinstance(x, list): logger.info(u"%slist: %s" % (s, x)) self.logList(x, n) elif isinstance(x, dict): logger.info(u"%sdict: %s" % (s, x)) self.logList(x, n) elif isinstance(x, tuple): logger.info(u"%stuple: %s" % (s, x)) self.logList(x, n) else: if isinstance(x, str): logger.info(u"%sstr: %s" % (s, x)) elif isinstance(x, float): logger.info(u"%sfloat: %3.2f" % (s, x)) elif isinstance(x, int): logger.info(u"%sint: %d" % (s, x)) def shapeText(self, shape): name = u"" if shape.has_text_frame: text_frame = shape.text_frame for paragraph in shape.text_frame.paragraphs: for run in paragraph.runs: logger.debug(u"%s" % run.text) name = name + run.text + u" " return name def shapeDim(self, shape): t = shape.top / self.EMU l = shape.left / self.EMU h = shape.height / self.EMU w = shape.width / self.EMU nid = shape.id dictDim = dict() dictDim[u"t"] = t dictDim[u"l"] = l dictDim[u"h"] = h dictDim[u"w"] = w self.dictNodeXY[nid] = dictDim logger.debug(u"shape.top : %3.2f" % (t)) logger.debug(u"shape.left : %3.2f" % (l)) logger.debug(u"shape.height : %3.2f" % (h)) logger.debug(u"shape.width : %3.2f" % (w)) logger.debug(u"shape.shape_type : %s" % shape.shape_type) return nid, t, l, h, w def addDictNodes(self, nid, name): name = unicode(name).rstrip(u" ").lstrip(u" ") if not (len(name) > 0): logger.warn(u"No Name!") return if name in 
self.dictNodes: nl = self.dictNodes[name] nl.append(nid) logger.debug(u"Duplicate Keys %s...%s" % (name, self.dictNodes[name])) else: nl = list() nl.append(nid) self.dictNodes[name] = nl def addDictEdges(self, nid, xl): nxl = list() for x in xl: nxl.append(int(x)) logger.debug(u"%d:%s" % (nid, x)) lenNXL = len(nxl) # # Only add connections between two nodes # if lenNXL == 3: if self.dictEdges.has_key(nid): nl = self.dictEdges[nid] nl.append(nxl) logger.debug(u"Duplicate Edges ...%s" % (self.dictEdges[nid])) else: el = list() el.append(nxl) self.dictEdges[nid] = el else: logger.debug(u"Only %d Connectors!" % (len(nxl))) return lenNXL def showConcepts(self, concepts): n = 0 for x in concepts.getConcepts().values(): n += 1 logger.info(u"x %s[%s]" % (x.name, x.typeName)) for y in x.getConcepts().values(): logger.info(u" y %s[%s]" % (y.name, y.typeName)) for z in y.getConcepts().values(): if not (z.name in (u"h", u"l", u"t", u"w")): logger.info(u" z %s[%s]" % (z.name, z.typeName)) def getPoint(self, d): t = d[u"t"] l = d[u"l"] h = d[u"h"] w = d[u"w"] py = t + (h / 2.0) px = l + (h / 2.0) return px, py def lineMagnitude(self, x1, y1, x2, y2): lineMagnitude = math.sqrt( math.pow((x2 - x1), 2) + math.pow((y2 - y1), 2)) return lineMagnitude # Calc minimum distance from a point and a line segment (i.e. consecutive vertices in a polyline). 
    def DistancePointLine(self, px, py, x1, y1, x2, y2):
        u"""Minimum distance from point (px, py) to segment (x1,y1)-(x2,y2).

        Projects the point onto the infinite line; when the projection falls
        outside the segment, the distance to the nearer endpoint is used.
        Returns 0 on any error (e.g. a zero-length segment dividing by 0).
        """
        try:
            # http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/source.vba
            LineMag = self.lineMagnitude(x1, y1, x2, y2)

            u1 = (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
            u = u1 / (LineMag * LineMag)

            if (u < 0.00001) or (u > 1):
                # closest point does not fall within the line segment, take the shorter distance
                # to an endpoint
                ix = self.lineMagnitude(px, py, x1, y1)
                iy = self.lineMagnitude(px, py, x2, y2)
                if ix > iy:
                    DistancePointLine = iy
                else:
                    DistancePointLine = ix
            else:
                # Intersecting point is on the line, use the formula
                ix = x1 + u * (x2 - x1)
                iy = y1 + u * (y2 - y1)
                DistancePointLine = self.lineMagnitude(px, py, ix, iy)

            return DistancePointLine
        except:
            # NOTE(review): bare except silently maps any failure to
            # distance 0, which is also the "perfect match" value.
            return 0

    def crawlPPTX(self):
        u"""Walk all slides of self.prs, harvesting nodes, edges and labels.

        Per slide: the title placeholder becomes a Slide concept; rectangle /
        rounded / straight shapes become Node concepts with their geometry;
        connector shapes are parsed (via lxml) for their endpoint ids; text
        boxes are remembered so each can later be matched to the nearest
        connector by point-to-segment distance. Returns self.concepts.
        """
        sNum = 0
        for slide in self.prs.slides:
            logger.debug(u"--new slide--")
            logger.debug(u"%s" % slide.partname)
            logger.debug(u"slideName : %s" % slide.name)
            sNum += 1

            #
            # Get Title of Slide
            #
            titleSlide = u""
            for idx, ph in enumerate(slide.shapes.placeholders):
                # logger.debug ("** %s:%s **" % (idx, ph.text))
                if idx == 0:
                    titleSlide = ph.text

            u = self.al.cleanString(titleSlide)
            logger.info(u"%d.%s" % (sNum, u))
            tss = u"%d.%s" % (sNum, u)
            q = self.concepts.addConceptKeyType(tss, u"Slide")

            # showConcepts(concepts)

            #
            # Iterate ihrough slides
            #
            n = 0
            nc = 0
            for shape in slide.shapes:
                logger.debug(u"...%s..." % type(shape))
                logger.debug(u"shape.element.xml : %s" % shape.element.xml)
                logger.debug(u"shape.name : %s[%d]" % (shape.name, shape.id - 1))
                n += 1
                sn = shape.name
                nid = shape.id

                # Get Shape Info
                # Shape-name prefix identifies boxes: Rectangle / Rounded / Straight.
                if shape.name[:5] in (u"Recta", u"Round", u"Strai"):
                    nid, t, l, h, w = self.shapeDim(shape)
                    # Corner points (top-left, top-right, bottom-left,
                    # bottom-right); computed but unused below.
                    tl = (l, t)
                    tr = (l + w, t)
                    bl = (l, t + h)
                    br = (l + w, t + h)

                    name = self.shapeText(shape)
                    if len(name) > 1:
                        logger.info(u" node : %s[%d] - %s" % (name, nid, shape.name))
                        self.addDictNodes(nid, name)
                        b = q.addConceptKeyType(self.al.cleanString(name), u"Node")
                        b.addConceptKeyType(u"t", str(t))
                        b.addConceptKeyType(u"l", str(l))
                        b.addConceptKeyType(u"h", str(h))
                        b.addConceptKeyType(u"w", str(w))

                #
                # Add in Connections
                #
                elif sn.find(u"Connector") != -1:
                    xmlShape = shape.element.xml
                    logger.debug(u"xmlShape : %s" % xmlShape)
                    tree = etree.fromstring(xmlShape)
                    # All id attributes in the connector XML: connector id
                    # plus the two connected shape ids.
                    xl = tree.xpath(u"//@id")
                    logger.debug(u"xl : %s" % xl)
                    numEdges = self.addDictEdges(nid, xl)
                    if numEdges == 3:
                        nc += 1
                        logger.info(u" %d Found Edge %d" % (nc, shape.id))

                #
                # Get Text boxes and associate with Connector
                #
                elif shape.name[:8] in (u"Text Box", u"TextBox "):
                    nid, t, l, h, w = self.shapeDim(shape)
                    name = self.shapeText(shape)
                    # NOTE(review): shapeText returns u"" (never None), so
                    # this guard always passes.
                    if name is not None:
                        nxl = list()
                        nxl.append(nid)
                        self.dictText[name] = nxl
                        logger.info(u" TextBox : %s[%d]" % (name, shape.id))
                else:
                    logger.debug(u"Skipped : %s" % shape.name)

            #
            # Now match the Connector with text
            #
            listEdges = self.dictEdges.values()
            logger.info(u"listEdges : %d" % len(listEdges))

            tbFound = 0
            tbTotal = len(self.dictTextXY)
            logger.info(u"Search for %s Text Box Connector's" % len(self.dictTextXY))

            for txt in self.dictTextXY.keys():
                # NOTE(review): `dictTextXY` below lacks `self.` — unless a
                # module-level dictTextXY exists this raises NameError.
                # Probably self.dictTextXY was intended; confirm.
                logger.debug(u"txt : %s[%s]" % (txt, dictTextXY[txt]))

                # NOTE(review): findID is defined with a single nid argument
                # above; this two-argument call looks like it expects a
                # findID(nid, dict) variant not visible here — verify.
                searchText = self.findID(txt, self.dictText)
                logger.info(u" Search Text : %s" % (searchText))

                # get text point - middle of node
                px, py = self.getPoint(dictTextXY[txt])

                cDist = 1000.0
                cNode = None
                csn = None
                # NOTE(review): ctn is initialised but never used; the loop
                # assigns tsn instead — likely a naming slip.
                ctn = None

                # for each node in dictEdges
                ni = 0
                for edge in listEdges:
                    logger.debug(u" edge: %s" % edge)
                    try:
                        # get source
                        source = edge[0][2]
                        sName = self.findID(source)
                        sl = self.dictNodeXY[source]
                        spx, spy = self.getPoint(sl)

                        # get target
                        target = edge[0][1]
                        tName = self.findID(target)
                        tl = self.dictNodeXY[target]
                        tpx, tpy = self.getPoint(tl)

                        # determine distance between points
                        d = self.DistancePointLine(px, py, spx, spy, tpx, tpy)

                        if d < cDist:
                            cDist = d
                            cNode = edge[0][0]
                            csn = sName
                            tsn = tName
                    except:
                        # Edges whose endpoints lack cached geometry are skipped.
                        pass

                if cNode != None:
                    tbFound += 1
                    logger.debug(u" Closest Connector : %s" % cNode)
                    logger.info(
                        u" found(%d:%d] - %s->%s->%s [%2.3f]" % (tbFound, tbTotal, csn, searchText, tsn, cDist))

                    edge = searchText
                    # NOTE(review): sName/tName/sl/tl hold values from the
                    # LAST loop iteration, not the closest edge found —
                    # only csn/tsn/cNode track the minimum. Verify.
                    source = sName
                    target = tName
                    dimSource = sl
                    dimTarget = tl

                    if edge is None:
                        edge = u"TBD"

                    d = q.getConcepts()[csn]

                    for ld in dimSource.keys():
                        logger.debug(u"%s %s:%2.3f" % (source, ld, dimSource[ld]))
                        d.addConceptKeyType(ld, str(dimSource[ld]))

                    f = d.addConceptKeyType(target, u"Target")
                    for ld in dimTarget.keys():
                        # NOTE(review): logs dimSource[ld] but stores
                        # dimTarget[ld] — log line looks copy-pasted.
                        logger.debug(u"%s %s:%2.3f" % (target, ld, dimSource[ld]))
                        f.addConceptKeyType(ld, str(dimTarget[ld]))

                    f.addConceptKeyType(self.al.cleanString(edge), u"Edge")

            if tbTotal != 0:
                logger.info(u"Found [%3.1f] Text Box Connectors" % ((tbFound / float(tbTotal)) * 100.0))

        # NOTE(review): binds a LOCAL name; it does not clear
        # self.dictTextXY — probably self.dictTextXY = dict() was meant.
        dictTextXY = dict()

        return self.concepts
def exportArchi(self): m = hashlib.md5() concepts = Concepts(u"Node", u"Nodes") logger.info(u"Found %d Nodes" % len(self.al.dictNodes)) logger.info(u"Found %d Edges" % len(self.al.dictEdges)) count = 0 listTSort = list() for x in self.al.dictEdges.keys(): logger.debug(u"Edge [%s]=%s" % (self.al.dictEdges[x], x)) if self.al.dictEdges[x].has_key( u"source") and self.al.dictEdges[x].has_key(u"target"): typeEdge = self.al.dictEdges[x][ARCHI_TYPE] logger.debug(u"Edge : %s" % typeEdge) source = self.al.dictEdges[x][u"source"] logger.debug(u"Source : %s" % source) target = self.al.dictEdges[x][u"target"] logger.debug(u"Target : %s" % target) logger.debug(u" Rel : %s" % (self.al.dictEdges[x][ARCHI_TYPE])) sourceName = self.al.getNodeName(source) targetName = self.al.getNodeName(target) logger.debug( u" %s--%s--%s" % (sourceName, self.al.dictEdges[x][ARCHI_TYPE][10:], targetName)) if source in self.al.dictNodes: l = list() sc = concepts.addConceptKeyType( sourceName, self.al.dictNodes[source][ARCHI_TYPE][10:]) # getWords(sourceName, sc) nameEdge = u"(" + sourceName + u"," + targetName + u")" logger.debug(u"nameEdge : %s[%d]" % (nameEdge, len(nameEdge))) logger.debug(u"typeEdge : %s" % typeEdge[10:]) ne = str(self.al.cleanString(nameEdge)) hl = hashlib.sha224(str(ne)).hexdigest() logger.debug(u"hash : %s" % hl) nh = u"%s-%s" % (typeEdge[10:], hl) rc = sc.addConceptKeyType(nh, typeEdge[10:]) if self.al.dictNodes.has_key(target): tc = rc.addConceptKeyType( targetName, self.al.dictNodes[target][ARCHI_TYPE][10:]) # getWords(sourceName, tc) Concepts.saveConcepts(concepts, self.fileConceptsExport) return concepts