Example #1
def test_ExportArchiFolderModels(cleandir):

    if __name__ == u"__main__":
        cleandir()

    assert (os.path.isfile(fileArchimateTest) is True)
    al = ArchiLib(fileArchimateTest)

    folder = u"Scenarios"

    logger.info(u"Exporting Folder : %s" % folder)
    listMTE = al.getModelsInFolder(folder)
    assert (listMTE is not None)

    logger.info(u"len(listMTE) = %d" % len(listMTE))
    assert (len(listMTE) == 2)

    concepts = Concepts(u"Export", u"Pickle")

    for ModelToExport in listMTE:
        logger.info(u"  Model : %s" % ModelToExport)
        d = concepts.addConceptKeyType(ModelToExport, u"Model")
        al.recurseModel(ModelToExport)

    al.outputCSVtoFile(concepts, fileExport=fileCSVExport)
    assert (os.path.isfile(fileCSVExport) is True)

    Concepts.saveConcepts(concepts, fileConceptsExport)
    logger.info(u"Save Concepts : %s" % fileConceptsExport)

    assert (os.path.isfile(fileConceptsExport) is True)
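The test assumes a pytest fixture named cleandir plus module-level paths (fileArchimateTest, fileCSVExport, fileConceptsExport) defined elsewhere in the test module. A minimal sketch of what such a fixture could look like; the glob-based body is an assumption, not the project's actual fixture:

import glob
import os

import pytest

@pytest.fixture
def cleandir():
    # Hypothetical: return a callable that removes artifacts (*.csv, *.p)
    # left behind by a previous run, matching the cleandir() call above.
    def _clean():
        for pattern in (u"*.csv", u"*.p"):
            for path in glob.glob(pattern):
                os.remove(path)
    return _clean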
Example #2
    def collectDependancyAnalysisNodes(self):

        count = 0
        listTSort = list()
        for x in self.al.dictEdges.keys():
            logger.debug(u"[%s]=%s" % (self.al.dictEdges[x][u"id"], x))

            if u"source" in self.al.dictEdges[x]:
                source = self.al.dictEdges[x][u"source"]
                target = self.al.dictEdges[x][u"target"]

                logger.debug(u"  Rel    : %s" % (self.al.dictEdges[x][ARCHI_TYPE]))

                if self.al.dictEdges[x][ARCHI_TYPE] in (u"archimate:FlowRelationship",):  # 1-tuple; the bare parens made this a substring test

                    # al.countNodeType(al.dictNodes[source][ARCHI_TYPE])
                    # al.countNodeType(al.dictNodes[target][ARCHI_TYPE])
                    # al.countNodeType(al.dictEdges[x][ARCHI_TYPE])

                    if (self.al.dictNodes[source][ARCHI_TYPE] == u"archimate:BusinessProcess") and \
                            self.al.dictNodes[target][ARCHI_TYPE] == u"archimate:BusinessProcess":

                        sourceName = self.al.getNodeName(source)
                        targetName = self.al.getNodeName(target)

                        if sourceName[0].isdigit() or targetName[0].isdigit():
                            continue

                        logger.debug(u" %s:%s" % (sourceName, targetName))

                        l = list()

                        sc = self.findConcept(self.concepts, sourceName)
                        if sc is None:
                            logger.debug(u"New Target - %s" % sourceName)
                            sc = self.concepts.addConceptKeyType(self.al.getNodeName(source), u"Source")
                            self.getWords(sourceName, sc)
                        else:
                            logger.debug(u"Prior Target %s" % sourceName)

                        tc = self.findConcept(self.concepts, targetName)
                        if tc is None:
                            logger.debug(u"New Target %s" % targetName)
                            tc = sc.addConceptKeyType(self.al.getNodeName(target), u"Target")
                            self.getWords(targetName, tc)
                        else:
                            logger.debug(u"Prior Target %s" % targetName)
                            sc.addConcept(tc)

                        l.append(target)
                        l.append(source)
                        listTSort.append(l)

        logger.debug(u"Edges = %s" % listTSort)

        Concepts.saveConcepts(self.concepts, fileConceptsTraversal)

        self.dependancyAnalysis(listTSort)

        return self.concepts, listTSort
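listTSort collects [target, source] pairs that dependancyAnalysis (Example #30) later batches topologically. Purely to illustrate that step, a few lines of Kahn's algorithm can order such an edge list; this is a generic sketch, not the project's implementation:

from collections import defaultdict, deque

def topological_order(edges):
    # edges: iterable of (before, after) id pairs
    succ = defaultdict(list)
    indeg = defaultdict(int)
    for a, b in edges:
        succ[a].append(b)
        indeg[b] += 1
        indeg.setdefault(a, 0)
    order, queue = [], deque(n for n, d in indeg.items() if d == 0)
    while queue:
        n = queue.popleft()
        order.append(n)
        for m in succ[n]:
            indeg[m] -= 1
            if indeg[m] == 0:
                queue.append(m)
    return order  # shorter than the node count if the edges contain a cycle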
Example #3
    def exportNeo4JToConcepts(self, concepts, fileNodes=u"nodes.p"):

        qs = u"Match n return n"

        lq, qd = self.cypherQuery(qs)

        for x in lq:
            if len(x) == 2:
                logger.info(u"%s[%s]" % (x[0], x[1]))
                concepts.addConceptKeyType(x[0], x[1])
            else:
                logger.warn(u"Not a standard node : %d : %s" % (len(x), x))

        # Match r relations
        qs = u"match n-[r]-m return n, r, m"
        lq, qd = self.cypherQuery(qs)

        for x in lq:
            if len(x) == 6:
                logger.info(u"%s[%s]" % (x[0], x[1]))
                concepts.addConceptKeyType(x[0], x[1])
            else:
                logger.warn(u"Not a standard node : %d : %s" % (len(x), x))

        Concepts.saveConcepts(concepts, fileNodes)

        return concepts
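Note that both Cypher strings use the legacy shorthand without parentheses around node patterns, which Neo4j removed in 3.0; whether they run depends on the Neo4j/py2neo versions in use. The modern equivalents would be:

qs = u"MATCH (n) RETURN n"
qs = u"MATCH (n)-[r]-(m) RETURN n, r, m"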
Example #5
    def findSimilarties(self):

        logger.info(u"Compute Similarity")

        self.conceptsSimilarity = Concepts(u"ConceptsSimilarity", u"Similarities")

        # Compute similarity between documents / concepts
        similarityThreshold = self.similarity

        for document in self.documentsList:
            indexNum = self.documentsList.index(document)

            self.df = self.concepts.getConcepts().keys()

            logger.info(u"++conceptsDoc %s" % (self.df[indexNum]))
            logger.info(u"  documentsList[" + str(indexNum) + u"]=" + u"".join(x + u" " for x in document))

            # Show common topics
            d = [unicode(x).strip().replace(u"'", u"") for x in document]
            e = [unicode(y).strip().replace(u"\"", u"") for y in self.listTopics]

            s1 = set(e)
            s2 = set(d)
            common = s1 & s2
            lc = [x for x in common]
            logger.info(u"  Common Topics : %s{%s}" % (lc, self.al.dictName[document][ARCHI_TYPE]))

            self.doComputation(indexNum, similarityThreshold, tfAddWords=True)

        Concepts.saveConcepts(self.conceptsSimilarity, conceptsSimilarityFile)

        logger.info(u"Saved Concepts : %s" % conceptsSimilarityFile)

        return self.conceptsSimilarity
Example #6
def requirementAnalysis(fileArchimate=None):

    if fileArchimate is None:
        fileArchimate = u"/Users/morrj140/Documents/SolutionEngineering/Archimate Models/DVC v38.archimate"

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    searchTypes = list()
    searchTypes.append(u"archimate:Requirement")
    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")
    n = 0
    for sentence in nl:
        n += 1
        logger.debug(u"%s" % sentence)

        c = concepts.addConceptKeyType(u"Document" + str(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + str(n))

        if True and sentence is not None:
            cleanSentence = ' '.join([word for word in sentence.split(u" ") if word not in stop])
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                if len(word) > 1 and pos[0] == u"N":
                    e = d.addConceptKeyType(word, u"Word")
                    f = e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    chunks = Chunks(concepts)
    chunks.createChunks()
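requirementAnalysis relies on a module-level stop list that is not shown. A plausible setup, assuming NLTK's stopword corpus (pos_tag and wordpunct_tokenize additionally require the punkt and averaged_perceptron_tagger data packages to be downloaded):

from nltk.corpus import stopwords

stop = stopwords.words(u"english")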
Example #7
def test_ExportArchi(cleandir):

    if __name__ == u"__main__":
        cleandir()

    logger.info(u"Using : %s" % fileArchimateTest)

    assert (os.path.isfile(fileArchimateTest) is True)

    al = None
    concepts = None

    al = ArchiLib(fileArchimateTest)

    assert (al is not None)

    concepts = Concepts(u"Node", u"Nodes")

    assert (concepts is not None)

    logger.info(u"Found %d Nodes" % len(al.dictNodes))
    logger.info(u"Found %d Edges" % len(al.dictEdges))

    assert (len(al.dictNodes) == 45)
    assert (len(al.dictEdges) == 36)

    count = 0
    listTSort = list()
    for x in al.dictEdges.keys():
        logger.info(u"[%s]=%s" % (al.dictEdges[x][u"id"], x))

        if u"source" in al.dictEdges[x]:
            source = al.dictEdges[x][u"source"]
            target = al.dictEdges[x][u"target"]

            logger.info(u"  Rel    : %s" % (al.dictEdges[x][ARCHI_TYPE]))

            sourceName = al.getNodeName(source)
            targetName = al.getNodeName(target)

            logger.info(
                u" %s--%s--%s" %
                (sourceName, al.dictEdges[x][ARCHI_TYPE][10:], targetName))

            sc = concepts.addConceptKeyType(
                sourceName, al.dictNodes[source][ARCHI_TYPE][10:])
            # getWords(sourceName, sc)

            tc = sc.addConceptKeyType(targetName,
                                      al.dictNodes[target][ARCHI_TYPE][10:])
            # getWords(sourceName, tc)

    Concepts.saveConcepts(concepts, fileConceptsExport)

    assert (len(concepts.cd) == 17)

    assert (os.path.isfile(fileConceptsExport) is True)

    assert (concepts.typeName == u"Nodes")
Example #8
    def saveTopics(self, topics):
        wordConcepts = Concepts(u"TopicConcepts", u"Topics")
        for topic in topics:
            logger.debug(u"Topic:" + topic[0])
            w = wordConcepts.addConceptKeyType(topic[0], u"Topic")
            w.count = topic[1]
        Concepts.saveConcepts(wordConcepts, self.topicsFile)
        return wordConcepts
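saveTopics expects topics as (name, weight) pairs, for example term/score tuples from a topic model. A hypothetical call (analyzer stands in for whatever object owns the method):

topics = [(u"customer", 42), (u"order", 17), (u"invoice", 9)]
wordConcepts = analyzer.saveTopics(topics)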
Example #9
    def _saveConcepts(self):
        logger.info(u"Saving %s" % self.documentsConceptsFile)
        Concepts.saveConcepts(self.documentsConcepts, self.documentsConceptsFile)
        logger.info(u"Saving %s" % self.wordsConceptsFile)
        Concepts.saveConcepts(self.wordsConcepts, self.wordsConceptsFile)

        logger.info(u"Saving Documents %s" % os.getcwd() + os.sep + self.documentsConceptsFile)
        logger.info(u"Saving Words%s" % os.getcwd() + os.sep + self.wordsConceptsFile)
Example #10
    def exportArchi(self):

        m = hashlib.md5()

        concepts = Concepts(u"Node", u"Nodes")

        logger.info(u"Found %d Nodes" % len(self.al.dictNodes))
        logger.info(u"Found %d Edges" % len(self.al.dictEdges))

        count = 0
        listTSort = list()
        for x in self.al.dictEdges.keys():
            logger.debug(u"Edge [%s]=%s" % (self.al.dictEdges[x], x))

            if self.al.dictEdges[x].has_key(u"source") and self.al.dictEdges[x].has_key(u"target"):

                typeEdge = self.al.dictEdges[x][ARCHI_TYPE]
                logger.debug(u"Edge   : %s" % typeEdge)

                source = self.al.dictEdges[x][u"source"]
                logger.debug(u"Source : %s" % source)

                target = self.al.dictEdges[x][u"target"]
                logger.debug(u"Target : %s" % target)

                logger.debug(u"  Rel    : %s" % (self.al.dictEdges[x][ARCHI_TYPE]))

                sourceName = self.al.getNodeName(source)
                targetName = self.al.getNodeName(target)

                logger.debug(u" %s--%s--%s" % (sourceName, self.al.dictEdges[x][ARCHI_TYPE][10:], targetName))

                if source in self.al.dictNodes:
                    l = list()
                    sc = concepts.addConceptKeyType(sourceName, self.al.dictNodes[source][ARCHI_TYPE][10:])
                    # getWords(sourceName, sc)

                nameEdge = u"(" + sourceName + u"," + targetName + u")"
                logger.debug(u"nameEdge : %s[%d]" % (nameEdge, len(nameEdge)))
                logger.debug(u"typeEdge : %s" % typeEdge[10:])

                ne = str(self.al.cleanString(nameEdge))
                hl = hashlib.sha224(str(ne)).hexdigest()

                logger.debug(u"hash : %s" % hl)

                nh = u"%s-%s" % (typeEdge[10:], hl)

                rc = sc.addConceptKeyType(nh, typeEdge[10:])

                if target in self.al.dictNodes:
                    tc = rc.addConceptKeyType(targetName, self.al.dictNodes[target][ARCHI_TYPE][10:])
                    # getWords(sourceName, tc)

        Concepts.saveConcepts(concepts, self.fileConceptsExport)

        return concepts
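Two details worth noting: the md5 object m is created but never used, and hashlib.sha224(str(ne)) only works on Python 2, where str(...) yields bytes. A version-safe variant of the edge-hash step (illustrative only):

import hashlib

def edge_hash(name):
    # hashlib needs bytes; encode explicitly so the call behaves the
    # same on Python 2 and Python 3.
    if not isinstance(name, bytes):
        name = name.encode(u"utf-8")
    return hashlib.sha224(name).hexdigest()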
Example #12
def PPTXCrawl(filePPTX):

    logger.info(u"Using : %s" % filePPTX)

    cpptx = PPTXCreateArchil(filePPTX)

    c = cpptx.crawlPPTX()

    c.logConcepts()

    Concepts.saveConcepts(c, fileConceptsPPTX)
Example #13
def test_PPTXCrawl(fileArchimate):

    assert (os.path.isfile(filePPTXIn) is True)

    logger.info(u"Using : %s" % filePPTXIn)

    cpptx = PPTXCreateArchil(filePPTXIn, fileArchimate)

    c = cpptx.crawlPPTX()

    Concepts.saveConcepts(c, fileConceptsPPTX)

    assert (os.path.isfile(fileConceptsPPTX) is True)
Example #14
def PPTXCreateArchi():

    start_time = ArchiLib.startTimer()

    logger.info(u"Using : %s" % filePPTXIn)

    cpptx = PPTXCreateArchil(filePPTXIn, fileArchimateTest)

    c = cpptx.crawlPPTX()

    c.logConcepts()

    Concepts.saveConcepts(c, fileConceptsPPTX)

    ArchiLib.stopTimer(start_time)
Example #16
def createArchimateConcepts(fileArchimate, fileConceptsArch):

    logger.info(u"Using : %s" % fileArchimate)

    concepts = Concepts(fileArchimate, u"Archimate")

    al = ArchiLib(fileArchimate)

    al.logTypeCounts()

    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)
    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)
Example #17
    def exportArchiFolderModels(self, folder):

        logger.info(u"Exporting Folder : %s" % folder)

        listMTE = self.al.getModelsInFolder(folder)

        concepts = Concepts(u"Export", u"Pickle")

        for ModelToExport in listMTE:
            logger.info(u"  Model : %s" % ModelToExport)
            d = concepts.addConceptKeyType(ModelToExport, u"Model")
            self.al.recurseModel(ModelToExport, d)

        self.al.outputCSVtoFile(concepts, fileCSVExport)

        Concepts.saveConcepts(concepts, self.conceptsFile)

        logger.info(u"Save Concepts : %s" % self.conceptsFile)
Example #19
def test_CreateArchimateConcepts(cleandir, fileArchimate):

    assert (os.path.isfile(fileArchimate) is True)

    logger.info(u"Using : %s" % fileArchimate)

    concepts = Concepts(fileArchimate, u"Archimate")

    al = ArchiLib(fileArchimate)

    lc = al.logTypeCounts()

    assert (len(lc) > 0)

    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)
    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)

    assert (os.path.isfile(fileConceptsArch) is True)
Example #20
def test_ExportArchiModel(cleandir):

    if __name__ == u"__main__":
        cleandir()

    assert (os.path.isfile(fileArchimateTest) is True)

    al = ArchiLib(fileArchimateTest)

    listMTE = list()
    listMTE.append(u"01. Market to Leads")

    concepts = Concepts(u"Export", u"Model")

    for ModelToExport in listMTE:
        al.recurseModel(ModelToExport)

    Concepts.saveConcepts(concepts, fileConceptsExport)
    assert (os.path.isfile(fileConceptsExport) is True)

    al.outputCSVtoFile(concepts, fileCSVExport)
    assert (os.path.isfile(fileCSVExport) is True)
Example #21
def test_ArchimateConcepts(cleandir):

    if __name__ == u"__main__":
        cleandir()

    logger.info(u"Using : %s" % fileArchimateTest)

    assert (os.path.isfile(fileArchimateTest) is True)

    concepts = Concepts(fileConceptsArch, u"Archimate")

    al = ArchiLib(fileArchimateTest)

    #
    # Create Concepts from Archimate
    #
    al.folderConcepts(concepts)

    Concepts.saveConcepts(concepts, fileConceptsArch)
    logger.info(u"Saved concepts to : %s" % fileConceptsArch)

    assert (os.path.isfile(fileConceptsArch) is True)
Example #24
def test_RequirementAnalysis(cleandir, fileArchimate):

    assert (os.path.isfile(filePPTXIn) is True)

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    searchTypes = list()
    searchTypes.append(u"archimate:Requirement")
    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")
    n = 0
    for sentence in nl:
        n += 1
        logger.debug(u"%s" % sentence)

        c = concepts.addConceptKeyType(u"Document" + unicode(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + unicode(n))

        if True and sentence is not None:
            cleanSentence = ' '.join([word for word in sentence.split(" ") if word not in stop])
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                if len(word) > 1 and pos[0] == u"N":
                    e = d.addConceptKeyType(word, u"Word")
                    f = e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    assert (os.path.isfile(conceptsFile) is True)

    chunks = Chunks(concepts)
    chunks.createChunks()

    assert (os.path.isfile(fileConceptsChunks) is True)
Example #25
    def analyzeNamedEntities(self):
        rels = (u"archimate:AccessRelationship",
                u"archimate:SpecialisationRelationship",
                u"archimate:CompositionRelationship",
                u"archimate:AggregationRelationship")

        listType = (u"archimate:Requirement", )

        dictEntities = self.al.getTypeNodes(listType)

        concepts = Concepts(u"Entities", u"BusinessObject")

        for x in self.al.dictEdges.keys():
            try:
                logger.debug(u"[%s]=%s" %
                             (x, self.al.dictEdges[x][ARCHI_TYPE]))

                source = self.al.dictEdges[x][u"source"]
                target = self.al.dictEdges[x][u"target"]

                logger.debug(u"  Source : %s" % source)
                logger.debug(u"  Target : %s" % target)
            except KeyError:
                logger.warn(u"[%s] ARCHI_TYPE Exception" % x)
                continue

            if self.al.dictEdges[x][ARCHI_TYPE] in rels:
                logger.info(u"%s   ->  [ %s ]  ->   %s" %
                            (self.al.dictNodes[source][u"name"][:20],
                             self.al.dictEdges[x][ARCHI_TYPE],
                             self.al.dictNodes[target][u"name"][:20]))

                listNodes = self.al.getEdgesForNode(source, rels)
                for node in listNodes:  # renamed from x: it shadowed the outer loop variable
                    logger.debug(u"    %s" % node)

        Concepts.saveConcepts(concepts, fileConceptsRelations)
Example #26
    def createChunks(self):
        stop = stopwords.words('english')
        stop.append("This")
        stop.append("The")
        stop.append(",")
        stop.append(".")
        stop.append("..")
        stop.append("...")
        stop.append(".")
        stop.append(";")
        stop.append("and")

        stemmer = PorterStemmer()
        lemmatizer = WordNetLemmatizer()
        tokenizer = RegexpTokenizer("[\w]+")

        for document in self.concepts.getConcepts().values():
            logger.info("%s" % document.name)
            d = self.chunkConcepts.addConceptKeyType(document.name, "Document")

            for sentence in document.getConcepts().values():
                logger.debug("%s(%s)" % (sentence.name, sentence.typeName))
                cleanSentence = ' '.join([word for word in sentence.name.split() if word not in stop])

                listSentence = list()

                for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                    logger.debug("Word: " + word + " POS: " + pos)

                    if pos[:1] == "N":
                        lemmaWord = lemmatizer.lemmatize(word)
                        logger.debug("Word: " + word + " Lemma: " + lemmaWord)

                        synset = wn.synsets(word, pos='n')
                        logger.debug("synset : %s" %  synset)

                        if len(synset) != 0:
                            syn = synset[0]

                            root = syn.root_hypernyms()
                            logger.debug("root : %s" %  root)

                            hypernyms = syn.hypernyms()
                            logger.debug("hypernyms : %s" %  hypernyms)

                            if len(hypernyms) > 0:
                                hyponyms = syn.hypernyms()[0].hyponyms()
                                logger.debug("hyponyms : %s" %  hyponyms)
                            else:
                                hyponyms = None

                            listSentence.append((word, lemmaWord, pos, root, hypernyms, hyponyms))

                nounSentence = ""
                for word in listSentence:
                    nounSentence += word[1] + " "

                if len(nounSentence) > 2:
                    e = d.addConceptKeyType(nounSentence, "NounSentence")

                    for word in listSentence:
                        f = e.addConceptKeyType(word[0], word[2])
                        f.addConceptKeyType(word[1], "Lemma")

                logger.debug("%s = %s" % (cleanSentence, type(cleanSentence)))
                cleanSentence = cleanSentence.decode("utf-8", errors="ignore")
                pt = parsetree(cleanSentence, relations=True, lemmata=True)

                for sentence in pt:
                    logger.debug("relations: %s" % [x for x in sentence.relations])
                    logger.debug("subject  : %s" % [x for x in sentence.subjects])
                    logger.debug("verb     : %s" % [x for x in sentence.verbs])
                    logger.debug("object   : %s" % [x for x in sentence.objects])

                    if sentence.subjects is not None:
                        logger.debug("Sentence : %s" % sentence.chunks)

                        for chunk in sentence.chunks:
                            logger.debug("Chunk  : %s" % chunk)

                            relation = str(chunk.relation).encode("ascii", errors="ignore").strip()
                            role = str(chunk.role).encode("ascii", errors="ignore").strip()

                            logger.debug("Relation : %s" % relation)
                            logger.debug("Role     : %s" % role)

                            for subject in sentence.subjects:
                                logger.debug("Subject.realtion : %s " % subject.relation)
                                logger.debug("Subject : %s " % subject.string)
                                f = e.addConceptKeyType(subject.string, "SBJ")

                                for verb in sentence.verbs:
                                    if verb.relation == subject.relation:
                                        logger.debug("Verb.realtion : %s " % verb.relation)
                                        logger.debug("Verb   : %s " % verb.string)
                                        g = f.addConceptKeyType(verb.string, "VP")

                                        for obj in sentence.objects:
                                            if obj.relation == verb.relation:
                                                logger.debug("Obj.realtion : %s " % obj.relation)
                                                logger.debug("Object : %s " % obj.string)
                                                g.addConceptKeyType(obj.string, "OBJ")

        Concepts.saveConcepts(self.chunkConcepts, self.chunkFile)
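wn here is NLTK's WordNet interface (from nltk.corpus import wordnet as wn). A tiny standalone illustration of the synset lookups used above:

from nltk.corpus import wordnet as wn

synsets = wn.synsets(u"invoice", pos=u"n")
if synsets:
    syn = synsets[0]
    print(syn.root_hypernyms())  # typically [Synset('entity.n.01')]
    print(syn.hypernyms())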
Example #27
    def createChunks(self):
        stop = stopwords.words(u'english')
        stop.append(u"This")
        stop.append(u"The")
        stop.append(u",")
        stop.append(u".")
        stop.append(u"..")
        stop.append(u"...")
        stop.append(u".")
        stop.append(u";")
        stop.append(u"and")

        stemmer = PorterStemmer()
        lemmatizer = WordNetLemmatizer()
        tokenizer = RegexpTokenizer(u"[\w]+")

        for document in self.concepts.getConcepts().values():
            logger.info(u"%s" % document.name)
            d = self.chunkConcepts.addConceptKeyType(document.name, u"Document")

            for sentence in document.getConcepts().values():
                logger.debug(u"%s(%s)" % (sentence.name, sentence.typeName))
                cleanSentence = ' '.join([word for word in sentence.name.split() if word not in stop])

                listSentence = list()

                for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
                    logger.debug(u"Word: " + word + u" POS: " + pos)

                    if pos[:1] == u"N":
                    #if True:
                        lemmaWord = lemmatizer.lemmatize(word)
                        logger.debug(u"Word: " + word + u" Lemma: " + lemmaWord)

                        morphWord = wn.morphy(word, wn.NOUN)
                        if morphWord is not None:
                            logger.debug(u"Word: " + word + u" Morph: " + morphWord)

                        synset = wn.synsets(word, pos=u'n')
                        logger.debug(u"synset : %s" % synset)

                        if len(synset) != 0:
                            syn = synset[0]

                            root = syn.root_hypernyms()
                            logger.debug(u"root : %s" % root)

                            mh = syn.member_holonyms()
                            logger.debug(u"member_holonyms : %s" % mh)

                            hypernyms = syn.hypernyms()
                            logger.debug(u"hypernyms : %s" % hypernyms)

                            if len(hypernyms) > 0:
                                hyponyms = syn.hypernyms()[0].hyponyms()
                                logger.debug(u"hyponyms : %s" % hyponyms)
                            else:
                                hyponyms = None

                            listSentence.append((word, lemmaWord, pos, root, hypernyms, hyponyms))

                nounSentence = u""
                for word in listSentence:
                    nounSentence += word[1] + u" "

                if len(nounSentence) > 2:
                    e = d.addConceptKeyType(nounSentence, u"NounSentence")

                    for word in listSentence:
                        f = e.addConceptKeyType(word[0], word[2])
                        f.addConceptKeyType(word[1], u"Lemma")

                logger.debug(u"%s = %s" % (cleanSentence, type(cleanSentence)))
                cleanSentence = unicode(cleanSentence)
                pt = parsetree(cleanSentence, relations=True, lemmata=True)

                for sentence in pt:
                    logger.debug(u"relations: %s" % [x for x in sentence.relations])
                    logger.debug(u"subject  : %s" % [x for x in sentence.subjects])
                    logger.debug(u"verb     : %s" % [x for x in sentence.verbs])
                    logger.debug(u"object   : %s" % [x for x in sentence.objects])

                    if sentence.subjects is not None:
                        logger.debug(u"Sentence : %s" % sentence.chunks)

                        for chunk in sentence.chunks:
                            logger.debug(u"Chunk  : %s" % chunk)

                            relation = unicode(chunk.relation).strip()
                            role = unicode(chunk.role).strip()

                            logger.debug(u"Relation : %s" % relation)
                            logger.debug(u"Role     : %s" % role)

                            for subject in sentence.subjects:
                                logger.debug(u" Subject.realtion : %s " % subject.relation)
                                logger.debug(u" Subject : %s " % subject.string)
                                f = e.addConceptKeyType(subject.string, u"SBJ")

                                for verb in sentence.verbs:
                                    if verb.relation == subject.relation:
                                        logger.debug(u"  Verb.realtion : %s " % verb.relation)
                                        logger.debug(u"  Verb   : %s " % verb.string)
                                        g = f.addConceptKeyType(verb.string, u"VP")

                                        for obj in sentence.objects:
                                            if obj.relation == verb.relation:
                                                logger.debug(u"    Obj.realtion : %s " % obj.relation)
                                                logger.debug(u"    Object : %s " % obj.string)
                                                g.addConceptKeyType(obj.string, u"OBJ")

        Concepts.saveConcepts(self.chunkConcepts, self.chunkFile)
        logger.info(u"Saved : %s" % self.chunkFile)
Example #29
                #'BusinessFunction.csv',
                #'BusinessProcess.csv',
                #'Capability.csv',
                #'Entity.csv',
                #'Functionality.csv',
                #'ITService.csv',
                #'OrganizationalProcess.csv',
                #'Requirements.csv',
                #'SystemProcess.csv'
                #]

    #for f in fileList:
    #    logger.info("File: %s" %f)
    #    inputFile = homeDir + "\\Mega\\" + f
    
    wordsConcepts = logSemanticTerms(inputFile, outputFile)

    Concepts.saveConcepts(wordsConcepts, wordsFile)

    if GRAPH:
        graph.write_png('SemanticTerms.png')

        topic = "WORD"
        tc = TopicCloud(wordsConcepts, os.getcwd() + os.sep)

        tc.createCloudImage(topic, size_x=1200, size_y=900, numWords=50, scale=1)
Example #30
    def dependancyAnalysis(self, listTSort):

        index = 0
        for x in listTSort:
            logger.debug(u"%d %s[%s] -%s-> %s[%s]" % (index, self.al.dictNodes[x[0]][u"name"], self.al.dictNodes[x[0]][ARCHI_TYPE], u"UsedBy",
                                                    self.al.dictNodes[x[1]][u"name"], self.al.dictNodes[x[1]][ARCHI_TYPE]))
            index += 1

            self.al.addToNodeDict(self.al.dictNodes[x[0]][u"name"], self.al.dictBP)
            self.al.addToNodeDict(self.al.dictNodes[x[1]][u"name"], self.al.dictBP)

        logger.info(u"Topic Sort Candidates : %d" % (len(listTSort)))

        nodes = list()
        index = 0
        dictTasks = dict()
        for x in listTSort:
            sname = self.al.dictNodes[x[0]][u"name"]
            tname = self.al.dictNodes[x[1]][u"name"]
            index += 1
            logger.debug(u"%d %s -%s-> %s" % (index, sname, u"UsedBy", tname))

            if sname in dictTasks:
                ln = dictTasks[sname]
                ln.append(tname)
            else:
                ln = list()
                ln.append(tname)
                dictTasks[sname] = ln

        for x in dictTasks.keys():
            logger.debug(u"dictTasks[%s]=%s" % (x, dictTasks[x]))
            a = Task(x, dictTasks[x])
            nodes.append(a)

        for x in self.al.dictBP.keys():
            # for x in listBP:
            if x not in dictTasks:
                logger.debug(u"Add %s" % (x))
                a = Task(x, list())
                nodes.append(a)

        self.format_nodes(nodes)

        conceptBatches = Concepts(u"Batch", u"archimate:WorkPackage")

        n = 0
        logger.info(u"Batches:")
        batches = self.get_task_batches(nodes)
        for bundle in batches:
            n += 1
            name = u"Batch %d" % n
            c = conceptBatches.addConceptKeyType(name, u"archimate:WorkPackage")
            for node in bundle:
                c.addConceptKeyType(node.name, u"archimate:BusinessProcess")

            logger.info(u"%d : %s" % (n, ", ".join(node.name.lstrip() for node in bundle)))

        Concepts.saveConcepts(conceptBatches, fileConceptsBatches)

        return conceptBatches
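Task, format_nodes and get_task_batches are defined elsewhere; from the usage above, get_task_batches evidently peels off the tasks whose dependencies are already satisfied, one batch at a time. A plausible sketch of that pattern, under the assumption that a Task carries a name and a collection of dependency names (this is not the project's code):

class Task(object):
    def __init__(self, name, depends):
        self.name = name
        self.depends = set(depends)

def get_task_batches(nodes):
    batches, done, pending = [], set(), list(nodes)
    while pending:
        # Every task whose dependencies are all done forms the next batch.
        batch = [t for t in pending if t.depends <= done]
        if not batch:
            raise ValueError(u"Circular dependency among remaining tasks")
        done.update(t.name for t in batch)
        pending = [t for t in pending if t not in batch]
        batches.append(batch)
    return batches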
Example #31
    def find_collocations(self):
        lemmatizer = WordNetLemmatizer()

        stopset = set(stop)
        filter_stops = lambda w: len(w) < 3 or w in stopset

        words = list()

        dictWords = dict()

        for document in self.concepts.getConcepts().values():
            logger.debug(document.name)
            for concept in document.getConcepts().values():
                logger.debug(concept.name)

                for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept.name)):
                    logger.debug(u"Word: " + word + u" POS: " + pos)
                    lemmaWord = lemmatizer.lemmatize(word.lower())
                    logger.debug(u"Word: " + word + u" Lemma: " + lemmaWord)
                    words.append(lemmaWord)

                    if pos[0] == u"N":
                        dictWords[lemmaWord] = word

        for x in dictWords.keys():
            logger.info(u"noun : %s" % x)

        bcf = BigramCollocationFinder.from_words(words)
        tcf = TrigramCollocationFinder.from_words(words)

        bcf.apply_word_filter(filter_stops)
        tcf.apply_word_filter(filter_stops)
        tcf.apply_freq_filter(3)

        listBCF = bcf.nbest(BigramAssocMeasures.likelihood_ratio, 100)

        for bigram in listBCF:
            concept = u' '.join([bg for bg in bigram])
            e = self.conceptsNGram.addConceptKeyType(concept, u"BiGram")
            logger.info(u"Bigram  : %s" % concept)
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept)):
                e.addConceptKeyType(word, pos)

        listTCF = tcf.nbest(TrigramAssocMeasures.likelihood_ratio, 100)

        for trigram in listTCF:
            concept = u' '.join([bg for bg in trigram])
            e = self.conceptsNGram.addConceptKeyType(concept, u"TriGram")
            logger.info(u"Trigram : %s" % concept)
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept)):
                e.addConceptKeyType(word, pos)
            
        bcfscored = bcf.score_ngrams(BigramAssocMeasures.likelihood_ratio)
        lt = sorted(bcfscored, key=lambda c: c[1], reverse=True)
        for score in lt:
            name = ' '.join([w for w in score[0]])
            count = float(score[1])
            e = self.conceptsNGramScore.addConceptKeyType(name, u"BiGram")
            for x in score[0]:
                e.addConceptKeyType(x, u"BWord")
            e.count = count
            logger.debug(u"bcfscored: %s=%s" % (name, count))

        tcfscored = tcf.score_ngrams(TrigramAssocMeasures.likelihood_ratio)
        lt = sorted(tcfscored, key=lambda c: c[1], reverse=True)
        for score in lt:
            name = ' '.join([w for w in score[0]])
            count = float(score[1])
            e = self.conceptsNGramScore.addConceptKeyType(name, u"TriGram")
            for x in score[0]:
                e.addConceptKeyType(x, u"TWord")
            e.count = count
            logger.debug(u"tcfscored: %s = %s" % (name, count))

        Concepts.saveConcepts(self.conceptsNGramScore, self.ngramScoreFile)
        Concepts.saveConcepts(self.conceptsNGram, self.ngramFile)

        for concept in self.conceptsNGram.getConcepts().values():
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept.name)):
                if pos[0] == u"N":
                    e = self.conceptsNGramSubject.addConceptKeyType(word, pos)
                    e.addConceptKeyType(concept.name, u"NGRAM")

        Concepts.saveConcepts(self.conceptsNGramSubject, self.ngramSubjectFile)
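The collocation machinery used above ships with NLTK; the imports assumed by find_collocations would look roughly like this:

import nltk
from nltk.collocations import BigramCollocationFinder, TrigramCollocationFinder
from nltk.metrics import BigramAssocMeasures, TrigramAssocMeasures
from nltk.stem import WordNetLemmatizer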
Example #32
    def findSimilarties(self, conceptsSimilarityFile):

        logger.info(u"Compute Similarity")

        self.conceptsSimilarity = Concepts(u"ConceptsSimilarity", u"Similarities")

        # Compute similarity between documents / concepts
        similarityThreshold = similarity

        if THREAD:
            # startup the threads
            for threadID in range(ThreadDepth):
                thread = myThread(threadID, self)
                thread.start()
                self.threads.append(thread)

        for document in self.documentsList:
            indexNum = self.documentsList.index(document)

            logger.info(u"Document %s" % (self.df[indexNum]))

            pj = self.conceptsSimilarity.addConceptKeyType(self.df[indexNum], u"Document")

            logger.debug(u"  documentsList[%d] = %s" % (indexNum, str(document)))

            # Show common topics

            d = [unicode(x).strip().replace(u"'", u"") for x in document]
            e = [unicode(y).strip().replace(u"\"", u"") for y in self.listTopics]

            s1 = set(e)
            s2 = set(d)
            common = s1 & s2
            lc = [x for x in common]
            logger.debug(u"  Common Topics : %s" % (lc))

            if THREAD is False:

                self.doComputation(document, similarityThreshold, pj, Topics=True)

            else:
                logger.debug(u"npbtAquire  Queue Lock")
                queueLock.acquire()
                logger.debug(u"npbtPut     Queue     ")
                rl = [document, similarityThreshold]

                while workQueue.qsize() == QueueDepth:
                    time.sleep(1)

                workQueue.put(rl)
                queueLock.release()
                logger.debug(u"npbtRelease Queue Lock")

                qs = workQueue.qsize()
                if qs % QueueDelta == 0:
                    logger.info(u"rQueue Size = %s" % qs)

                # Wait for queue to empty
                qs = workQueue.qsize()
                while qs != 0:
                    time.sleep(1)
                    qs = workQueue.qsize()
                    if qs % QueueDelta == 0:
                        logger.info(u"wQueue Size = %s" % qs)

                # Notify threads it's time to exit.
                # NOTE: this only rebinds a local name; for workers polling a
                # module-level flag to see it, this method would need a
                # "global exitFlag" declaration (or a shared threading.Event).
                exitFlag = 1

                # Wait for all threads to complete
                for t in self.threads:
                    logger.info(u"Waiting for thread %s to end..." % t)
                    t.join(0.5)

        Concepts.saveConcepts(self.conceptsSimilarity, conceptsSimilarityFile)

        # Concepts.outputConceptsToCSV(self.conceptsSimilarity, fileExport=u"BusinessRequirements.csv")

        logger.info(u"Complete - findSimilarties")

        return self.conceptsSimilarity
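THREAD, ThreadDepth, QueueDepth, QueueDelta, queueLock, workQueue, exitFlag and myThread are module-level names that never appear in the excerpt. A hypothetical sketch of that scaffolding in the same Python 2 spelling:

import threading
import time
import Queue  # "queue" on Python 3

THREAD = True
ThreadDepth = 4
QueueDepth = 100
QueueDelta = 10
exitFlag = 0

queueLock = threading.Lock()
workQueue = Queue.Queue(QueueDepth)

class myThread(threading.Thread):
    def __init__(self, threadID, owner):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.owner = owner

    def run(self):
        # Poll the shared queue until the module-level exitFlag is raised.
        while not exitFlag:
            queueLock.acquire()
            if workQueue.empty():
                queueLock.release()
                time.sleep(0.1)
            else:
                document, threshold = workQueue.get()
                queueLock.release()
                # pj (the parent concept) is not available to a worker;
                # this sketch passes None, unlike the direct call above.
                self.owner.doComputation(document, threshold, None, Topics=True)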