def createNetFromSentences(self, sentences):
    "This function creates the network starting from a set of sentences"
    text_filter = TextFilter()
    for sentence in sentences:
        filtered_sentence = text_filter.filter_all(sentence)
        tokens = nltk.word_tokenize(filtered_sentence)
        single_tokens = list(set(tokens))
        for token in single_tokens:
            if not self.gr.has_node(token):
                self.gr.add_node(str(token))
        for i, token in enumerate(tokens):
            if i != 0:
                edge = (tokens[i - 1], token)
                if not self.gr.has_edge(edge):
                    self.gr.add_edge(edge, wt=1.0, label=START_OCCURRENCES_NUM)
                else:
                    # If the edge exists, its weight shall be divided by the number
                    # of occurrences of the pair of terms. Therefore, we keep track
                    # of the number of occurrences of each pair in the edge label.
                    number_of_occurrences = self.gr.edge_label(edge)
                    new_number_of_occurrences = number_of_occurrences + 1
                    self.gr.set_edge_label(edge, new_number_of_occurrences)
                    self.gr.set_edge_weight(edge, wt=1.0 / new_number_of_occurrences)
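A minimal usage sketch (assuming this method belongs to the SentenceNetCreator class used in the scripts below, and that START_OCCURRENCES_NUM is 1): pairs of adjacent terms that co-occur often get lighter edges, which makes them cheaper for the A* search to traverse.

creator = SentenceNetCreator()
creator.createNetFromSentences(["the system shall display similar books",
                                "the system shall display similar items"])
# token pairs that survive filtering in both sentences have occurred twice,
# so their edge weight drops from 1.0 to 1.0 / 2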
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# OBJECT_CHANGE is assumed to be a constant defined elsewhere in this package


class TransformationRecommender(object):
    '''
    This class recommends a transformation according to the model information
    (a ModelInfo object) and the query issued.

    This object is a Singleton, since it does not hold private data but only
    functions: the __new__ override below implements the singleton.
    '''
    _instance = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            # object.__new__ takes no extra arguments, so only cls is passed
            cls._instance = super(TransformationRecommender, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        self.tf = TextFilter()
        self.wordTokenizer = TreebankWordTokenizer()

    def getRecommendedTransformation(self, modelInfo, query):
        '''
        If the input sentence is the same as the title, except for the part
        of the title that specifies the object, then "object change" shall
        be suggested.
        '''
        title = modelInfo.getName()
        titleFiltered = self.tf.filter_all_except_stem(title)
        titleToks = self.wordTokenizer.tokenize(titleFiltered)
        titleToksNoObj = [t for t in titleToks if t not in modelInfo.getObjects()]
        queryFiltered = self.tf.filter_all_except_stem(query)
        sentenceToks = self.wordTokenizer.tokenize(queryFiltered)
        if set(titleToksNoObj).issubset(sentenceToks):
            return OBJECT_CHANGE
        else:
            return ''
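A short usage sketch, relying only on ModelInfo accessors already used in this codebase (setName and setObjects appear in RequirementsModel below); the title, objects and query are made up:

info = ModelInfo("m1")
info.setName("display books")
info.setObjects(["books"])
recommender = TransformationRecommender()
assert recommender is TransformationRecommender()  # singleton: one shared instance
# every non-object token of the title ("display") also appears in the query,
# so the recommended transformation is OBJECT_CHANGE
print recommender.getRecommendedTransformation(info, "display similar journals")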
sentenceNetGediminas.write_graph('gediminas_graph.gv')
print 'Gediminas size', len(sentenceNetGediminas.get_net().nodes())

sentenceNetRicci = SentenceNetCreator()
sentenceNetRicci.createNet([fp3])
sentenceNetRicci.write_graph('ricci_graph.gv')
print 'Ricci size', len(sentenceNetRicci.get_net().nodes())

sentenceNetWikiGedi = SentenceNetCreator()
sentenceNetWikiGedi.createNet([fp4])
sentenceNetWikiGedi.write_graph('wikigedi_graph.gv')
print 'WikiGedi size', len(sentenceNetWikiGedi.get_net().nodes())

print "Weighted Knowledge Graphs created"

terms_filter = TextFilter()
sentence = "The system shall display similar books"
filtered_sent = terms_filter.filter_all(sentence)

visitor_wiki = SentenceNetVisitor(sentenceNetWiki.get_net(),
                                  sentenceNetWiki.get_edge_start_weight(),
                                  sentenceNetWiki.get_start_occurrences_num())
path_wiki, path_weight_wiki = visitor_wiki.search_A_star(filtered_sent)
print path_wiki
print path_weight_wiki

visitor_gediminas = SentenceNetVisitor(sentenceNetGediminas.get_net(),
                                       sentenceNetGediminas.get_edge_start_weight(),
                                       sentenceNetGediminas.get_start_occurrences_num())
path_gediminas, path_weight_gediminas = visitor_gediminas.search_A_star(filtered_sent)
print path_gediminas
print path_weight_gediminas
from SentenceNetVisitor import SentenceNetVisitor
from XMLReqManager import XMLReqManager
from SentenceNetCreator import SentenceNetCreator
from irutils.TextFilter import TextFilter

s1 = SentenceNetCreator()
n1 = s1.get_net()
v1 = SentenceNetVisitor(n1, s1.get_edge_start_weight(), s1.get_start_occurrences_num())

xml_doc_handler = XMLReqManager('req_document.xsd', '2007 - eirene fun 7.xml')
req_document = xml_doc_handler.get_requirements_text()

terms_filter = TextFilter()
for sent in req_document:
    filtered_sent = terms_filter.filter_all(sent)
    path1, path_weight1 = v1.search_A_star(filtered_sent)

print 'now producing a random sentence according to the document learnt...'
print v1.get_random_sentence('network', 100)
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, ElementTree
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# ModelInfo, OBJ_SEPARATOR and the STEM/WORD/GOAL_STRING constants are
# assumed to come from local modules of this package


class RequirementsModel(object):
    '''
    This class embeds the information residing in the XML of a requirements
    model passed as input parameter during construction
    '''

    def __init__(self, modelID, inputXMLfilepath="", modelType="", title="",
                 objects=None):
        '''
        Constructor

        @param modelID: identifier of the model
        @param inputXMLfilepath: path to the input XML file containing the
            model; if this parameter is left empty, a new XML tree is created
        @param modelType: KAOS, TROPOS, or any other kind of model
        '''
        # a mutable default argument ([]) would be shared across calls,
        # so None is used as the default instead
        if objects is None:
            objects = []
        self.textFilter = TextFilter()
        self.wordTokenizer = TreebankWordTokenizer()
        self.maxID = "100"
        #@todo: we have to set the current maximum to the actual maximum
        #value for the model
        self.modelInfo = ModelInfo(modelID)
        if not inputXMLfilepath == "":
            self.modelInfo.setLocation(inputXMLfilepath)
            self.tree = ET.parse(self.modelInfo.getLocation())
        else:
            attributes = dict()
            attributes['type'] = modelType
            attributes['title'] = title
            # XML attribute values must be strings, so the object list is
            # joined with the same separator that __loadModelInfo splits on
            attributes['object'] = OBJ_SEPARATOR.join(objects)
            root = Element("MODEL", attributes)
            self.tree = ElementTree(root)
        # the caches below are loaded for both existing and new models
        self.__loadModelInfo(self.modelInfo)
        self.modelGoals = self.__loadModelGoals()
        self.modelWords = self.__loadModelWords()
        self.modelStems = self.__loadModelStems()

    def __loadModelInfo(self, modelInfo):
        '''
        This function loads the name of the model from the "title" field of
        the MODEL tag, together with the type and the objects, and stores
        this information in the ModelInfo object
        '''
        root = self.tree.getroot()
        modelInfo.setName(self.textFilter.lower_all(root.get("title")))
        modelInfo.setType(self.textFilter.lower_all(root.get("type")))
        objects = root.get("object").strip().split(OBJ_SEPARATOR)
        lowercaseObjects = [self.textFilter.lower_all(o) for o in objects]
        modelInfo.setObjects(lowercaseObjects)

    def __loadModelGoals(self):
        '''
        The function loads the goal names included in the model and returns
        a list with all the goals of the model.
        The goal names are stored in lowercase
        '''
        root = self.tree.getroot()
        goalNames = list()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal':
                goalNames.append(self.textFilter.lower_all(child.attrib['name']))
        return goalNames

    def __loadModelWords(self):
        '''
        The function loads the words included in the model and returns a
        dictionary with all the words of the model and their frequency
        '''
        tokenizedWords = dict()
        if self.modelGoals is not None:
            for name in self.modelGoals:
                nameFiltered = self.textFilter.filter_all_except_stem(name)
                words = self.wordTokenizer.tokenize(nameFiltered)
                for word in words:
                    if word not in tokenizedWords:
                        tokenizedWords[word] = 1
                    else:
                        tokenizedWords[word] = tokenizedWords[word] + 1
        return tokenizedWords

    def __loadModelStems(self):
        '''
        The function loads the stems included in the model and returns a
        dictionary with all the stems of the model and their frequency
        '''
        tokenizedStems = dict()
        if self.modelWords is not None:
            for w in self.modelWords.keys():
                stem = self.textFilter.filter_all(w)
                if stem not in tokenizedStems:
                    tokenizedStems[stem] = self.modelWords[w]
                else:
                    tokenizedStems[stem] = tokenizedStems[stem] + self.modelWords[w]
        return tokenizedStems

    def __getModelStems(self):
        return self.modelStems.keys()

    def __getModelWords(self):
        return self.modelWords.keys()

    def __getModelGoals(self):
        return self.modelGoals

    def __getModelStemsAndFreq(self):
        return self.modelStems

    def __getModelWordsAndFreq(self):
        return self.modelWords

    def getModelInfo(self):
        return self.modelInfo

    def getModelID(self):
        return self.modelInfo.getId()

    def getModelKeys(self, keyType):
        if keyType == STEM_STRING:
            return self.__getModelStems()
        if keyType == WORD_STRING:
            return self.__getModelWords()
        if keyType == GOAL_STRING:
            return self.__getModelGoals()

    def getModelKeysAndFrequencies(self, keyType):
        if keyType == STEM_STRING:
            return self.__getModelStemsAndFreq()
        if keyType == WORD_STRING:
            return self.__getModelWordsAndFreq()
        if keyType == GOAL_STRING:
            # each goal is mapped to a frequency of 1
            return dict(zip(self.__getModelGoals(),
                            [1] * len(self.__getModelGoals())))

    def changeTitle(self, newTitle):
        '''
        This function shall change the title of the model, which means
        changing the modelInfo and the XML of the model
        '''
        root = self.tree.getroot()
        root.set("title", newTitle)
        # the call below updates the modelInfo structure
        self.__loadModelInfo(self.modelInfo)

    def changeObjects(self, newObjectsList):
        '''
        This function shall change the objects of the model, which means
        changing the modelInfo but also the XML of the model
        '''
        root = self.tree.getroot()
        # the objects are joined with the same separator that
        # __loadModelInfo uses to split them when reading the model back
        newObjects = OBJ_SEPARATOR.join(newObjectsList)
        root.set("object", newObjects)
        self.__loadModelInfo(self.modelInfo)

    def changeGoalName(self, goalID, newGoalName):
        '''
        @param goalID: ID of the goal that shall have a new name
        @param newGoalName: string representing the new name of the goal
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal' and child.attrib['id'] == goalID:
                child.attrib['name'] = newGoalName

    def searchGoalByName(self, goalName):
        '''
        @param goalName: name of the goal to be searched
        @return: goalID, the unique ID of the goal, if the goal exists;
            -1, if the goal is not found
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal' and child.attrib['name'] == goalName:
                return child.attrib['id']
        return -1

    def searchGoalsBySubstring(self, goalSubstring, caseSensitive="NO"):
        '''
        @param goalSubstring: a substring that shall be
        searched among the goal names. By default the search is not
            case sensitive
        @return: a dictionary mapping the IDs of the goals whose names
            include goalSubstring to the corresponding goal names
        '''
        root = self.tree.getroot()
        goalDict = dict()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal':
                if caseSensitive == "NO":
                    if self.textFilter.lower_all(goalSubstring) in \
                            self.textFilter.lower_all(child.attrib['name']):
                        goalDict[child.attrib['id']] = child.attrib['name']
                else:
                    if goalSubstring in child.attrib['name']:
                        goalDict[child.attrib['id']] = child.attrib['name']
        return goalDict

    def __assignUniqueIDs(self, treeRoot):
        '''
        This function assigns unique IDs to all the objects of type ENTITY
        in @param treeRoot
        '''
        currentMaxId = self.maxID
        for child in treeRoot.iter('ENTITY'):
            currentMaxId = str(int(currentMaxId) + 1)
            child.attrib['id'] = currentMaxId
        self.maxID = currentMaxId

    def insertTree(self, parentID, childTree):
        '''
        Given a @param childTree, which is a tree or a single node, this is
        added as a child of parentID, below the first refinement of the
        parent. The assumption here is that each parent can have ONLY ONE
        TYPE of refinement. Unique IDs for the child elements are assigned
        dynamically by the function.
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['id'] == parentID:
                refinement = child.findall("REFINEMENT")
                if refinement and len(refinement) == 1:
                    # ONLY ONE TYPE of refinement is allowed for each element
                    self.__assignUniqueIDs(childTree)
                    refinement[0].append(childTree)
                return

    def saveModelAs(self, destinationFilePath):
        '''
        @param destinationFilePath: path of the file where the model shall
            be saved
        @todo: currently the model is saved to another location and the
            original location is lost; therefore, the model currently keeps
            the same ID. We have to change this behaviour.
        '''
        self.modelInfo.setLocation(destinationFilePath)
        self.saveModel()

    def saveModel(self):
        '''
        Save the model in the same destination as the input file and with
        the original name
        '''
        try:
            self.tree.write(self.modelInfo.getLocation())
        except IOError:
            print "IOError: Saving to a path that does not exist! Use saveModelAs() instead"
        except:
            print "An error occurred"
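A minimal end-to-end sketch of RequirementsModel (paths and names are illustrative; since no input XML path is given, a new empty model is created):

model = RequirementsModel("m42", modelType="KAOS",
                          title="display books", objects=["books"])
goalID = model.searchGoalByName("display similar books")
if goalID != -1:
    model.changeGoalName(goalID, "display related books")
model.saveModelAs("models/m42.xml")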
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# TransformationRecommender, QueryResult and STEM_STRING are assumed to be
# importable from local modules of this package


class QueryManager(object):
    '''
    Given a specification query, this object returns a set of models,
    together with possible transformations that can be applied to each
    model to satisfy the specification query
    '''

    def __init__(self, modelIndexManager):
        '''
        @param modelIndexManager: reference to the place where the models
            are indexed
        '''
        self.textFilter = TextFilter()
        self.modelIndexManager = modelIndexManager
        self.wordTokenizer = TreebankWordTokenizer()
        self.tRecommender = TransformationRecommender()

    def __parseQuery(self, queryString):
        '''
        This function returns the words included in queryString, after
        filtering out all the stopwords, performing stemming and applying
        all the filters provided by textFilter
        @param queryString: the specification query in the form of a string
        '''
        filteredQueryString = self.textFilter.filter_all(queryString)
        return self.wordTokenizer.tokenize(filteredQueryString)

    def issueQuery(self, queryString):
        '''
        This is the main function of this class. Given the specification
        query, the function parses the specification and returns a set of
        QueryResult objects, which include the links to the models.
        @param queryString: the specification query in the form of a string
        @return: a list of QueryResult objects
        @todo: for each model we shall understand which is the best
            transformation. To this end, an additional class is required.
            Currently, we always add the object change transformation
            together with each model found.
        '''
        qr = list()
        stems = self.__parseQuery(queryString)
        for stem in stems:
            modelsInfos = self.modelIndexManager.searchModels(stem, STEM_STRING)
            if modelsInfos is not None:
                for modelInfo in modelsInfos:
                    score = 0.1
                    transformation = self.tRecommender.getRecommendedTransformation(modelInfo, queryString)
                    qr.append(QueryResult(modelInfo, [transformation], score))
        # the list is ordered by the score attribute, highest score first
        qr.sort(key=lambda x: x.score)
        qr.reverse()
        return qr
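A brief usage sketch (modelIndexManager stands for any index exposing the searchModels(key, keyType) call used above; the modelInfo attribute of QueryResult is an assumption about its layout):

queryManager = QueryManager(modelIndexManager)
results = queryManager.issueQuery("the system shall display similar books")
for result in results:
    # highest-scoring results come first
    print result.modelInfo.getName(), result.score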
from SentenceNetVisitor import SentenceNetVisitor
from XMLReqManager import XMLReqManager
from SentenceNetCreator import SentenceNetCreator
from irutils.TextFilter import TextFilter

s1 = SentenceNetCreator()
n1 = s1.get_net()
v1 = SentenceNetVisitor(n1, s1.get_edge_start_weight(), s1.get_start_occurrences_num())

xml_doc_handler = XMLReqManager('req_document.xsd', '2007 - eirene fun 7.xml')
req_document = xml_doc_handler.get_requirements_text()

terms_filter = TextFilter()
for sent in req_document:
    filtered_sent = terms_filter.filter_all(sent)
    filtered_sent = terms_filter.remove_item(filtered_sent, "\"")
    filtered_sent = terms_filter.remove_item(filtered_sent, "-")
    print filtered_sent
    v1.search_A_star(filtered_sent)

s1.write_graph("eireneGraph.gv")