def createNetFromSentences(self, sentences):
    "This function creates the network starting from a set of sentences"
    text_filter = TextFilter()
    for sentence in sentences:
        filtered_sentence = text_filter.filter_all(sentence)
        tokens = nltk.word_tokenize(filtered_sentence)
        single_tokens = list(set(tokens))
        for token in single_tokens:
            if not self.gr.has_node(token):
                self.gr.add_node(str(token))
        for i, token in enumerate(tokens):
            if i != 0:
                edge = (tokens[i - 1], token)
                if not self.gr.has_edge(edge):
                    self.gr.add_edge(edge, wt=1.0, label=START_OCCURRENCES_NUM)
                else:
                    # If the edge exists, its weight shall be divided by the number
                    # of occurrences of the pair of terms. Therefore, we keep track
                    # of the number of occurrences of each pair in the edge label.
                    number_of_occurrences = self.gr.edge_label(edge)
                    new_number_of_occurrences = number_of_occurrences + 1
                    self.gr.set_edge_label(edge, new_number_of_occurrences)
                    self.gr.set_edge_weight(edge, wt=1.0 / new_number_of_occurrences)
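A minimal usage sketch (assuming this method belongs to the SentenceNetCreator class used in the scripts below, and that START_OCCURRENCES_NUM is 1): pairs of adjacent terms that co-occur often get lighter edges, which makes them cheaper for the A* search to traverse.

creator = SentenceNetCreator()
creator.createNetFromSentences(["the system shall display similar books",
                                "the system shall display similar items"])
# token pairs that survive filtering in both sentences have occurred twice,
# so their edge weight drops from 1.0 to 1.0 / 2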
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# OBJECT_CHANGE is assumed to be a constant defined elsewhere in this package


class TransformationRecommender(object):
    '''
    This class recommends a transformation according to the model information
    (a ModelInfo object) and the query issued.

    This object is a Singleton, since it does not hold private data but only
    functions: the __new__ override below implements the singleton.
    '''
    _instance = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            # object.__new__ takes no extra arguments, so only cls is passed
            cls._instance = super(TransformationRecommender, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        self.tf = TextFilter()
        self.wordTokenizer = TreebankWordTokenizer()

    def getRecommendedTransformation(self, modelInfo, query):
        '''
        If the input sentence is the same as the title, except for the part
        of the title that specifies the object, then "object change" shall
        be suggested.
        '''
        title = modelInfo.getName()
        titleFiltered = self.tf.filter_all_except_stem(title)
        titleToks = self.wordTokenizer.tokenize(titleFiltered)
        titleToksNoObj = [t for t in titleToks if t not in modelInfo.getObjects()]
        queryFiltered = self.tf.filter_all_except_stem(query)
        sentenceToks = self.wordTokenizer.tokenize(queryFiltered)
        if set(titleToksNoObj).issubset(sentenceToks):
            return OBJECT_CHANGE
        else:
            return ''
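A short usage sketch, relying only on ModelInfo accessors already used in this codebase (setName and setObjects appear in RequirementsModel below); the title, objects and query are made up:

info = ModelInfo("m1")
info.setName("display books")
info.setObjects(["books"])
recommender = TransformationRecommender()
assert recommender is TransformationRecommender()  # singleton: one shared instance
# every non-object token of the title ("display") also appears in the query,
# so the recommended transformation is OBJECT_CHANGE
print recommender.getRecommendedTransformation(info, "display similar journals")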
sentenceNetGediminas.write_graph('gediminas_graph.gv')
print 'Gediminas size', len(sentenceNetGediminas.get_net().nodes())

sentenceNetRicci = SentenceNetCreator()
sentenceNetRicci.createNet([fp3])
sentenceNetRicci.write_graph('ricci_graph.gv')
print 'Ricci size', len(sentenceNetRicci.get_net().nodes())

sentenceNetWikiGedi = SentenceNetCreator()
sentenceNetWikiGedi.createNet([fp4])
sentenceNetWikiGedi.write_graph('wikigedi_graph.gv')
print 'WikiGedi size', len(sentenceNetWikiGedi.get_net().nodes())

print "Weighted Knowledge Graphs created"

terms_filter = TextFilter()
sentence = "The system shall display similar books"
filtered_sent = terms_filter.filter_all(sentence)

visitor_wiki = SentenceNetVisitor(sentenceNetWiki.get_net(),
                                  sentenceNetWiki.get_edge_start_weight(),
                                  sentenceNetWiki.get_start_occurrences_num())
path_wiki, path_weight_wiki = visitor_wiki.search_A_star(filtered_sent)
print path_wiki
print path_weight_wiki

visitor_gediminas = SentenceNetVisitor(sentenceNetGediminas.get_net(),
                                       sentenceNetGediminas.get_edge_start_weight(),
                                       sentenceNetGediminas.get_start_occurrences_num())
path_gediminas, path_weight_gediminas = visitor_gediminas.search_A_star(filtered_sent)
print path_gediminas
print path_weight_gediminas
from SentenceNetVisitor import SentenceNetVisitor
from XMLReqManager import XMLReqManager
from SentenceNetCreator import SentenceNetCreator
from irutils.TextFilter import TextFilter

s1 = SentenceNetCreator()
n1 = s1.get_net()
v1 = SentenceNetVisitor(n1, s1.get_edge_start_weight(), s1.get_start_occurrences_num())

xml_doc_handler = XMLReqManager('req_document.xsd', '2007 - eirene fun 7.xml')
req_document = xml_doc_handler.get_requirements_text()

terms_filter = TextFilter()
for sent in req_document:
    filtered_sent = terms_filter.filter_all(sent)
    path1, path_weight1 = v1.search_A_star(filtered_sent)

print 'now producing a random sentence according to the document learnt...'
print v1.get_random_sentence('network', 100)
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, ElementTree
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# ModelInfo, OBJ_SEPARATOR and the STEM/WORD/GOAL_STRING constants are
# assumed to come from local modules of this package


class RequirementsModel(object):
    '''
    This class embeds the information residing in the XML of a requirements
    model passed as input parameter during construction
    '''

    def __init__(self, modelID, inputXMLfilepath="", modelType="", title="",
                 objects=None):
        '''
        Constructor

        @param modelID: identifier of the model
        @param inputXMLfilepath: path to the input XML file containing the
            model; if this parameter is left empty, a new XML tree is created
        @param modelType: KAOS, TROPOS, or any other kind of model
        '''
        # a mutable default argument ([]) would be shared across calls,
        # so None is used as the default instead
        if objects is None:
            objects = []
        self.textFilter = TextFilter()
        self.wordTokenizer = TreebankWordTokenizer()
        self.maxID = "100"
        #@todo: we have to set the current maximum to the actual maximum
        #value for the model
        self.modelInfo = ModelInfo(modelID)
        if not inputXMLfilepath == "":
            self.modelInfo.setLocation(inputXMLfilepath)
            self.tree = ET.parse(self.modelInfo.getLocation())
        else:
            attributes = dict()
            attributes['type'] = modelType
            attributes['title'] = title
            # XML attribute values must be strings, so the object list is
            # joined with the same separator that __loadModelInfo splits on
            attributes['object'] = OBJ_SEPARATOR.join(objects)
            root = Element("MODEL", attributes)
            self.tree = ElementTree(root)
        # the caches below are loaded for both existing and new models
        self.__loadModelInfo(self.modelInfo)
        self.modelGoals = self.__loadModelGoals()
        self.modelWords = self.__loadModelWords()
        self.modelStems = self.__loadModelStems()

    def __loadModelInfo(self, modelInfo):
        '''
        This function loads the name of the model from the "title" field of
        the MODEL tag, together with the type and the objects, and stores
        this information in the ModelInfo object
        '''
        root = self.tree.getroot()
        modelInfo.setName(self.textFilter.lower_all(root.get("title")))
        modelInfo.setType(self.textFilter.lower_all(root.get("type")))
        objects = root.get("object").strip().split(OBJ_SEPARATOR)
        lowercaseObjects = [self.textFilter.lower_all(o) for o in objects]
        modelInfo.setObjects(lowercaseObjects)

    def __loadModelGoals(self):
        '''
        The function loads the goal names included in the model and returns
        a list with all the goals of the model.
        The goal names are stored in lowercase
        '''
        root = self.tree.getroot()
        goalNames = list()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal':
                goalNames.append(self.textFilter.lower_all(child.attrib['name']))
        return goalNames

    def __loadModelWords(self):
        '''
        The function loads the words included in the model and returns a
        dictionary with all the words of the model and their frequency
        '''
        tokenizedWords = dict()
        if self.modelGoals is not None:
            for name in self.modelGoals:
                nameFiltered = self.textFilter.filter_all_except_stem(name)
                words = self.wordTokenizer.tokenize(nameFiltered)
                for word in words:
                    if word not in tokenizedWords:
                        tokenizedWords[word] = 1
                    else:
                        tokenizedWords[word] = tokenizedWords[word] + 1
        return tokenizedWords

    def __loadModelStems(self):
        '''
        The function loads the stems included in the model and returns a
        dictionary with all the stems of the model and their frequency
        '''
        tokenizedStems = dict()
        if self.modelWords is not None:
            for w in self.modelWords.keys():
                stem = self.textFilter.filter_all(w)
                if stem not in tokenizedStems:
                    tokenizedStems[stem] = self.modelWords[w]
                else:
                    tokenizedStems[stem] = tokenizedStems[stem] + self.modelWords[w]
        return tokenizedStems

    def __getModelStems(self):
        return self.modelStems.keys()

    def __getModelWords(self):
        return self.modelWords.keys()

    def __getModelGoals(self):
        return self.modelGoals

    def __getModelStemsAndFreq(self):
        return self.modelStems

    def __getModelWordsAndFreq(self):
        return self.modelWords

    def getModelInfo(self):
        return self.modelInfo

    def getModelID(self):
        return self.modelInfo.getId()

    def getModelKeys(self, keyType):
        if keyType == STEM_STRING:
            return self.__getModelStems()
        if keyType == WORD_STRING:
            return self.__getModelWords()
        if keyType == GOAL_STRING:
            return self.__getModelGoals()

    def getModelKeysAndFrequencies(self, keyType):
        if keyType == STEM_STRING:
            return self.__getModelStemsAndFreq()
        if keyType == WORD_STRING:
            return self.__getModelWordsAndFreq()
        if keyType == GOAL_STRING:
            # each goal is mapped to a frequency of 1
            return dict(zip(self.__getModelGoals(),
                            [1] * len(self.__getModelGoals())))

    def changeTitle(self, newTitle):
        '''
        This function shall change the title of the model, which means
        changing the modelInfo and the XML of the model
        '''
        root = self.tree.getroot()
        root.set("title", newTitle)
        # the call below updates the modelInfo structure
        self.__loadModelInfo(self.modelInfo)

    def changeObjects(self, newObjectsList):
        '''
        This function shall change the objects of the model, which means
        changing the modelInfo but also the XML of the model
        '''
        root = self.tree.getroot()
        # the objects are joined with the same separator that
        # __loadModelInfo uses to split them when reading the model back
        newObjects = OBJ_SEPARATOR.join(newObjectsList)
        root.set("object", newObjects)
        self.__loadModelInfo(self.modelInfo)

    def changeGoalName(self, goalID, newGoalName):
        '''
        @param goalID: ID of the goal that shall have a new name
        @param newGoalName: string representing the new name of the goal
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal' and child.attrib['id'] == goalID:
                child.attrib['name'] = newGoalName

    def searchGoalByName(self, goalName):
        '''
        @param goalName: name of the goal to be searched
        @return: goalID, the unique ID of the goal, if the goal exists;
            -1, if the goal is not found
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal' and child.attrib['name'] == goalName:
                return child.attrib['id']
        return -1

    def searchGoalsBySubstring(self, goalSubstring, caseSensitive="NO"):
        '''
        @param goalSubstring: a substring that shall be
        searched among the goal names. By default the search is not
            case sensitive
        @return: a dictionary mapping the IDs of the goals whose names
            include goalSubstring to the corresponding goal names
        '''
        root = self.tree.getroot()
        goalDict = dict()
        for child in root.iter('ENTITY'):
            if child.attrib['type'] == 'goal':
                if caseSensitive == "NO":
                    if self.textFilter.lower_all(goalSubstring) in \
                            self.textFilter.lower_all(child.attrib['name']):
                        goalDict[child.attrib['id']] = child.attrib['name']
                else:
                    if goalSubstring in child.attrib['name']:
                        goalDict[child.attrib['id']] = child.attrib['name']
        return goalDict

    def __assignUniqueIDs(self, treeRoot):
        '''
        This function assigns unique IDs to all the objects of type ENTITY
        in @param treeRoot
        '''
        currentMaxId = self.maxID
        for child in treeRoot.iter('ENTITY'):
            currentMaxId = str(int(currentMaxId) + 1)
            child.attrib['id'] = currentMaxId
        self.maxID = currentMaxId

    def insertTree(self, parentID, childTree):
        '''
        Given a @param childTree, which is a tree or a single node, this is
        added as a child of parentID, below the first refinement of the
        parent. The assumption here is that each parent can have ONLY ONE
        TYPE of refinement. Unique IDs for the child elements are assigned
        dynamically by the function.
        '''
        root = self.tree.getroot()
        for child in root.iter('ENTITY'):
            if child.attrib['id'] == parentID:
                refinement = child.findall("REFINEMENT")
                if refinement and len(refinement) == 1:
                    # ONLY ONE TYPE of refinement is allowed for each element
                    self.__assignUniqueIDs(childTree)
                    refinement[0].append(childTree)
                return

    def saveModelAs(self, destinationFilePath):
        '''
        @param destinationFilePath: path of the file where the model shall
            be saved
        @todo: currently the model is saved to another location and the
            original location is lost; therefore, the model currently keeps
            the same ID. We have to change this behaviour.
        '''
        self.modelInfo.setLocation(destinationFilePath)
        self.saveModel()

    def saveModel(self):
        '''
        Save the model in the same destination as the input file and with
        the original name
        '''
        try:
            self.tree.write(self.modelInfo.getLocation())
        except IOError:
            print "IOError: Saving to a path that does not exist! Use saveModelAs() instead"
        except:
            print "An error occurred"
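A minimal end-to-end sketch of RequirementsModel (paths and names are illustrative; since no input XML path is given, a new empty model is created):

model = RequirementsModel("m42", modelType="KAOS",
                          title="display books", objects=["books"])
goalID = model.searchGoalByName("display similar books")
if goalID != -1:
    model.changeGoalName(goalID, "display related books")
model.saveModelAs("models/m42.xml")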
from nltk.tokenize import TreebankWordTokenizer
from irutils.TextFilter import TextFilter
# TransformationRecommender, QueryResult and STEM_STRING are assumed to be
# importable from local modules of this package


class QueryManager(object):
    '''
    Given a specification query, this object returns a set of models,
    together with possible transformations that can be applied to each
    model to satisfy the specification query
    '''

    def __init__(self, modelIndexManager):
        '''
        @param modelIndexManager: reference to the place where the models
            are indexed
        '''
        self.textFilter = TextFilter()
        self.modelIndexManager = modelIndexManager
        self.wordTokenizer = TreebankWordTokenizer()
        self.tRecommender = TransformationRecommender()

    def __parseQuery(self, queryString):
        '''
        This function returns the words included in queryString, after
        filtering out all the stopwords, performing stemming and applying
        all the filters provided by textFilter
        @param queryString: the specification query in the form of a string
        '''
        filteredQueryString = self.textFilter.filter_all(queryString)
        return self.wordTokenizer.tokenize(filteredQueryString)

    def issueQuery(self, queryString):
        '''
        This is the main function of this class. Given the specification
        query, the function parses the specification and returns a set of
        QueryResult objects, which include the links to the models.
        @param queryString: the specification query in the form of a string
        @return: a list of QueryResult objects
        @todo: for each model we shall understand which is the best
            transformation. To this end, an additional class is required.
            Currently, we always add the object change transformation
            together with each model found.
        '''
        qr = list()
        stems = self.__parseQuery(queryString)
        for stem in stems:
            modelsInfos = self.modelIndexManager.searchModels(stem, STEM_STRING)
            if modelsInfos is not None:
                for modelInfo in modelsInfos:
                    score = 0.1
                    transformation = self.tRecommender.getRecommendedTransformation(modelInfo, queryString)
                    qr.append(QueryResult(modelInfo, [transformation], score))
        # the list is ordered by the score attribute, highest score first
        qr.sort(key=lambda x: x.score)
        qr.reverse()
        return qr
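A brief usage sketch (modelIndexManager stands for any index exposing the searchModels(key, keyType) call used above; the modelInfo attribute of QueryResult is an assumption about its layout):

queryManager = QueryManager(modelIndexManager)
results = queryManager.issueQuery("the system shall display similar books")
for result in results:
    # highest-scoring results come first
    print result.modelInfo.getName(), result.score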
from SentenceNetVisitor import SentenceNetVisitor
from XMLReqManager import XMLReqManager
from SentenceNetCreator import SentenceNetCreator
from irutils.TextFilter import TextFilter

s1 = SentenceNetCreator()
n1 = s1.get_net()
v1 = SentenceNetVisitor(n1, s1.get_edge_start_weight(), s1.get_start_occurrences_num())

xml_doc_handler = XMLReqManager('req_document.xsd', '2007 - eirene fun 7.xml')
req_document = xml_doc_handler.get_requirements_text()

terms_filter = TextFilter()
for sent in req_document:
    filtered_sent = terms_filter.filter_all(sent)
    filtered_sent = terms_filter.remove_item(filtered_sent, "\"")
    filtered_sent = terms_filter.remove_item(filtered_sent, "-")
    print filtered_sent
    v1.search_A_star(filtered_sent)

s1.write_graph("eireneGraph.gv")