Пример #1
0
 def getSentences(self):
     """Return this object's sentences, computing them from ``self.text`` on demand.

     A truthy ``self.sentences`` is returned as-is. Otherwise ``self.text``
     is handed to ``utils.getSentences``; a truthy result is stored on
     ``self.sentences`` and returned. When that result is falsy nothing is
     stored and the method returns ``None``.
     """
     if not self.sentences:
         parsed = utils.getSentences(self.text)
         if not parsed:
             # Nothing usable came back: leave the attribute untouched.
             return None
         self.sentences = parsed
     return self.sentences
Пример #2
0
 def getSentences(self):
     """Lazily provide the sentence list for this object.

     Returns ``self.sentences`` when it is already truthy. Otherwise calls
     ``utils.getSentences(self.text)``; if that yields a truthy value it is
     saved to ``self.sentences`` and returned, else ``None`` is returned and
     ``self.sentences`` is left as it was.
     """
     # Fast path: sentences were already computed (or set by the caller).
     if self.sentences:
         return self.sentences

     fresh = utils.getSentences(self.text)
     if fresh:
         self.sentences = fresh
         return self.sentences
     return None
Пример #3
0
 def getSentences(self):
     """Return the sentences of this object, splitting ``self.text`` on first use.

     A non-empty ``self.sentences`` is returned directly. Otherwise, if
     ``self.text`` is truthy, it is passed to ``utils.getSentences`` and a
     non-empty result is copied into ``self.sentences`` and returned.

     Returns:
         The cached list, or a new empty list when there is no text or the
         splitter produced nothing. The method always returns a list; the
         earlier version fell through and returned ``None`` when
         ``self.text`` was empty.
     """
     # Truthiness instead of len() so an unset (None) attribute is tolerated.
     if self.sentences:
         return self.sentences
     if not self.text:
         return []

     parsed = utils.getSentences(self.text)
     if not parsed:
         return []
     # Copy so later mutation of the cache does not affect the helper's result.
     self.sentences = list(parsed)
     return self.sentences
Пример #4
0
 def getSentences(self):
     """Return the cached sentences, computing them from ``self.text`` if needed.

     When ``self.sentences`` is empty and ``self.text`` is truthy, the text
     is split with ``utils.getSentences``; a non-empty result is copied into
     ``self.sentences`` before being returned.

     Returns:
         A list in every case: the cached sentences, or ``[]`` when there is
         no text or no sentences were produced. Previously a falsy
         ``self.text`` made the method return ``None`` implicitly.
     """
     if self.sentences:
         return self.sentences

     # Only call the splitter when there is text to split.
     found = utils.getSentences(self.text) if self.text else None
     if not found:
         return []

     self.sentences = list(found)
     return self.sentences
Пример #5
0
    def webpageEntities(self, docText=""):
        """Collect sentences of ``docText`` that mention disasters and a place or date.

        Each sentence from ``eventUtils.getSentences(docText)`` is tokenized;
        sentences with more than 100 tokens are skipped. A sentence is kept
        when its overlap with the keys of ``self.entities["Disaster"]`` is
        strictly larger than ``self.intersectionTh`` and the first element
        returned by ``eventUtils.getEntities`` contains a ``'LOCATION'`` or
        ``'DATE'`` key.

        Args:
            docText: Text of the web page to scan.

        Returns:
            A list of ``(sentence, entities)`` tuples, where ``entities`` is
            the object returned for that sentence with an added
            ``'Disaster'`` entry holding the overlap.
        """
        disasters = set(self.entities["Disaster"].keys())

        webpageEnts = []
        for sent in eventUtils.getSentences(docText):
            sentToks = eventUtils.getTokens(sent)
            if len(sentToks) > 100:
                continue
            intersect = eventUtils.getIntersection(disasters, sentToks)
            if len(intersect) <= self.intersectionTh:
                continue
            sentEnts = eventUtils.getEntities(sent)[0]
            # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
            if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                sentEnts['Disaster'] = intersect
                webpageEnts.append((sent, sentEnts))

        return webpageEnts
Пример #6
0
 def webpageEntities(self, docText=""):
     """Return the sentences of ``docText`` that look like disaster reports.

     For every sentence produced by ``eventUtils.getSentences`` the method
     skips those with more than 100 tokens, intersects the remaining
     sentence's tokens with the keys of ``self.entities["Disaster"]``, and
     keeps the sentence only if that intersection has more than
     ``self.intersectionTh`` items and the sentence's entities (first
     element of ``eventUtils.getEntities``) include ``'LOCATION'`` or
     ``'DATE'``.

     Args:
         docText: Page text to analyse.

     Returns:
         List of ``(sentence, entities)`` tuples; ``entities['Disaster']``
         is set to the matched disaster terms.
     """
     disasters = set(self.entities["Disaster"].keys())

     webpageEnts = []
     for sent in eventUtils.getSentences(docText):
         sentToks = eventUtils.getTokens(sent)
         if len(sentToks) > 100:
             continue
         intersect = eventUtils.getIntersection(disasters, sentToks)
         if len(intersect) > self.intersectionTh:
             sentEnts = eventUtils.getEntities(sent)[0]
             # Membership test instead of the Python 2-only dict.has_key().
             if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                 sentEnts['Disaster'] = intersect
                 webpageEnts.append((sent, sentEnts))

     return webpageEnts
Пример #7
0
def getEM_Sents(wps):
    """Build per-page event-model instances from a collection of web pages.

    Each item of ``wps`` is indexed by the keys ``'text'`` and ``'title'``.
    Pages without a ``'text'`` key are skipped. For the remaining pages the
    text and title are concatenated, blank lines removed, and the result
    split into sentences. Frequent tokens are computed over all sentences
    together, and every sentence sharing at least two of them yields a dict
    with ``'TOPIC'`` and, when present in the sentence's entities,
    ``'LOCATION'`` and ``'DATE'``.

    NOTE(review): the end of this function is not visible in this excerpt,
    so its return value is not documented here.
    """
    # Not used in the visible part of the function; possibly used further
    # down -- TODO confirm before removing.
    docsEntities=[]
    docsEntitiesFreq = []
    entitiesProb = {}
    
    
    # One list of sentences per page that has a 'text' key.
    collSents = []
    for wp in wps:
        if 'text' not in wp:
            continue
        # NOTE(review): text and title are joined with no separator, and
        # 'title' is assumed to exist whenever 'text' does (KeyError otherwise).
        wpContent = wp['text']+wp['title']
        wpSplit = wpContent.split('\n')
        # filter(None, ...) drops the empty strings, i.e. blank lines.
        wpFiltered = filter(None,wpSplit)
        wpContentf = '\n'.join(wpFiltered)
        sents = eventUtils.getSentences(wpContentf)
        collSents.append(sents)
    # Flatten all pages' sentences to compute collection-wide frequent tokens.
    allSents = []
    for sents in collSents:
        allSents.extend(sents)
    fw = eventUtils.getFreqTokens(allSents)
    # Keep only the first element of each entry -- presumably the token of a
    # (token, frequency) pair; verify against eventUtils.getFreqTokens.
    fw = [w[0] for w in fw]
    
    # One list of event-model instances per page, parallel to collSents.
    collEventModelInsts=[]
    for sents in collSents:
        filtEvtModelInsts = []
        for s in sents:
            sentToks = eventUtils.getTokens(s)
            cw = eventUtils.getIntersection(fw, sentToks)
            # Only sentences sharing at least two frequent tokens are kept.
            if len(cw) >= 2:
                emi = {}
                emi['TOPIC'] = list(cw)
                ents = eventUtils.getEntities(s)[0]
                # NOTE(review): dict.has_key() exists only in Python 2.
                if ents.has_key('LOCATION'):
                    emi['LOCATION'] = ents['LOCATION']
                if ents.has_key('DATE'):
                    emi['DATE']=ents['DATE']
                # Appended even when neither LOCATION nor DATE was found.
                filtEvtModelInsts.append(emi)
        collEventModelInsts.append(filtEvtModelInsts)
    '''
Пример #8
0
    def webpageEntities_old(self, docText=""):
        """Older variant of the disaster-sentence extractor for ``docText``.

        ``self.entities["Disaster"]`` is passed to
        ``eventUtils.getIntersection`` as-is (it is not converted to a set
        first). Sentences with more than 100 tokens are skipped. A sentence
        is kept when its overlap with the disaster terms has more than
        ``self.intersectionTh`` items and the first element returned by
        ``eventUtils.getEntities`` has a ``'LOCATION'`` or ``'DATE'`` key.

        Args:
            docText: Text of the web page to scan.

        Returns:
            A list of ``(sentence, entities)`` tuples, where ``entities``
            has an added ``'Disaster'`` entry holding the overlap.
        """
        disasters = self.entities["Disaster"]

        webpageEnts = []
        for sent in eventUtils.getSentences(docText):
            sentToks = eventUtils.getTokens(sent)
            if len(sentToks) > 100:
                continue
            intersect = eventUtils.getIntersection(disasters, sentToks)
            if len(intersect) <= self.intersectionTh:
                continue
            sentEnts = eventUtils.getEntities(sent)[0]
            # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
            if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                sentEnts['Disaster'] = intersect
                webpageEnts.append((sent, sentEnts))

        return webpageEnts
Пример #9
0
 def webpageEntities_old(self, docText=""):
     """Return disaster-related sentences of ``docText`` (older implementation).

     The value of ``self.entities["Disaster"]`` is intersected, unchanged,
     with each sentence's tokens via ``eventUtils.getIntersection``.
     Sentences longer than 100 tokens are ignored. A sentence qualifies when
     the intersection has more than ``self.intersectionTh`` items and its
     entities (first element of ``eventUtils.getEntities``) contain
     ``'LOCATION'`` or ``'DATE'``.

     Args:
         docText: Page text to analyse.

     Returns:
         List of ``(sentence, entities)`` tuples; ``entities['Disaster']``
         is set to the matched disaster terms.
     """
     disasters = self.entities["Disaster"]

     webpageEnts = []
     for sent in eventUtils.getSentences(docText):
         sentToks = eventUtils.getTokens(sent)
         if len(sentToks) > 100:
             continue
         intersect = eventUtils.getIntersection(disasters, sentToks)
         if len(intersect) > self.intersectionTh:
             sentEnts = eventUtils.getEntities(sent)[0]
             # Membership test instead of the Python 2-only dict.has_key().
             if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                 sentEnts['Disaster'] = intersect
                 webpageEnts.append((sent, sentEnts))

     return webpageEnts