示例#1
0
    def parse_sentence(self, s):
        """Convert sentence *s* into a list of word ids for its keywords.

        The sentence is lemmatized and the lemmas are filtered down to the
        tags 'V', 'N', 'W' and 'PT' (verbs, nouns, dates and PTs). Lemmas
        tagged 'W' are kept as-is; every other lemma goes through
        ``self.normalize``. Empty results and anything listed in
        ``ignore_lemmas`` are discarded. The surviving keywords are reported
        via ``self.vprint`` and then mapped to ids with
        ``self.crawler.getwordid``.
        """
        lemmatizer = Lemmatizer()
        # Keep only the tags we care about: verbs, nouns, dates and PTs
        tagged = lemmatizer.filter(
            lemmatizer.lemmatize(s), ['V', 'N', 'W', 'PT']
        )

        keywords = []
        for entry in tagged:
            # 'W' lemmas are used verbatim; everything else is normalized
            candidate = (
                entry['lemma'] if entry['tag'] == 'W'
                else self.normalize(entry['lemma'])
            )

            # Skip empty lemmas and the ones explicitly ignored
            if len(candidate) == 0 or candidate in ignore_lemmas:
                continue
            keywords.append(candidate)

        self.vprint("Keywords: ", keywords)

        return [self.crawler.getwordid(keyword) for keyword in keywords]
示例#2
0
    def parse_sentence(self, s):
        """Convert sentence *s* into a list of word ids for its keywords.

        The sentence is lemmatized and the lemmas are filtered down to the
        tags 'V', 'N', 'W' and 'PT' (verbs, nouns, dates and PTs). Lemmas
        tagged 'W' are kept as-is; every other lemma goes through
        ``self.normalize``. Empty results and anything listed in
        ``ignore_lemmas`` are discarded. The surviving keywords are reported
        via ``self.vprint`` and then mapped to ids with
        ``self.crawler.getwordid``.
        """
        lemmatizer = Lemmatizer()
        # Keep only the tags we care about: verbs, nouns, dates and PTs
        tagged = lemmatizer.filter(
            lemmatizer.lemmatize(s), ['V', 'N', 'W', 'PT']
        )

        keywords = []
        for entry in tagged:
            # 'W' lemmas are used verbatim; everything else is normalized
            candidate = (
                entry['lemma'] if entry['tag'] == 'W'
                else self.normalize(entry['lemma'])
            )

            # Skip empty lemmas and the ones explicitly ignored
            if len(candidate) == 0 or candidate in ignore_lemmas:
                continue
            keywords.append(candidate)

        self.vprint("Keywords: ", keywords)

        return [self.crawler.getwordid(keyword) for keyword in keywords]