Exemplo n.º 1
0
 def _add_category(self, file_name, tag):
     """Register every line of a file as a taxonomy term of type ``tag``.

     The file is read line by line; the trailing newline of each line is
     removed and the remaining text is appended to the global ``taxonomy``
     with ``type=tag``.
     """
     with open(file_name) as source:
         # Stream the terms so each one is registered as soon as it is read.
         entries = (line.rstrip('\n') for line in source)
         for entry in entries:
             taxonomy.append(entry, type=tag)
Exemplo n.º 2
0
def load_taxonomy(filename, use_stem=True):
    """Populate the global ``taxonomy`` from a CSV file.

    The CSV is read with ``pd.read_csv`` and must provide the columns
    ``Syns`` (comma-separated synonyms), ``Category`` and ``Parent``.
    For each row, every synonym is appended under the row's ``Category`` and
    the ``Category`` itself is appended under the row's ``Parent``.

    Args:
        filename: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        use_stem: When true, each synonym is reduced with NLTK's Porter
            stemmer before it is appended.
    """
    # Only build the stemmer when it is actually going to be used.
    stemmer = nltk.stem.PorterStemmer() if use_stem else None

    taxdf = pd.read_csv(filename)
    for _, row in taxdf.iterrows():
        category = row['Category']
        synonyms = row['Syns']

        # An empty ``Syns`` cell is read as a missing value, which cannot be
        # split; such a row simply contributes no synonyms.
        if not pd.isnull(synonyms):
            for synonym in synonyms.split(','):
                # "a, b" splits into "a" and " b": drop the padding and skip
                # empty items so that only real terms are registered.
                synonym = synonym.strip()
                if not synonym:
                    continue
                if use_stem:
                    synonym = stemmer.stem(synonym)
                taxonomy.append(synonym, type=category)

        taxonomy.append(category, type=row['Parent'])
Exemplo n.º 3
0
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.search import search, taxonomy, Classifier
from pattern.en import parsetree

# The search module includes a Taxonomy class
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# This would make search patterns somewhat unwieldy:
# search("rose|lily|daisy|daffodil|begonia", txt).

# A better approach is to use the taxonomy:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

# print(...) with exactly one argument produces the same output on Python 2
# and Python 3, so the example runs on both.
print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose"))  # Yields the most recently added parent.
print("")

# Taxonomy terms can be included in a pattern by using uppercase:
t = parsetree("A field of white daffodils.", lemmata=True)
m = search("FLOWER", t)
print(t)
print(m)
print("")

# Another example: the same term registered under two different types.
taxonomy.append("chicken", type="food")
taxonomy.append("chicken", type="bird")
Exemplo n.º 4
0
# Import itertools for permutations
import itertools

# Import Pattern Search and English modules
from pattern.search import search, taxonomy
from pattern.en import parsetree

# Import (and print out) test strings for pattern matching
# import test_strings

# Register the inflected forms of "cause" ("causes", "caused") in the global
# taxonomy under the type CAUSALV1.
causal_verb_list1 = ['causes', 'caused']
for c in causal_verb_list1:
    taxonomy.append(c, type='CAUSALV1')
# Consider adding other verb tenses based on "cause" here as appropriate.


# Register the base form "cause" under its own type, CAUSALV2. It is kept
# separate because this simple present form gets tagged as a noun.
causal_verb_list2 = ['cause']
for c in causal_verb_list2:
    taxonomy.append(c, type='CAUSALV2')
# Consider adding other verb tenses based on "cause" here as appropriate.
# This includes verb tenses where the subject/cause noun phrase is plural.

###############################################################################
# Cause-effect patterns: statements where cause precedes the effect
###############################################################################
# Manually-defined noun phrase definitions
# Consider adding noun phrase chunk from NLTK book, which has optional
# determiner, etc.
Exemplo n.º 5
0
import os, sys; sys.path.append(os.path.join("..", "..", ".."))

from pattern.search import Pattern, Constraint, Classifier, taxonomy
from pattern.en     import Sentence, parse

# The search module includes a Taxonomy class 
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# This would make patterns somewhat unwieldy, e.g.:
# Pattern.fromstring("rose|lily|daisy|daffodil|begonia").

# A better approach is to use the taxonomy:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

# print(...) with exactly one argument produces the same output on Python 2
# and Python 3, so the example runs on both.
print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose")) # Yields the most recently added parent.
print("")

# Taxonomy terms can be included in a pattern, either through an explicit
# Constraint or through the uppercase shorthand in a pattern string:
p = Pattern([Constraint(taxa=["flower"])]) # or
p = Pattern.fromstring("FLOWER")

s = Sentence(parse("A field of white daffodils.", lemmata=True))
m = p.search(s)
print(s)
print(m)
print("")

from pattern.search import search
Exemplo n.º 6
0
    def get_arrets(self):
        """Return the names of the stops mentioned in the tweet ``self.s``.

        The stop lexicon ``self.Lex_learn['arrets']`` is registered in the
        global taxonomy under the type ``ARRET``. The tweet is then parsed and
        searched for that type; the matched words are filtered, joined into
        compound names when ``self._is_neighbour`` says so, and translated
        with ``self._to_tbc_arret``.

        Returns:
            list: The distinct translated stop names with every ``'AAAA'``
            entry removed. The list is built from a set, so its order is not
            significant.
        """
        irrelevent = ['GARE', 'SAINT', 'SAINTE']
        accepted_tags = ['NN', 'NNP', 'NNS']

        # Learn the stop lexicon. An entry is either "name" or
        # "name|syn1,syn2,..."; the name and its synonyms are all registered.
        tax = []
        for entry in self.Lex_learn['arrets']:
            fields = entry.split('|')
            tax.append(fields[0])
            if len(fields) > 1:
                tax.extend(fields[1].split(','))
        for term in tax:
            taxonomy.append(suppr_ret_char(term), type='ARRET')

        # Look for lexicon keywords in the tweet.
        tree = parsetree(self.s)
        stop_found = []
        for match in search('ARRET', tree):
            for w in match.words:
                word = w.string
                if (w.tag in accepted_tags and len(word) > 2
                        and word.upper() not in irrelevent):
                    stop_found.append(word)
                elif self._is_in_lev(word.upper(), self.Lex_arret):
                    stop_found.append(word)

        # Look for compound stops (flagged by the author as not functional
        # yet).
        # NOTE(review): j starts at i, so each word is also compared with
        # itself - confirm that _is_neighbour rejects identical words.
        to_remove = []
        compound_found = []
        for i in range(len(stop_found)):
            for j in range(i, len(stop_found)):
                if self._is_neighbour(stop_found[i], stop_found[j]):
                    compound_found.append(stop_found[i] + " " + stop_found[j])
                    to_remove.append(stop_found[i])
                    to_remove.append(stop_found[j])

        stop_found.extend(compound_found)
        # Drop (one occurrence of) each word that became part of a compound.
        for w in set(to_remove):
            stop_found.remove(w)

        # Translate the stops that were found into real stop names.
        translated = {self._to_tbc_arret(name)[0] for name in stop_found}

        # Remove the incoherent stops. 'AAAA' is presumably the placeholder
        # _to_tbc_arret yields for an unknown stop - verify. Discarding it
        # from the set removes it however many times it occurred, and needs
        # no exception handling when it is absent.
        translated.discard('AAAA')
        return list(translated)
Exemplo n.º 7
0
import string
from collections import defaultdict
import nltk
import pandas as pd

from nltk.corpus import framenet as fn
from textblob import TextBlob

from pattern.en import suggest, parse, parsetree, sentiment
from pattern.en import conjugate, lemma, lexeme
from pattern.search import search, taxonomy



# Two-level taxonomy: the flower names are of type "flower", and "flower"
# and "tree" are in turn of type "plant".
for f in ('rose', 'lily', 'daisy', 'daffodil', 'begonia'):
    taxonomy.append(f, type='flower')

for f in ('flower', 'tree'):
    taxonomy.append(f, type='plant')


t = parsetree('A field of daffodils is white.', lemmata=True)
# print(...) with exactly one argument produces the same output on Python 2
# and Python 3.
print(search('PLANT', t))

# The results of these two lookups are discarded; they only show something
# when run in an interactive session.
taxonomy.parents('daffodil', recursive=True)
taxonomy.children('plant', recursive=False)


#def taxonomy_normalize(sentence):    
#    bp_match = search('BEAUTY_PARTS', parsetree(sentence, lemmata=True))
#    facial_match = search('MAKEUP', parsetree(sentence, lemmata=True))
Exemplo n.º 8
0
# Import Pattern Search and English modules
from pattern.search import search, taxonomy
from pattern.en import parsetree

# Defining simple causal patterns

# Taxonomy for cause-effect pattern: active-voice forms of "cause",
# registered below under the type CAUSALV1.
causal_verb_list1 = [
    'causes', 'caused', 'would cause', 'will cause', 'is causing',
    'has been causing', 'was causing', 'had been causing', 'will be causing',
    'will have been causing', 'would be causing', 'would have been causing',
    'are causing', 'have been causing', 'were causing'
]

for c1 in causal_verb_list1:
    taxonomy.append(c1, type='CAUSALV1')

# Pattern string for "<cause> CAUSALV1 <effect>"; it is only defined here,
# nothing in this snippet runs a search with it.
cause_effect_pattern = "{NP} CAUSALV1 {NP}"

# Taxonomy for effect-cause pattern: passive-voice forms of "cause",
# registered below under the type CAUSALV2.
causal_verb_list2 = [
    "is caused", "was caused", "are caused", "were caused", "has been caused",
    "have been caused", "had been caused", "will have been caused",
    "would have been caused", "is being caused", "was being caused",
    "were being caused", "would be caused", "will be caused"
]

for c2 in causal_verb_list2:
    taxonomy.append(c2, type='CAUSALV2')

# Pattern string for "<effect> CAUSALV2 by <cause>"; likewise only defined
# here.
effect_cause_pattern = "{NP} CAUSALV2 by {NP}"
Exemplo n.º 9
0
# Scratch/interactive exploration: the bare expression statements below
# discard their results and only show something in an interactive session.
lemma('humidity')

frames = fn.frames_by_lemma(r'skin')
for f in frames:
    # print(...) with exactly one argument produces the same output on
    # Python 2 and Python 3.
    print('%s - %s\n' % (f.name, f.definition))

fn.lexical_units(r'')

fn.frames_by_lemma(r'(?i)a little')


# "bank" is registered under two types, "angle" and "finance".
for f in ('reflect', 'bank'):
    taxonomy.append(f, type='angle')

for f in ('bank', 'financial-institution'):
    taxonomy.append(f, type='finance')


# NOTE(review): this snippet registers no "plant" or "daffodil" terms itself;
# the three statements below presumably rely on taxonomy entries added
# elsewhere - verify.
t = parsetree('A field of daffodils is white.', lemmata=True)
print(search('PLANT', t))

taxonomy.parents('daffodil', recursive=True)
taxonomy.children('plant', recursive=False)

taxonomy.classify('bank')