Exemplo n.º 1
0
class TestPatternParser(unittest.TestCase):
    """Check that ``PatternParser.parse`` agrees with ``pattern_parse``."""

    def setUp(self):
        """Create a fresh parser and the sample sentence used by the test."""
        self.parser = PatternParser()
        self.text = "And now for something completely different."

    def test_parse(self):
        """The parser object and ``pattern_parse`` must give equal output."""
        actual = self.parser.parse(self.text)
        expected = pattern_parse(self.text)
        # Use the TestCase's own assertion so the test does not rely on a
        # separately imported ``assert_equal`` helper.
        self.assertEqual(actual, expected)
Exemplo n.º 2
0
class TestPatternParser(unittest.TestCase):
    """Verify ``PatternParser.parse`` against the ``pattern_parse`` function."""

    def setUp(self):
        """Build the parser under test and a fixed sample sentence."""
        self.parser = PatternParser()
        self.text = "And now for something completely different."

    def test_parse(self):
        """Parsing via the parser object must equal calling ``pattern_parse``."""
        parsed_by_object = self.parser.parse(self.text)
        parsed_by_function = pattern_parse(self.text)
        # ``assertEqual`` is provided by unittest.TestCase itself, so no
        # external ``assert_equal`` helper is needed here.
        self.assertEqual(parsed_by_object, parsed_by_function)
Exemplo n.º 3
0
"""**Regular Expression Parsing**"""

import nltk
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize

# Download the NLTK resources needed for tokenizing and POS tagging below.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

data = "Xi Jinping is a Chinese politician who has served as General Secretary of the Chinese Communist Party (CCP) and Chairman of the Central Military Commission (CMC) since 2012, and President of the People's Republic of China (PRC) since 2013. He has been the paramount leader of China, the most prominent political leader in the country, since 2012. The son of Chinese Communist veteran Xi Zhongxun, he was exiled to rural Yanchuan County as a teenager following his father's purge during the Cultural Revolution and lived in a cave in the village of Liangjiahe, where he joined the CCP and worked as the party secretary."

# Split the text into tokens, then attach a part-of-speech tag to each one.
new_token = nltk.pos_tag(word_tokenize(data))
new_token  # bare expression: shown in a notebook/REPL, no effect in a script

# Chunk grammar for a noun phrase (NP): an optional determiner (<DT>?),
# followed by zero or more adjectives (<JJ>*), followed by one noun (<NN>).
np = r"NP: {<DT>?<JJ>*<NN>}"

# RegexpParser groups the tagged tokens into chunks using the tag patterns
# in the grammar above.
chunk_parser = nltk.RegexpParser(np)
result = chunk_parser.parse(new_token)
result  # bare expression: shown in a notebook/REPL, no effect in a script

"""**Pattern Parsing**"""

import nltk
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from textblob import TextBlob
from textblob.parsers import PatternParser

# Download the NLTK resources needed for tokenizing and POS tagging below.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

data = "Xi Jinping is a Chinese politician who has served as General Secretary of the Chinese Communist Party (CCP)."

# NLTK (token, tag) pairs; kept for inspection and comparison with the
# PatternParser output. They are not the parser's input.
new_token = nltk.pos_tag(word_tokenize(data))

chunk_parser = PatternParser()
# PatternParser.parse takes the raw text string; passing the list of
# (token, tag) tuples produced by nltk.pos_tag is not a supported input.
result = chunk_parser.parse(data)
result  # bare expression: shown in a notebook/REPL, no effect in a script