Example #1
def test_findVerb():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
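    # Note: the successive sent assignments below override one another;
    # only the last string is actually parsed.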
    
   
    sent = "Bachelor's in Computer Science, Information Systems or a related study, is required."
    sent = 'I ate pizza.'
    sent = "Bachelor's in Computer Science is required."
    sent = "Bachelor 's Degree or 4 years equivalent professional experience ."
    sent = "A Master ’ s Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "A Master's Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "Bachelor ’ s degree in Computer Science or equivalent"
    sent = "Bachelor ' s degree in Computer Science or equivalent"
       
    
    result = parse(sent,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
            )
    pprint(result) 
    
  #  print type(result)
  #  print result         
    sen = Sentence(result)
  #  for word in sen:
 #       print word, word.type
    
    vlist = [ word.string for word in sen if word.type.startswith("V") ]
    print vlist
Example #2
def test_sentence():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
    
    sent1 = "BS degree ( BSEE or BSCS strongly preferred , MSCS a plus ) and/or the equivalent in training and experience ."
    sent2 = "Bachelor's degree in Computer Science is required."  
    sent3 = "He created the robot and broke it after making it."
    sent4 = "A Computer Science or related degree "    
    sent5 = "bachelors degree in Computer Science or Information Systems and/or related experience required"    
    
    result = parse(sent5,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
           chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
        relations = True,  # Find relations between chunks.
          lemmata = True,  # Find word lemmata.
            light = True)
    pprint(result)
   
    sen = Sentence(result)
  #  print type(sen)
    print sen     

    for chunk in sen.chunks:
       print chunk.type, [(w.string, w.type) for w in chunk.words]
Example #3
def run(o):

#	https://github.com/clips/pattern/blob/master/examples/03-en/03-parse.py

	import os, sys  # sys.path.insert(0, os.path.join("..", ".."))

	from pattern.en import parse, pprint, tag

	# The en module contains a fast regular expressions-based parser.
	# A parser identifies words in a sentence, word part-of-speech tags (e.g. noun, verb)
	# and groups of words that belong together (e.g. noun phrases).
	# Common part-of-speech tags: NN (noun), VB (verb), JJ (adjective), PP (preposition).
	# A tag can have a suffix, for example NNS (plural noun) or VBG (gerund verb).
	# Overview of tags: http://www.clips.ua.ac.be/pages/mbsp-tags
	s = "I eat pizza with a fork. one more test 1 Africa James Bob England Surrey Essex"
	s = parse(s,
	     tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
	         tags = True,  # Find part-of-speech tags.
	       chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
	    relations = True,  # Find relations between chunks.
	      lemmata = True,  # Find word lemmata.
	        light = False)

	# The light parameter determines how unknown words are handled.
	# By default, unknown words are tagged NN and then improved with a set of rules.
	# light=False uses Brill's lexical and contextual rules,
	# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

	# The output is a string with each sentence on a new line.
	# Words in a sentence have been annotated with tags,
	# for example: fork/NN/I-NP/I-PNP
	# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
	print s
	print

	# Prettier output can be obtained with the pprint() command:
	pprint(s)
	print

	# The string's split() method will, unless a split character is given,
	# split it into a list of sentences, where each sentence is a list of words
	# and each word is a list with the word + its tags.
	print s.split()
	print 

	# The tag() command returns a list of (word, POS-tag)-tuples.
	# With light=True, this is the fastest and simplest way to get an idea 
	# of a sentence's constituents:
	s = "I eat pizza with a fork. one more test 1 Africa James Bob England Surrey Essex"
	s = tag(s)
	print s
	for word, tag in s:
	    if tag == "NN": # Find all nouns in the input string.
	        print word
Example #4
def grammatical_tagging():
    from pattern.en import tag, parse, pprint, parsetree

    sentence = "The white house is at the top of the hill"
    sentences = "The white house is at the top of the hill. My house is not"

    print(
        tag(sentence)
    )  # The result is a list of (word, POS-tag) tuples labeling each word.
    print(parse(sentence))
    #pprint(parse(sentence))

    pprint(parsetree(sentences))
Example #5
def gather_question_bits(sentence):
    question_bits=[]
    a_parse=parse(sentence,relations=True)
    print a_parse
    pprint(a_parse)
    all_bits=a_parse.split(' ')
    ids=gather_bits_by_id(all_bits)
    for id in ids:
        roles=gather_bits_by_role(ids[id])
        if 'SBJ' in roles and 'VP' in roles and 'OBJ' in roles:
            question_bits.append(roles)
    return question_bits
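
Note: gather_bits_by_id and gather_bits_by_role are project-specific helpers not shown in this excerpt. A minimal, hypothetical sketch of the same SBJ/VP/OBJ filtering using pattern's built-in relation properties instead:

def gather_question_bits_sketch(sentence_text):
    from pattern.en import parse, Sentence
    # Parse with relations=True so subject/verb/object chunks are labeled.
    sen = Sentence(parse(sentence_text, relations=True, lemmata=True))
    # Keep the sentence only if it has a subject, a verb chunk, and an object.
    if sen.subjects and sen.verbs and sen.objects:
        return {"SBJ": sen.subjects, "VP": sen.verbs, "OBJ": sen.objects}
    return None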
Example #6
def process(self):
    text = self._regex.replace(self._text)
    pt = english.parsetree(text, lemmata=True)
    processed = []
    vm = NateVm()
    english.pprint(pt)
    for sentence in pt:
        words = sentence
        pos = 0
        last = len(words)
        while pos < last:
            for pattern, code in self._logic:
                matched = pattern.match(words, start=pos)
                if matched:
                    vm.run(matched, code)
                    pos = matched.stop
                    processed += vm.get()
                    break
            else:
                processed.append(words[pos])
                pos += 1
    self.rebuild_text(processed)
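
Note: NateVm, self._logic, and self._regex are project-specific and not shown. The pattern.match(words, start=pos) call suggests pattern.search Pattern objects; a minimal standalone sketch of that matching API, with a made-up pattern string:

from pattern.en import parsetree
from pattern.search import Pattern

t = parsetree("The big white rabbit eats a fresh carrot.")
p = Pattern.fromstring("JJ NN")  # an adjective followed by a noun
for sentence in t:
    m = p.match(sentence)
    if m:
        print(m.string)  # e.g. "white rabbit"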
Example #7
def test_parse():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
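    # As in Example #1, each sent assignment overrides the previous one,
    # so parse() only sees the last string.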
    
   
    sent = "Experience with mobile application development a plus: iPhone/iPad, Android, or Blackberry."
    sent = "3+ years web software development experience."
    sent = "Bachelor's in Computer Science, Information Systems or a related study, is required."
    sent = 'I ate pizza.'
    sent = "Bachelor's in Computer Science is required."
    sent = "Bachelor 's Degree or 4 years equivalent professional experience ."
    sent = "A Master ’ s Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "A Master's Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "BS degree ( BSEE or BSCS strongly preferred , MSCS a plus ) and/or the equivalent in training and experience ."      
    
    result = parse(sent,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
           chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
        relations = True,  # Find relations between chunks.
          lemmata = True,  # Find word lemmata.
            light = True)
    pprint(result) 
Example #8
def run(o):
	""" STM is shortcuts to the short_term_memory operators """

	STM_PATH = './bin/%s/brain/short_term_memory' % o.o['name']
	
	WM_PATH = './bin/%s/brain/working_memory/' % o.o['name']

	import os, sys
	mydirs = os.listdir( STM_PATH )

	from pattern.en import parse, pprint, tag

	import shutil

	for word in mydirs:
		
		ignore = [".DS_Store",".gitignore","README.txt"]
		if word in ignore:
			continue

		#print word
		s = parse(word,tags=True)
		#print s
		pprint(s)

		tagged = s.split('/')[1]

		#print tagged
		from_path = "%s/%s" % (STM_PATH,word)

		# TODO - ask do you want to move numbers
		#if tagged != "NNP" :
		#	pprint(s)

		#	to_path = "%s/%s" % (WM_PATH,"NUMBERS")
		#	os.system( "rsync -avrz %s %s" % (from_path,to_path) )
		#	shutil.rmtree(from_path)
Example #9
# - Even though it's not as popular as spaCy or NLTK, it has unique functionality, such as finding superlatives and comparatives and detecting facts and opinions, which other NLP libraries don't have [1]

## installation
# !pip install pattern

# # Python for NLP: Introduction to the Pattern Library [1]

# ## Pattern Library Functions for NLP

# ### Tokenizing, POS Tagging, and Chunking

from pattern.en import parse
from pattern.en import pprint

pprint(
    parse('I drove my car to the hospital yesterday',
          relations=True,
          lemmata=True))

print(
    parse('I drove my car to the hospital yesterday',
          relations=True,
          lemmata=True).split())

# ### Pluralizing and Singularizing the Tokens

from pattern.en import pluralize, singularize

print(pluralize('leaf'))
print(singularize('thieves'))

# ### Converting Adjective to Comparative and Superlative Degrees
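
# The excerpt breaks off here. A minimal sketch of the two helpers this heading
# refers to, comparative() and superlative() from pattern.en:

from pattern.en import comparative, superlative

print(comparative('big'))   # bigger
print(superlative('big'))   # biggest
print(comparative('good'))  # better
print(superlative('good'))  # best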
Example #10
File: 03-parse.py  Project: clips/pattern
        light = False)

# The light parameter determines how unknown words are handled.
# By default, unknown words are tagged NN and then improved with a set of rules.
# light=False uses Brill's lexical and contextual rules,
# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

# The output is a string with each sentence on a new line.
# Words in a sentence have been annotated with tags,
# for example: fork/NN/I-NP/I-PNP
# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
print(s)
print("")

# Prettier output can be obtained with the pprint() command:
pprint(s)
print("")

# The string's split() method will, unless a split character is given,
# split it into a list of sentences, where each sentence is a list of words
# and each word is a list with the word + its tags.
print(s.split())
print("")

# The tag() command returns a list of (word, POS-tag)-tuples.
# With light=True, this is the fastest and simplest way to get an idea
# of a sentence's constituents:
s = "I eat pizza with a fork."
s = tag(s)
print(s)
for word, tag in s:
    if tag == "NN":  # Find all nouns in the input string.
        print(word)
Example #11
def test_pprint():
    from pattern.en import parse
    from pattern.en import pprint

    result = parse('I ate pizza.', relations=True, lemmata=True)
    pprint(result)
Example #12
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 16 11:24:05 2020

@author: praja
"""
#
from pattern.en import parse
from pattern.en import pprint
##
pprint(parse('He went to park', relations=True, lemmata=True))
print("sucesfull!!!")

Example #13
    x.replace("\n", " ")
    for x in nltk.sent_tokenize(plotText.replace("\t", ""))
]

for strSentence in sentList:

    for word, pos in tag(strSentence):
        if pos in ("VB", "VBD", "VBG", "VBN", "VBP",
                   "VBZ"):  # Retrieve all adjectives.
            print("=====================>>>>> ", word, pos)
        else:
            print(word, pos)

    print(strSentence)
    a = parse(strSentence, relations=True, lemmata=True)
    pprint(a)

    sentence = Sentence(a)
    print(sentence.verbs)
    print()
    print()

    #print(sentence.relations)
    #print(sentence.subjects)
    #print(sentence.objects)
    #print(sentence.verbs)
    #print(sentence.chunk)

    sentScore = sid.polarity_scores(strSentence)

    # sqlite3 insert : subject / objects / verbs / CPC / Sentiment
Example #14
def test_pprint():
    from pattern.en import parse
    from pattern.en import pprint 
    
    result = parse('I ate pizza.', relations=True, lemmata=True)
    pprint(result)    
Example #15
#https://stackabuse.com/python-for-nlp-introduction-to-the-pattern-library/
# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydataset import data
import seaborn as sns

#pip install pattern
from pattern.en import parse
from pattern.en import pprint
parse('Hello Everyone and Welcome to Analytics India Magazine')
# The parse function labels each word in the sentence as a noun, verb, subject, or object. We can also use the 'pprint' function defined in the pattern library to display the parsed sentence in a clear manner.
pprint(
    parse('Hello Everyone and Welcome to Analytics India Magazine',
          relations=True,
          tokenize=True,
          lemmata=True))

#%% ngrams
# "n" combination of words in a sentence.
from pattern.en import ngrams
print(ngrams("Hello Everyone and Welcome to Analytics India Magazine", n=3))
print(ngrams("He goes to hospital", n=2))

#sentiment
# Sentiment refers to an opinion or feeling towards a certain thing. The sentiment object is used to find the polarity (positivity or negativity) of a text along with its subjectivity.

from pattern.en import sentiment
print(sentiment("He is a good boy but sometimes he behaves miserably"))
Example #16
    tokenize=True,  # Split punctuation marks from words?
    tags=True,  # Parse part-of-speech tags? (NN, JJ, ...)
    chunks=True,  # Parse chunks? (NP, VP, PNP, ...)
    relations=False,  # Parse chunk relations? (-SBJ, -OBJ, ...)
    lemmata=False,  # Parse lemmata? (ate => eat)
    encoding='utf-8',  # Input string encoding.
    tagset=None)  # Penn Treebank II (default) or UNIVERSAL.
# parser tagger and tokenizer
for word, pos in tag('I feel *happy*!', tokenize=True, encoding='utf-8'):
    if pos == "JJ":  # Retrieve all adjectives.
        print word
print tokenize('I feel *happy*!',
               punctuation=".,;:!?()[]{}`''\"@#$^&*+-|=~_",
               replace={})
# parser output
pprint(parse('I ate pizza.', relations=True, lemmata=True))
# parse trees
s = parsetree(
    'The cat sat on the mat.',
    tokenize=True,  # Split punctuation marks from words?
    tags=True,  # Parse part-of-speech tags? (NN, JJ, ...)
    chunks=True,  # Parse chunks? (NP, VP, PNP, ...)
    relations=False,  # Parse chunk relations? (-SBJ, -OBJ, ...)
    lemmata=False,  # Parse lemmata? (ate => eat)
    encoding='utf-8',  # Input string encoding.
    tagset=None)  # Penn Treebank II (default) or UNIVERSAL.
print repr(s)
for sentence in s:
    for chunk in sentence.chunks:
        print chunk.type, [(w.string, w.type) for w in chunk.words]
for sentence in tree(open('data/input/tagged.txt'),
Example #17
File: 03-parse.py  Project: mlyne/Scripts
    light=False)

# The light parameter determines how unknown words are handled.
# By default, unknown words are tagged NN and then improved with a set of rules.
# light=False uses Brill's lexical and contextual rules,
# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

# The output is a string with each sentence on a new line.
# Words in a sentence have been annotated with tags,
# for example: fork/NN/I-NP/I-PNP
# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
print s
print

# Prettier output can be obtained with the pprint() command:
pprint(s)
print

# The string's split() method will, unless a split character is given,
# split it into a list of sentences, where each sentence is a list of words
# and each word is a list with the word + its tags.
print s.split()
print

# The tag() command returns a list of (word, POS-tag)-tuples.
# With light=True, this is the fastest and simplest way to get an idea
# of a sentence's constituents:
s = "I eat pizza with a fork."
s = tag(s, light=True)
print s
for word, tag in s:
    if tag == "NN": # Find all nouns in the input string.
        print word
Example #18
# main reference
# http://www.academypublisher.com/jetwi/vol01/no1/jetwi01016076.pdf
# to draw a parse tree recursively
from textblob import TextBlob
wiki = TextBlob(open('full.txt','rU').read())
a=wiki.tags
import nltk 
sentence = a

pattern = """NP: {<DT>?<JJ>*<NN>}
VBD: {<VBD>}
IN: {<IN>}"""
NPChunker = nltk.RegexpParser(pattern) 
result = NPChunker.parse(sentence)
result.draw()

# array input for POS tagging, line by line
from pattern.en import parse
from pattern.en import pprint 

with open('spam.txt', 'rU') as ins:
    array = []
    for line in ins:
        array.append(line)
for i in array:
	pprint(parse(i, relations=True, lemmata=True))

# new reference
# https://www.academia.edu/11692120/Human_Intentions_Mining_Through_Natural_Language_Text_Survey
# In the lexical word-list approach, accuracy may suffer because giving equal
# weight to all the data is a disadvantage, so appropriate weights need to be
# assigned to the data (see the sketch below).
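
# A hypothetical sketch of the weighting idea above: instead of equal weights,
# assign each lexicon word its own score and sum the scores of the words found
# in a sentence. The keywords and weights below are made up for illustration.
def weighted_lexicon_score(sentence, weights):
    words = sentence.lower().split()
    return sum(weights.get(w, 0.0) for w in words)

intent_weights = {"want": 1.0, "need": 0.8, "wish": 0.5, "maybe": 0.1}
print(weighted_lexicon_score("I want and maybe need a job", intent_weights))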
Example #19
	Make a Nonet
	(first iteration)
	1st line: contains 9 syllables
	2nd line: contains 8 syllables
	3rd line: contains 7 syllables
	...
	9th line: contains 1 syllable
	(see the syllable-count sketch at the end of this example)

"""

from pattern.en import parsetree
from pattern.en import tag
from pattern.en import pprint

def word_eval(string):
	pprint(parsetree(string, relations = True))
	for word, pos in tag(string):
		if pos == "NN":
			print word

def gutenberg_text_gather(current_URL):

	from pattern.web import *
	buddhist_psalm_text = URL(current_URL).download()
	print buddhist_psalm_text

	# Save data to a file (will be part of your data fetching script)
	import pickle
	f = open('buddhist_psalm_text.pickle','w')
	pickle.dump(buddhist_psalm_text,f)
	f.close()
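
# A hedged sketch of the nonet check described in the docstring above, using a
# crude vowel-group syllable heuristic (a real implementation would use a
# pronunciation dictionary such as CMUdict):
import re

def count_syllables(word):
    # Approximate: count groups of consecutive vowels and treat a trailing
    # silent 'e' as non-syllabic; every word has at least one syllable.
    groups = re.findall(r"[aeiouy]+", word.lower())
    if word.lower().endswith("e") and len(groups) > 1:
        return len(groups) - 1
    return max(1, len(groups))

def is_nonet(lines):
    # A nonet has 9 lines with 9, 8, ..., 1 syllables.
    return len(lines) == 9 and all(
        sum(count_syllables(w) for w in line.split()) == 9 - i
        for i, line in enumerate(lines)
    )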