Example #1
def test_findVerb():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
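    # Note: the successive sent assignments below override one another;
    # only the last string is actually parsed.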
    
   
    sent = "Bachelor's in Computer Science, Information Systems or a related study, is required."
    sent = 'I ate pizza.'
    sent = "Bachelor's in Computer Science is required."
    sent = "Bachelor 's Degree or 4 years equivalent professional experience ."
    sent = "A Master ’ s Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "A Master's Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "Bachelor ’ s degree in Computer Science or equivalent"
    sent = "Bachelor ' s degree in Computer Science or equivalent"
       
    
    result = parse(sent,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
            )
    pprint(result) 
    
  #  print type(result)
  #  print result         
    sen = Sentence(result)
  #  for word in sen:
 #       print word, word.type
    
    vlist = [ word.string for word in sen if word.type.startswith("V") ]
    print vlist
Example #2
def test_sentence():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
    
    sent1 = "BS degree ( BSEE or BSCS strongly preferred , MSCS a plus ) and/or the equivalent in training and experience ."
    sent2 = "Bachelor's degree in Computer Science is required."  
    sent3 = "He created the robot and broke it after making it."
    sent4 = "A Computer Science or related degree "    
    sent5 = "bachelors degree in Computer Science or Information Systems and/or related experience required"    
    
    result = parse(sent5,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
           chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
        relations = True,  # Find relations between chunks.
          lemmata = True,  # Find word lemmata.
            light = True)
    pprint(result)
   
    sen = Sentence(result)
  #  print type(sen)
    print sen     

    for chunk in sen.chunks:
       print chunk.type, [(w.string, w.type) for w in chunk.words]
Example #3
def run(o):

#	https://github.com/clips/pattern/blob/master/examples/03-en/03-parse.py

	import os, sys  # sys.path.insert(0, os.path.join("..", ".."))

	from pattern.en import parse, pprint, tag

	# The en module contains a fast regular expressions-based parser.
	# A parser identifies words in a sentence, word part-of-speech tags (e.g. noun, verb)
	# and groups of words that belong together (e.g. noun phrases).
	# Common part-of-speech tags: NN (noun), VB (verb), JJ (adjective), PP (preposition).
	# A tag can have a suffix, for example NNS (plural noun) or VBG (gerund verb).
	# Overview of tags: http://www.clips.ua.ac.be/pages/mbsp-tags
	s = "I eat pizza with a fork. one more test 1 Africa James Bob England Surrey Essex"
	s = parse(s,
	     tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
	         tags = True,  # Find part-of-speech tags.
	       chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
	    relations = True,  # Find relations between chunks.
	      lemmata = True,  # Find word lemmata.
	        light = False)

	# The light parameter determines how unknown words are handled.
	# By default, unknown words are tagged NN and then improved with a set of rules.
	# light=False uses Brill's lexical and contextual rules,
	# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

	# The output is a string with each sentence on a new line.
	# Words in a sentence have been annotated with tags,
	# for example: fork/NN/I-NP/I-PNP
	# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
	print s
	print

	# Prettier output can be obtained with the pprint() command:
	pprint(s)
	print

	# The string's split() method will, unless a split character is given,
	# split it into a list of sentences, where each sentence is a list of words
	# and each word is a list with the word + its tags.
	print s.split()
	print 

	# The tag() command returns a list of (word, POS-tag)-tuples.
	# With light=True, this is the fastest and simplest way to get an idea 
	# of a sentence's constituents:
	s = "I eat pizza with a fork. one more test 1 Africa James Bob England Surrey Essex"
	s = tag(s)
	print s
	for word, tag in s:
	    if tag == "NN": # Find all nouns in the input string.
	        print word
Example #4
def grammatical_tagging():
    from pattern.en import tag, parse, pprint, parsetree

    sentence = "The white house is at the top of the hill"
    sentences = "The white house is at the top of the hill. My house is not"

    print(
        tag(sentence)
    )  # The result is a list of (word, POS-tag) tuples labeling each word.
    print(parse(sentence))
    #pprint(parse(sentence))

    pprint(parsetree(sentences))
Example #5
def gather_question_bits(sentence):
    question_bits=[]
    a_parse=parse(sentence,relations=True)
    print a_parse
    pprint(a_parse)
    all_bits=a_parse.split(' ')
    ids=gather_bits_by_id(all_bits)
    for id in ids:
        roles=gather_bits_by_role(ids[id])
        if 'SBJ' in roles and 'VP' in roles and 'OBJ' in roles:
            question_bits.append(roles)
    return question_bits
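
Note: gather_bits_by_id and gather_bits_by_role are project-specific helpers not shown in this excerpt. A minimal, hypothetical sketch of the same SBJ/VP/OBJ filtering using pattern's built-in relation properties instead:

def gather_question_bits_sketch(sentence_text):
    from pattern.en import parse, Sentence
    # Parse with relations=True so subject/verb/object chunks are labeled.
    sen = Sentence(parse(sentence_text, relations=True, lemmata=True))
    # Keep the sentence only if it has a subject, a verb chunk, and an object.
    if sen.subjects and sen.verbs and sen.objects:
        return {"SBJ": sen.subjects, "VP": sen.verbs, "OBJ": sen.objects}
    return None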
Example #6
def process(self):
    text = self._regex.replace(self._text)
    pt = english.parsetree(text, lemmata=True)
    processed = []
    vm = NateVm()
    english.pprint(pt)
    for sentence in pt:
        words = sentence
        pos = 0
        last = len(words)
        while pos < last:
            for pattern, code in self._logic:
                matched = pattern.match(words, start=pos)
                if matched:
                    vm.run(matched, code)
                    pos = matched.stop
                    processed += vm.get()
                    break
            else:
                processed.append(words[pos])
                pos += 1
    self.rebuild_text(processed)
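
Note: NateVm, self._logic, and self._regex are project-specific and not shown. The pattern.match(words, start=pos) call suggests pattern.search Pattern objects; a minimal standalone sketch of that matching API, with a made-up pattern string:

from pattern.en import parsetree
from pattern.search import Pattern

t = parsetree("The big white rabbit eats a fresh carrot.")
p = Pattern.fromstring("JJ NN")  # an adjective followed by a noun
for sentence in t:
    m = p.match(sentence)
    if m:
        print(m.string)  # e.g. "white rabbit"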
Example #7
def test_parse():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
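    # As in Example #1, each sent assignment overrides the previous one,
    # so parse() only sees the last string.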
    
   
    sent = "Experience with mobile application development a plus: iPhone/iPad, Android, or Blackberry."
    sent = "3+ years web software development experience."
    sent = "Bachelor's in Computer Science, Information Systems or a related study, is required."
    sent = 'I ate pizza.'
    sent = "Bachelor's in Computer Science is required."
    sent = "Bachelor 's Degree or 4 years equivalent professional experience ."
    sent = "A Master ’ s Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "A Master's Degree or equivalent in Electrical Engineering , Computer Science , or other technical/engineering field with related programming experience and applicable work experience is required ."
    sent = "BS degree ( BSEE or BSCS strongly preferred , MSCS a plus ) and/or the equivalent in training and experience ."      
    
    result = parse(sent,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
           chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
        relations = True,  # Find relations between chunks.
          lemmata = True,  # Find word lemmata.
            light = True)
    pprint(result) 
Example #8
def run(o):
	""" STM is shortcuts to the short_term_memory operators """

	STM_PATH = './bin/%s/brain/short_term_memory' % o.o['name']
	
	WM_PATH = './bin/%s/brain/working_memory/' % o.o['name']

	import os, sys
	mydirs = os.listdir( STM_PATH )

	from pattern.en import parse, pprint, tag

	import shutil

	for word in mydirs:
		
		ignore = [".DS_Store",".gitignore","README.txt"]
		if word in ignore:
			continue

		#print word
		s = parse(word,tags=True)
		#print s
		pprint(s)

		tagged = s.split('/')[1]

		#print tagged
		from_path = "%s/%s" % (STM_PATH,word)

		# TODO - ask do you want to move numbers
		#if tagged != "NNP" :
		#	pprint(s)

		#	to_path = "%s/%s" % (WM_PATH,"NUMBERS")
		#	os.system( "rsync -avrz %s %s" % (from_path,to_path) )
		#	shutil.rmtree(from_path)
Example #9
# - Even though it's not as popular as spaCy or NLTK, it has unique functionality, such as finding superlatives and comparatives and detecting facts and opinions, which other NLP libraries don't have [1]

## installation
# !pip install pattern

# # Python for NLP: Introduction to the Pattern Library [1]

# ## Pattern Library Functions for NLP

# ### Tokenizing, POS Tagging, and Chunking

from pattern.en import parse
from pattern.en import pprint

pprint(
    parse('I drove my car to the hospital yesterday',
          relations=True,
          lemmata=True))

print(
    parse('I drove my car to the hospital yesterday',
          relations=True,
          lemmata=True).split())

# ### Pluralizing and Singularizing the Tokens

from pattern.en import pluralize, singularize

print(pluralize('leaf'))
print(singularize('thieves'))

# ### Converting Adjective to Comparative and Superlative Degrees
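
# The excerpt breaks off here. A minimal sketch of the two helpers this heading
# refers to, comparative() and superlative() from pattern.en:

from pattern.en import comparative, superlative

print(comparative('big'))   # bigger
print(superlative('big'))   # biggest
print(comparative('good'))  # better
print(superlative('good'))  # best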
Example #10
File: 03-parse.py  Project: clips/pattern
        light = False)

# The light parameter determines how unknown words are handled.
# By default, unknown words are tagged NN and then improved with a set of rules.
# light=False uses Brill's lexical and contextual rules,
# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

# The output is a string with each sentence on a new line.
# Words in a sentence have been annotated with tags,
# for example: fork/NN/I-NP/I-PNP
# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
print(s)
print("")

# Prettier output can be obtained with the pprint() command:
pprint(s)
print("")

# The string's split() method will, unless a split character is given,
# split it into a list of sentences, where each sentence is a list of words
# and each word is a list with the word + its tags.
print(s.split())
print("")

# The tag() command returns a list of (word, POS-tag)-tuples.
# With light=True, this is the fastest and simplest way to get an idea
# of a sentence's constituents:
s = "I eat pizza with a fork."
s = tag(s)
print(s)
for word, tag in s:
    if tag == "NN":  # Find all nouns in the input string.
        print(word)
Example #11
def test_pprint():
    from pattern.en import parse
    from pattern.en import pprint

    result = parse('I ate pizza.', relations=True, lemmata=True)
    pprint(result)
Example #12
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 16 11:24:05 2020

@author: praja
"""
#
from pattern.en import parse
from pattern.en import pprint
##
pprint(parse('He went to park', relations=True, lemmata=True))
print("sucesfull!!!")

Example #13
    x.replace("\n", " ")
    for x in nltk.sent_tokenize(plotText.replace("\t", ""))
]

for strSentence in sentList:

    for word, pos in tag(strSentence):
        if pos in ("VB", "VBD", "VBG", "VBN", "VBP",
                   "VBZ"):  # Retrieve all adjectives.
            print("=====================>>>>> ", word, pos)
        else:
            print(word, pos)

    print(strSentence)
    a = parse(strSentence, relations=True, lemmata=True)
    pprint(a)

    sentence = Sentence(a)
    print(sentence.verbs)
    print()
    print()

    #print(sentence.relations)
    #print(sentence.subjects)
    #print(sentence.objects)
    #print(sentence.verbs)
    #print(sentence.chunk)

    sentScore = sid.polarity_scores(strSentence)

    # sqlite3 insert : subject / objects / verbs / CPC / Sentiment
Example #14
def test_pprint():
    from pattern.en import parse
    from pattern.en import pprint 
    
    result = parse('I ate pizza.', relations=True, lemmata=True)
    pprint(result)    
Example #15
#https://stackabuse.com/python-for-nlp-introduction-to-the-pattern-library/
# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydataset import data
import seaborn as sns

#pip install pattern
from pattern.en import parse
from pattern.en import pprint
parse('Hello Everyone and Welcome to Analytics India Magazine')
# The parse function labels each word in the sentence as a noun, verb, subject, or object. We can also use the 'pprint' function defined in the pattern library to display the parsed sentence in a clear manner.
pprint(
    parse('Hello Everyone and Welcome to Analytics India Magazine',
          relations=True,
          tokenize=True,
          lemmata=True))

#%% ngrams
# "n" combination of words in a sentence.
from pattern.en import ngrams
print(ngrams("Hello Everyone and Welcome to Analytics India Magazine", n=3))
print(ngrams("He goes to hospital", n=2))

#sentiment
# Sentiment refers to an opinion or feeling towards a certain thing. The sentiment object is used to find the polarity (positivity or negativity) of a text along with its subjectivity.

from pattern.en import sentiment
print(sentiment("He is a good boy but sometimes he behaves miserably"))
Example #16
    tokenize=True,  # Split punctuation marks from words?
    tags=True,  # Parse part-of-speech tags? (NN, JJ, ...)
    chunks=True,  # Parse chunks? (NP, VP, PNP, ...)
    relations=False,  # Parse chunk relations? (-SBJ, -OBJ, ...)
    lemmata=False,  # Parse lemmata? (ate => eat)
    encoding='utf-8',  # Input string encoding.
    tagset=None)  # Penn Treebank II (default) or UNIVERSAL.
# parser tagger and tokenizer
for word, pos in tag('I feel *happy*!', tokenize=True, encoding='utf-8'):
    if pos == "JJ":  # Retrieve all adjectives.
        print word
print tokenize('I feel *happy*!',
               punctuation=".,;:!?()[]{}`''\"@#$^&*+-|=~_",
               replace={})
# parser output
pprint(parse('I ate pizza.', relations=True, lemmata=True))
# parse trees
s = parsetree(
    'The cat sat on the mat.',
    tokenize=True,  # Split punctuation marks from words?
    tags=True,  # Parse part-of-speech tags? (NN, JJ, ...)
    chunks=True,  # Parse chunks? (NP, VP, PNP, ...)
    relations=False,  # Parse chunk relations? (-SBJ, -OBJ, ...)
    lemmata=False,  # Parse lemmata? (ate => eat)
    encoding='utf-8',  # Input string encoding.
    tagset=None)  # Penn Treebank II (default) or UNIVERSAL.
print repr(s)
for sentence in s:
    for chunk in sentence.chunks:
        print chunk.type, [(w.string, w.type) for w in chunk.words]
for sentence in tree(open('data/input/tagged.txt'),
Example #17
File: 03-parse.py  Project: mlyne/Scripts
    light=False)

# The light parameter determines how unknown words are handled.
# By default, unknown words are tagged NN and then improved with a set of rules.
# light=False uses Brill's lexical and contextual rules,
# light=True uses a set of custom rules that is less accurate but faster (5x-10x).

# The output is a string with each sentence on a new line.
# Words in a sentence have been annotated with tags,
# for example: fork/NN/I-NP/I-PNP
# NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase.
print s
print

# Prettier output can be obtained with the pprint() command:
pprint(s)
print

# The string's split() method will, unless a split character is given,
# split it into a list of sentences, where each sentence is a list of words
# and each word is a list with the word + its tags.
print s.split()
print

# The tag() command returns a list of (word, POS-tag)-tuples.
# With light=True, this is the fastest and simplest way to get an idea
# of a sentence's constituents:
s = "I eat pizza with a fork."
s = tag(s, light=True)
print s
for word, tag in s:
    if tag == "NN": # Find all nouns in the input string.
        print word
Example #18
# main reference
# http://www.academypublisher.com/jetwi/vol01/no1/jetwi01016076.pdf
# to draw a parse tree recursively
from textblob import TextBlob
wiki = TextBlob(open('full.txt','rU').read())
a=wiki.tags
import nltk 
sentence = a

pattern = """NP: {<DT>?<JJ>*<NN>}
VBD: {<VBD>}
IN: {<IN>}"""
NPChunker = nltk.RegexpParser(pattern) 
result = NPChunker.parse(sentence)
result.draw()

# array input for POS tagging, line by line
from pattern.en import parse
from pattern.en import pprint 

with open('spam.txt', 'rU') as ins:
    array = []
    for line in ins:
        array.append(line)
for i in array:
	pprint(parse(i, relations=True, lemmata=True))

# new reference
# https://www.academia.edu/11692120/Human_Intentions_Mining_Through_Natural_Language_Text_Survey
# In the lexical word-list approach, accuracy may suffer because giving equal
# weight to all the data is a disadvantage, so appropriate weights need to be
# assigned to the data (see the sketch below).
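
# A hypothetical sketch of the weighting idea above: instead of equal weights,
# assign each lexicon word its own score and sum the scores of the words found
# in a sentence. The keywords and weights below are made up for illustration.
def weighted_lexicon_score(sentence, weights):
    words = sentence.lower().split()
    return sum(weights.get(w, 0.0) for w in words)

intent_weights = {"want": 1.0, "need": 0.8, "wish": 0.5, "maybe": 0.1}
print(weighted_lexicon_score("I want and maybe need a job", intent_weights))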
Example #19
	Make a Nonet
	(first iteration)
	1st line: contains 9 syllables
	2nd line: contains 8 syllables
	3rd line: contains 7 syllables
	...
	9th line: contains 1 syllable
	(see the syllable-count sketch at the end of this example)

"""

from pattern.en import parsetree
from pattern.en import tag
from pattern.en import pprint

def word_eval(string):
	pprint(parsetree(string, relations = True))
	for word, pos in tag(string):
		if pos == "NN":
			print word

def gutenberg_text_gather(current_URL):

	from pattern.web import *
	buddhist_psalm_text = URL(current_URL).download()
	print buddhist_psalm_text

	# Save data to a file (will be part of your data fetching script)
	import pickle
	f = open('buddhist_psalm_text.pickle','w')
	pickle.dump(buddhist_psalm_text,f)
	f.close()
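
# A hedged sketch of the nonet check described in the docstring above, using a
# crude vowel-group syllable heuristic (a real implementation would use a
# pronunciation dictionary such as CMUdict):
import re

def count_syllables(word):
    # Approximate: count groups of consecutive vowels and treat a trailing
    # silent 'e' as non-syllabic; every word has at least one syllable.
    groups = re.findall(r"[aeiouy]+", word.lower())
    if word.lower().endswith("e") and len(groups) > 1:
        return len(groups) - 1
    return max(1, len(groups))

def is_nonet(lines):
    # A nonet has 9 lines with 9, 8, ..., 1 syllables.
    return len(lines) == 9 and all(
        sum(count_syllables(w) for w in line.split()) == 9 - i
        for i, line in enumerate(lines)
    )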