示例#1
0
def natural_language_parser(text):
    """Tokenize, POS-tag, and NP/VP-chunk the contents of ``<text>.txt``.

    Prints a sample tokenized and tagged sentence (when the text is long
    enough) followed by the most common noun-phrase and verb-phrase chunks.

    Depends on helpers defined elsewhere in the project:
    ``word_sentence_tokenize``, ``pos_tag``, ``RegexpParser``,
    ``np_chunk_counter``, ``vp_chunk_counter``.

    :param text: base name of the ``.txt`` file to read (without extension).
    """
    # Context manager guarantees the file handle is closed (the original
    # left it open for the lifetime of the process).
    with open(f"{text}.txt", encoding='utf-8') as source_file:
        lowered = source_file.read().lower()
    # Split the text into individual sentences and then individual words.
    word_tokenized_text = word_sentence_tokenize(lowered)
    # Print a sample tokenized sentence; guard so short texts don't crash.
    if len(word_tokenized_text) > 100:
        print(word_tokenized_text[100])
    # Part-of-speech tag every tokenized sentence.
    pos_tagged_text = [pos_tag(sentence) for sentence in word_tokenized_text]
    # Print a sample part-of-speech tagged sentence.
    if len(pos_tagged_text) > 100:
        print(pos_tagged_text[100])
    # Noun-phrase grammar: optional determiner, any adjectives, one noun.
    np_chunk_grammar = "NP: {<DT>?<JJ>*<NN>}"
    np_chunk_parser = RegexpParser(np_chunk_grammar)
    # Verb-phrase grammar: an NP followed by a verb and an optional adverb.
    vp_chunk_grammar = "VP: {<DT>?<JJ>*<NN><VB.*><RB.?>?}"
    vp_chunk_parser = RegexpParser(vp_chunk_grammar)
    # Chunk every POS-tagged sentence once per grammar.
    np_chunked_text = [np_chunk_parser.parse(sentence) for sentence in pos_tagged_text]
    vp_chunked_text = [vp_chunk_parser.parse(sentence) for sentence in pos_tagged_text]
    # Most common chunks of each kind:
    most_common_np_chunks = np_chunk_counter(np_chunked_text)
    most_common_vp_chunks = vp_chunk_counter(vp_chunked_text)
    print(most_common_np_chunks)
    print(most_common_vp_chunks)
示例#2
0
def process_vader():
    """Tokenize, POS-tag, and chunk the global ``vader`` text, printing samples
    and the most common noun-phrase and verb-phrase chunks."""
    # Sentence-then-word tokenize the raw text.
    vader_tokenized = word_sentence_tokenize(vader)

    sample_tokens = vader_tokenized[27]
    print()
    print(f"Vader's single tokenized sentence: {sample_tokens}")

    # POS-tag each tokenized sentence.
    pos_tagged_vader = [pos_tag(tokens) for tokens in vader_tokenized]

    sample_tagged = pos_tagged_vader[27]
    print()
    print(
        f"Vader's single part-of-speech tagged sentence: {sample_tagged}"
    )

    # Noun-phrase grammar and its parser.
    np_chunk_grammar = 'NP: {<DT>?<JJ.?>*<NN>}'
    np_chunk_parser = RegexpParser(np_chunk_grammar)

    # Verb-phrase grammar and its parser.
    vp_chunk_grammar = 'VP: {<DT>?<JJ.?>*<NN><VB.?>((<RB.?>)|(<DT>?<JJ.?>*<NN>)|(<IN><DT>?<JJ.?>*<NN>))*}'
    vp_chunk_parser = RegexpParser(vp_chunk_grammar)

    # Chunk every tagged sentence with each parser.
    np_chunked_vader = [np_chunk_parser.parse(tagged) for tagged in pos_tagged_vader]
    vp_chunked_vader = [vp_chunk_parser.parse(tagged) for tagged in pos_tagged_vader]

    # Tally the most frequent chunks of each kind.
    top_np_chunks = np_chunk_counter(np_chunked_vader)
    top_vp_chunks = vp_chunk_counter(vp_chunked_vader)

    print()
    print("Vader's most-commonly used noun-phrases:")
    print(*top_np_chunks, sep='\n')
    print()
    print(f"Vader's most-commonly used verb-phrases:")
    print(*top_vp_chunks, sep='\n')
示例#3
0
# Verb-phrase chunk grammar: optional determiner, adjectives, a noun,
# a verb (any VB* tag), and an optional adverb.
vp_chunk_grammar = "VP: {<DT>?<JJ>*<NN><VB.*><RB.?>?}"
# Build the verb-phrase RegexpParser from the grammar.
vp_chunk_parser = RegexpParser(vp_chunk_grammar)

# Chunk every POS-tagged sentence with each parser; comprehensions replace
# the append loop but produce identical lists in identical order.
np_chunked_text = [np_chunk_parser.parse(tagged) for tagged in pos_tagged_text]
vp_chunked_text = [vp_chunk_parser.parse(tagged) for tagged in pos_tagged_text]

# Tally and report the most common NP-chunks.
most_common_np_chunks = np_chunk_counter(np_chunked_text)
print(most_common_np_chunks)

# Tally and report the most common VP-chunks.
most_common_vp_chunks = vp_chunk_counter(vp_chunked_text)
print(most_common_vp_chunks)
'''
Analysis for The Picture of Dorian Gray (noun phrases)

Looking at most_common_np_chunks, you can identify characters of importance in the text, such as henry, harry, dorian gray, and basil, based on their frequency. Additionally, another noun phrase, "the picture", appears to be very relevant.
'''
'''
Analysis for The Picture of Dorian Gray (verb phrases)

Looking at most_common_vp_chunks, some interesting findings appear. The verb phrases "i want", "i know", and "i have" occur frequently, indicating a theme of desire and need.
'''
# Noun-phrase chunk grammar: optional determiner, any adjectives, one noun.
# (Identifier fixed from the misspelled "grammer"; it is only used locally.)
np_chunk_grammar = "NP: {<DT>?<JJ>*<NN>}"

# Create the noun-phrase RegexpParser object.
np_chunk_parser = RegexpParser(np_chunk_grammar)

# Verb-phrase chunk grammar: an NP, a verb, and an optional adverb.
# <VB.*> (not <VB.>) so the bare base-form tag "VB" also matches,
# consistent with the other examples in this file.
vp_chunk_grammar = "VP: {<DT>?<JJ>*<NN><VB.*><RB.?>?}"

# Create the verb-phrase RegexpParser object.
vp_chunk_parser = RegexpParser(vp_chunk_grammar)

# Lists to hold the noun-phrase and verb-phrase chunked sentences.
np_chunked_list = []
vp_chunked_list = []

# Each element of pos_tagged_text is a POS-tagged *sentence*, not a word,
# so the loop variable is named accordingly (was misleadingly "word").
for tagged_sentence in pos_tagged_text:
    np_chunked_list.append(np_chunk_parser.parse(tagged_sentence))
    vp_chunked_list.append(vp_chunk_parser.parse(tagged_sentence))

# Store and print the most common NP-chunks.
most_common_np_chunks = np_chunk_counter(np_chunked_list)
print(most_common_np_chunks)

# Store and print the most common VP-chunks.
most_common_vp_chunks = vp_chunk_counter(vp_chunked_list)
print(most_common_vp_chunks)