Exemplo n.º 1
0
 def test_frequency_query_of_fake_word(self):
     """
     Check that asking for the frequency of a made-up word does not raise.

     The only requirement placed on the returned value is that its string
     form is non-empty.
     """
     nonsense = Word("asdfjk", "")
     frequency = DataMuseQuerier().get_frequency(nonsense)
     rendered = str(frequency)
     self.assertTrue(len(rendered) > 0)
Exemplo n.º 2
0
 def test_freqency_query_of_empty_word(self):
     """
     Check that a frequency lookup for an empty-string word does not raise.

     The only requirement placed on the returned value is that its string
     form is non-empty.
     """
     blank = Word("", "")
     frequency = DataMuseQuerier().get_frequency(blank)
     rendered = str(frequency)
     self.assertTrue(len(rendered) > 0)
Exemplo n.º 3
0
 def test_right_context_only_query(self):
     """
     Check that a synonym query given only a right-context word returns results.

     Queries the word "the" with right_context="mall" and expects a
     non-empty result collection.
     """
     datamuse = DataMuseQuerier()
     target = Word("the", "")
     matches = datamuse.get_synonym_query_results(target, right_context="mall")
     self.assertTrue(len(matches) > 0)
Exemplo n.º 4
0
 def test_query_of_upper_case(self):
     """
     Check that querying an upper-case word gives the expected first result.

     The first synonym result for the noun "DOG" is compared against a
     hard-coded expected entry.
     """
     shouting_dog = Word("DOG", "n")
     datamuse = DataMuseQuerier()
     top_hit = datamuse.get_synonym_query_results(shouting_dog)[0]
     expected_top_hit = {
         'score': 45976,
         'tags': ['syn', 'n', 'f:0.000000'],
         'word': 'canis familiaris',
     }
     self.assertEqual(top_hit, expected_top_hit)
Exemplo n.º 5
0
 def test_api_connection(self):
     """
     Check that a synonym query runs and its first result is the expected one.

     The first synonym result for the noun "dog" is compared against a
     hard-coded expected entry.
     """
     noun = Word("dog", "n")
     datamuse = DataMuseQuerier()
     top_hit = datamuse.get_synonym_query_results(noun)[0]
     expected_top_hit = {
         'score': 45976,
         'tags': ['syn', 'n', 'f:0.000000'],
         'word': 'canis familiaris',
     }
     self.assertEqual(top_hit, expected_top_hit)
Exemplo n.º 6
0
 def __init__(self):
     """Store a new DataMuseQuerier on the private ``__synonym_querier`` attribute."""
     querier = DataMuseQuerier()
     self.__synonym_querier = querier
Exemplo n.º 7
0
class WordSubstitutor:
    """Replace a word with its most complex qualifying synonym."""

    # Only these part-of-speech tags are considered for substitution;
    # a word with any other tag is returned untouched.
    SUBSTITUTABLE_PARTS_OF_SPEECH = ('adj', 'adv', 'v', 'n')

    # Query results whose 'score' field is below this value are ignored.
    MIN_SYNONYM_SCORE = 20000

    def __init__(self):
        """Create the querier used for synonym and frequency lookups."""
        self.__synonym_querier = DataMuseQuerier()

    @staticmethod
    def _extract_frequency(tags):
        """Return the number from the last 'f:<number>' tag, or None if there is none."""
        for tag in reversed(tags):
            if isinstance(tag, str) and tag.startswith('f:'):
                try:
                    return float(tag.split(':')[1])
                except ValueError:
                    return None
        return None

    # TODO: add an argument for checking stop words.
    def get_best_synonym(self, word, left_context="", right_context=""):
        """Return the most complex single-word synonym of ``word``.

        Every synonym result returned by the querier is examined. A result
        is a candidate only if it carries the same part-of-speech tag as
        ``word``, has a 'score' of at least ``MIN_SYNONYM_SCORE``, is a
        single word, and carries a parsable 'f:<number>' frequency tag.
        The candidate with the highest complexity score wins, but only if
        that score exceeds the complexity score of ``word`` itself;
        otherwise ``word`` is returned.

        As a side effect, the frequency of ``word`` is set from the querier
        whenever its part of speech is eligible for substitution.

        Args:
            word: Word object whose synonyms the user wants to find
            left_context: optionally specified string as left context
            right_context: optionally specified string as right context

        Returns:
            Word object of the most complex synonym, or ``word`` itself
            when no candidate is more complex.
        """
        part_of_speech = word.get_part_of_speech()

        # Only look to substitute adj, adv, verbs, and nouns.
        if part_of_speech not in self.SUBSTITUTABLE_PARTS_OF_SPEECH:
            return word

        # Query results from the API: synonym entries with part-of-speech tags.
        synonym_list = self.__synonym_querier.get_synonym_query_results(
            word, left_context, right_context)

        # Find and set our word's frequency.
        word.set_frequency(self.__synonym_querier.get_frequency(word))

        # None (rather than an empty placeholder Word) marks "no candidate
        # yet", so a placeholder can never be returned to the caller.
        best_synonym = None
        max_synonym_score = 0
        for syn in synonym_list:
            # Candidate must share the original word's part of speech.
            if part_of_speech not in syn['tags']:
                continue
            if syn['score'] < self.MIN_SYNONYM_SCORE:
                continue
            # Only consider single words, not multi-word phrases.
            if len(syn['word'].split(' ')) > 1:
                continue

            # Skip entries without a usable frequency tag instead of
            # crashing on whatever tag happens to come last.
            freq = self._extract_frequency(syn['tags'])
            if freq is None:
                continue

            synonym = Word(syn['word'], part_of_speech, freq)
            syn_score = synonym.compute_complexity_score()
            if syn_score > max_synonym_score:
                best_synonym = synonym
                max_synonym_score = syn_score

        # Substitute only when the best candidate beats the original word.
        if (best_synonym is not None
                and max_synonym_score > word.compute_complexity_score()):
            return best_synonym
        return word
            
# Example code:
# test_word = Word("dog", "n")
# word_sub = WordSubstitutor()
# test_word = word_sub.get_best_synonym(test_word)
# print(test_word.get_word()) --> prints the chosen single-word synonym, or 'dog' if none is more complex
            
Exemplo n.º 8
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Tester code to generate a set of replacements in a sentence
"""
from DataMuseQuerier import DataMuseQuerier
from Word import Word
from WordSubstitutor import WordSubstitutor
from SpaCyParser import SpaCyParser
import re

querier = DataMuseQuerier()
parser = SpaCyParser()
wsub = WordSubstitutor()

# Sample text to run through the substitutor. It must be defined here:
# parse_and_tag_text() below reads it, and leaving it commented out makes
# the script fail with a NameError.
test_sentences = (
    "Wow you threw that ball very far. "
    "I ate a delicious and big ham sandwich with cheese for lunch "
    "The pretty girl was very worried about the difficult final exams coming up. "
    "She would like more than anything to take a nap."
)

# Replacement word chosen for each parsed token, in order.
output_vec = []
parsed_words_and_tags = parser.parse_and_tag_text(test_sentences)
for token in parsed_words_and_tags:
    # Each token is indexed as (text, part-of-speech tag).
    word = Word(token[0], token[1])
    # TODO: context-aware variant, once left/right contexts are available:
    #   wsub.get_best_synonym(word, left_contexts[i], right_contexts[i])
    output_vec.append(wsub.get_best_synonym(word).get_word())

print(output_vec)

# Now concatenate:
output_text = output_vec[0]
Exemplo n.º 9
0
 def test_empty_query(self):
     """
     Make sure that the command still runs if an empty query is passed, but that nothing is returned
     """
     querier = DataMuseQuerier()
     self.assertEqual(querier.get_synonym_query_results(Word("", "")), [])
Exemplo n.º 10
0
 def test_frequency_query_of_real_word(self):
     """
     Make sure our frequency querier returns the expected result for a real word
     """
     querier = DataMuseQuerier()
     self.assertEqual(querier.get_frequency(Word("dog", "n")), 63.72759)