def __init__(self):
    self.emotions = ["happy", "sad", "angry", "excited", "fearful"]
    emotion_properties = [Word(emotion) for emotion in self.emotions]
    # For each emotion, flatten its per-entry synonym lists and keep the first antonym.
    for emotion, props in zip(self.emotions, emotion_properties):
        setattr(self, '%s_synonyms' % emotion,
                list(itertools.chain.from_iterable(props.synonyms('all'))))
        setattr(self, '%s_antonym' % emotion, props.antonyms()[0])
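# Usage sketch (a minimal, standalone illustration): with the `thesaurus`
# package, Word.synonyms('all') returns one list of synonyms per dictionary
# entry, which is why the constructor above flattens with chain.from_iterable.
import itertools
from thesaurus import Word

happy = Word('happy')
per_entry = happy.synonyms('all')   # e.g. [['cheerful', 'contented', ...], ...]
flat = list(itertools.chain.from_iterable(per_entry))
print(flat[:5], happy.antonyms()[0])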
def get_word_to_synonyms_dict(self, n, text, tokenized_text):
    word_to_syns_dict = {}
    word_to_pos = self.get_important_words(tokenized_text)
    for w in tokenized_text:
        if w not in word_to_pos:
            continue
        list_of_syns_for_w = []
        # Disambiguate the word's sense in context before scoring candidates.
        original_synset = lesk(text, w)
        if original_synset:
            word = Word(w)
            p_o_s = pos_dict_thesaurus[word_to_pos[w]]
            syns = word.synonyms('all', partOfSpeech=p_o_s)
            flat_list = [item for sublist in syns for item in sublist]
            for candidate_syn in flat_list:
                candidate_synsets = wordnet.synsets(candidate_syn, pos=pos_dict[word_to_pos[w]])
                if len(candidate_synsets) > 0:
                    # Score each candidate by its best Wu-Palmer similarity to the original sense.
                    list_sims = [original_synset.wup_similarity(x)
                                 for x in candidate_synsets
                                 if original_synset.wup_similarity(x)]
                    if len(list_sims) > 0:
                        list_of_syns_for_w.append((candidate_syn, max(list_sims)))
        if list_of_syns_for_w:
            list_of_syns_for_w.sort(key=lambda x: x[1], reverse=True)
            # Slicing already caps at the list length, so no explicit truncation check is needed.
            word_to_syns_dict[(w, word_to_pos[w])] = list_of_syns_for_w[:n]
    return word_to_syns_dict
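# Hedged sketch of assumed context (these maps are not shown in the source):
# get_word_to_synonyms_dict needs Penn-tag lookups for both back ends and
# returns {(word, pos_tag): [(synonym, similarity), ...]}, best matches first.
from nltk.corpus import wordnet

pos_dict_thesaurus = {'NN': 'noun', 'VB': 'verb', 'JJ': 'adj', 'RB': 'adv'}  # assumed
pos_dict = {'NN': wordnet.NOUN, 'VB': wordnet.VERB, 'JJ': wordnet.ADJ, 'RB': wordnet.ADV}  # assumed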
def run_news_through_filter(news):
    print('Filtering news...')
    filtered = []
    # Work on a single randomly chosen article.
    news = [random.choice(news)]
    for item in news:
        headline = item['title'].split()
        altered_sentence = []
        for word in headline:
            w = Word(word)
            synonym = w.synonyms(relevance=1)
            word_to_use = word if len(synonym) == 0 else random.choice(synonym)
            altered_sentence.append(word_to_use)
        altered_headline = ' '.join(altered_sentence)
        filtered.append({'altered_title': altered_headline, 'url': item['url']})
    return filtered[0]
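# Example usage with hypothetical data: one random article is chosen and its
# headline rewritten word by word with relevance-1 synonyms.
news = [{'title': 'Local team wins big game', 'url': 'https://example.com/1'}]
print(run_news_through_filter(news))
# -> {'altered_title': '...', 'url': 'https://example.com/1'}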
def __init__(self, master, width, height, word, column, columnspan, row, partspeech):
    # Initialize the class.
    self.base_word = Word(word)
    self.master = master
    self.width = width
    self.height = height
    self.column = column
    self.row = row
    self.columnspan = columnspan
    self.partspeech = partspeech
def cli(word):
    rWord = Word(word)
    synonyms = rWord.synonyms()
    if not synonyms:
        click.echo("No results.")
        return
    for idx, synonym in enumerate(synonyms):
        click.echo("{0}. {1}".format(idx + 1, synonym))
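# Assumed wiring (not shown in the source): cli is presumably registered as a
# click command, roughly:
#   @click.command()
#   @click.argument('word')
#   def cli(word): ...
# and invoked from a shell as, e.g.:  python thesaurus_cli.py happy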
def find_synonym(word):
    if word in common:
        return common[word]
    w = Word(word)
    syns = w.synonyms()
    for syn in syns:
        if not has5(syn):
            return syn
    return word.replace('e', '-')
def _read(filename):
    with open('datsets/categories/%s' % (filename,), 'r') as fp:
        data = fp.read()
    words = [s.strip() for s in data.splitlines()]
    categories[filename] = set(words)
    for word in words:
        if word in ["what", "why", "analyse"]:
            continue
        w = Word(word)
        categories[filename].update(w.synonyms())
    categories[filename] = list(categories[filename])
def get_synonyms(words_dict):
    word_syns = {}
    for w in tqdm.tqdm(words_dict):
        word = Word(w)
        try:
            syns = word.synonyms(relevance=3)
        except Exception:  # lookup failures leave the word without synonyms
            syns = None
        if syns is not None:
            word_syns[w] = syns
    return word_syns
def get_synonym(word, part_of_speech=None):
    """
    Returns a synonym for a given word.

    :param word: the word to look up
    :param part_of_speech: optional part of speech to filter by
    :return: the top synonym when part_of_speech is given, otherwise the full list
    """
    if part_of_speech:
        return Word(word).synonyms(partOfSpeech=part_of_speech)[0]
    # Note the asymmetry: without a part of speech this returns a list, not a string.
    return Word(word).synonyms()
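# Usage sketch showing the asymmetric return types noted above:
print(get_synonym('happy', part_of_speech='adj'))  # a single synonym string
print(get_synonym('happy'))                        # the full list of synonyms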
def get_syn_classes(words):
    syns = []
    for word in words:
        try:
            w = Word(word)
            for group in w.synonyms('all'):
                syns += group
        except Exception:
            pass
    return words + syns
def get_syn(words):
    syns = []
    for word in words:
        try:
            w = Word(word)
            for group in w.synonyms('all'):
                syns += group
        except Exception:
            # print(word, ' not found')
            pass
    return words + syns
def generate_synonyms_dict(words_list):
    synonyms_dict = dict()
    for chars in words_list:
        try:
            w = Word(chars)
            syns = w.synonyms(relevance=3, form='common')
            synonyms_dict[chars] = syns
        except Exception:
            # Fall back to the word itself when the lookup fails.
            synonyms_dict[chars] = [chars]
    return synonyms_dict
def get_synonyms(word):
    synonyms = Word(word).synonyms('all', relevance=[2, 3])
    if not synonyms:
        synonyms = "No synonyms found"
    return synonyms
def vectorize_sent(documents_token, model):
    sents_vec = []
    vocab = model.wv.vocab
    for sent in documents_token:
        l = 0
        wv = 0
        for token in sent:
            if token in vocab:
                wv += model.wv[token]
                l += 1
            else:
                # Check for synonyms when the token is not in the vocabulary.
                try:
                    syns = Word(token).synonyms()
                    for syn in syns:
                        if syn in vocab:
                            wv += model.wv[syn]
                            l += 1
                            break
                except Exception:
                    continue
        # Average the word vectors; None marks sentences with no known words.
        sents_vec.append(wv / l if l != 0 else None)
    return sents_vec
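# Hedged usage sketch: the function reads model.wv.vocab, which exists in
# gensim < 4.0 (later versions renamed it key_to_index), so this assumes an
# older gensim.
from gensim.models import Word2Vec

docs = [['the', 'cat', 'sat'], ['dogs', 'bark', 'loudly']]
model = Word2Vec(docs, min_count=1, size=50)  # `size` is the gensim<4 spelling
vectors = vectorize_sent(docs, model)         # one mean vector (or None) per sentence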
def new_headline(headline):
    head_list = headline.split()
    new_head = []
    for word in head_list:
        if word in STOP:
            new_head.append(word.upper())
            continue
        w = Word(word)
        syn = w.synonyms()
        if syn:
            new_head.append(syn[0].upper())
        else:
            new_head.append(word.upper())
    return ' '.join(new_head)
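# Example with an assumed stop-word set (STOP is module-level in the source):
STOP = {'the', 'a', 'an', 'of'}  # assumed contents
print(new_headline('the quick brown fox'))  # e.g. 'THE FAST ... FOX'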
def main(include_file, exclude_file, relevance=[3], length=[1], parts_of_speech=[]):
    with open(include_file) as file:
        words = set(file.read().splitlines())
    with open(exclude_file) as file:
        exclude = set(file.read().splitlines())
    word_objects = map(lambda w: Word(w), list(words))
    syns = map(
        lambda w: w.synonyms('all', relevance=relevance, length=length,
                             partsOfSpeech=parts_of_speech),
        word_objects)
    synonyms = flatten(syns)
    good = set(words)
    new = synonyms - good - exclude
    with open('new-words.txt', 'w') as file:
        for s in new:
            print(s)
        file.write('\n'.join(list(new)))
    print(Color.USED + ' '.join(good) + Color.END)  # fixed: Python 3 print call
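# Assumed helper (not in the source): flatten must collapse the nested
# per-word, per-entry synonym lists into a single set, e.g.:
def flatten(list_of_lists):
    return {syn for word_syns in list_of_lists
            for entry in word_syns
            for syn in entry}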
def closest_synonyms(word, choices):
    syn_list = wordnet.synsets(word)
    # Fixed: look up the word itself, not the literal string 'word'.
    thes_word = Word(word).synonyms()
    for lemma in syn_list[0].lemmas():
        s = " ".join(lemma.name().split('_'))
        for choice in choices:
            if Solver.similar(s, choice):
                return choice
    for s in thes_word:
        for choice in choices:
            if Solver.similar(s, choice):
                return choice
    raise ValueError('Word not found')
def choose_syn(item):
    # Choose a synonym for the word, biased by the module-level `relevancy` setting.
    synonyms = Word(item).synonyms()
    to_return = item  # fall back to the original word if nothing applies
    if len(synonyms) > 0:
        if relevancy == 0:
            # Choose the first (most relevant) synonym.
            to_return = synonyms[0]
        elif relevancy == 1:
            # Choose among the top 20% most relevant synonyms.
            top_20 = math.ceil(len(synonyms) * .2)
            to_return = random.choice(synonyms[0:top_20])
        elif relevancy == 2:
            # Choose among the top 40% most relevant synonyms.
            top_40 = math.ceil(len(synonyms) * .4)
            to_return = random.choice(synonyms[0:top_40])
        elif relevancy == 3:
            # Choose any synonym at random.
            to_return = random.choice(synonyms)
    print(item + " -> " + to_return)
    return to_return
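# Example (the function reads a module-level `relevancy` setting):
relevancy = 1
print(choose_syn('happy'))  # prints "happy -> <synonym>" and returns the pick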
def parallel(inputs):
    (word, word_Dictionary, count, chosen_index) = inputs
    new_instance = Word(word)
    # Try the most relevant synonyms first, then fall back to lower relevance tiers.
    response = False
    for relevance in (3, 2, 1):
        synonyms = new_instance.synonyms('all', relevance=[relevance], partOfSpeech=part)
        response = findWordInDataset(word_Dictionary, synonyms, word, count)
        if response != False:
            break
    return (chosen_index, response)
def find_synonym(q_arr):
    keyword_list = QuestionAnalysis.get_question_keywords()
    for word in q_arr:
        words = Word(word).synonyms()
        matches = list(filter(lambda w: w in keyword_list.keys(), words))
        if matches != []:
            QuestionAnalysis.add_word_to_keyword_list(word, keyword_list[matches[0]], keyword_list)
            return keyword_list[matches[0]]
def get_synonyms(word, src="wordnet"): synonyms = set() if src == "wordnet": for ss in wn.synsets(word): synonyms |= set(ss.lemma_names()) elif src == "thesaurus": try: w = Word(word) except: return synonyms try: syn = w.synonyms(relevance=[2, 3]) except: return synonyms for s in syn: if len(s.split(' ')) == 1: synonyms.add(s.lower()) return synonyms
def anotherword(response):
    # Some rudimentary synonyms and antonyms.
    choice = input('Enter your choice:\n1. Synonyms\n2. Antonyms\n')
    if choice not in ('1', '2'):
        print("Invalid Choice")
        return
    word = input("Enter the word: ")
    w = Word(word)
    if choice == '1':
        temp = w.synonyms()
        what = 'Synonyms'
    else:
        temp = w.antonyms()
        what = 'Antonyms'
    print('Showing %s of %s' % (what, word))
    for t in temp:
        print(t)
def listConcept(word):
    w = Word(word)
    iE = inflect.engine()
    pS = PorterStemmer()
    # Start with the word and its synonyms.
    myList1 = [word] + list(w.synonyms())
    # Add the plural form of every entry collected so far.
    for entry in list(myList1):
        myList1.append(iE.plural(entry))
    # Add the stem of every entry, then deduplicate while preserving order.
    for entry in list(myList1):
        myList1.append(pS.stem(entry))
    myList1 = list(dict.fromkeys(myList1))
    return myList1
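# Example: the result holds the word, its synonyms, their plurals, and the
# stems of all of those, deduplicated while preserving order.
print(listConcept('car'))  # e.g. ['car', 'auto', ..., 'cars', 'autos', ...]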
def reccomend_word(text):
    words = nltk.tokenize.word_tokenize(text)
    benedict = nltk.FreqDist(words)
    target_word = benedict.max()
    candidate_words = Word(target_word).synonyms(relevance=1)
    stop_words = set(stopwords.words('english'))
    filtered_candidates = [w for w in candidate_words if w not in stop_words]
    # Fixed off-by-one: random.randint is inclusive on both ends, so the old
    # upper bound of len(filtered_candidates) + 1 could index past the list.
    recommendation = random.choice(filtered_candidates)
    return recommendation
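# Example usage: the most frequent token ('cats' here) is replaced by one of
# its relevance-1 synonyms, with stop words filtered out.
print(reccomend_word('cats chase mice and cats nap and cats purr'))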
def rhymeGen(word1, word2):
    rhymers = myRhym(word1)
    needers = Word(word2).synonyms()
    poss = pd.DataFrame({'High': [], 'Med': [], 'Low': []})
    for words in rhymers:
        # High: the rhyme itself is a synonym of word2; Med/Low: a first- or
        # second-degree synonym of the rhyme is.
        if words in needers:
            poss = poss.append({'High': words}, ignore_index=True)
        try:
            for syns1 in Word(words).synonyms():
                if syns1 in needers:
                    poss = poss.append({'Med': words}, ignore_index=True)
                for syns2 in Word(syns1).synonyms():
                    if syns2 in needers:
                        poss = poss.append({'Low': words}, ignore_index=True)
        except Exception:
            pass  # skip rhymes the thesaurus cannot look up
    out = scrub(poss)
    printRhymes(out)
    return out
def redact_concept(data, concept):
    from thesaurus import Word
    w = Word(concept)
    # Redact the concept word and all of its synonyms.
    w1 = [syn.lower() for syn in w.synonyms()]
    w1.append(concept)
    concept1 = ''
    for i in nltk.word_tokenize(data):
        if i.lower() in w1:
            stats.append([i, len(i), 'Concept'])
            concept1 += '█' * len(i) + ' '
        elif i == '.':
            concept1 += i
        else:
            concept1 += i + ' '
    return concept1
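# Example usage (hypothetical text): tokens matching the concept or one of its
# synonyms are replaced by an equal-length run of block characters. `stats` is
# a module-level list the function appends audit records to.
stats = []
print(redact_concept('The happy dog was cheerful all day.', 'happy'))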
def reda_concept(data3, filename, con):
    tokens = nltk.word_tokenize(data3)
    w = Word(con)
    concept = w.synonyms()
    concept.append(con)
    for i in concept:
        for j in range(len(tokens)):
            if i.lower() == tokens[j].lower():
                stats.append([tokens[j], len(tokens[j]), filename, 'type:Concept'])
                tokens[j] = '█' * len(i)
    reda = ''
    for i in tokens:
        if i in ['.', ',', ':', ';', '"', '?', '!', '(', ')']:
            reda = reda + i
        else:
            reda = reda + i + ' '
    # Fixed: return the redacted text instead of the unmodified input.
    return reda
def getSynonyms(word):
    syns = set()
    # vocabulary's vb.synonym returns a JSON string (or False on failure), so
    # parse strings before treating the result as a list of {'text': ...} dicts.
    result = vb.synonym(word)
    if isinstance(result, str):
        result = json.loads(result)
    if isinstance(result, list):
        syns.update(synonym['text'] for synonym in result)
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            syns.add(l.name())
    w = Word(word)
    syns.update(w.synonyms())
    if not syns:
        apiKey = os.getenv('BIG_HUGE')
        result = pyhugeconnector.thesaurus_entry(word=word, api_key=apiKey,
                                                 pos_tag='n', ngram=2,
                                                 relationship_type='syn')
        if isinstance(result, list):
            syns.update(result)
    return syns
def build(self):
    """
    Returns a joke in an alternate VN form of Jape. Uses two verbs and two nouns.
    :return:
    """
    while self.words[0] is None and self.words[1] is None:
        self.words[2], self.words[3] = get_homonyms("v")
        try:
            self.words[0] = Word(self.words[2]).synonyms(partOfSpeech=["verb"])[0]
        except Exception:
            self.words[0] = None
            self.words[1] = None
            continue
        try:
            self.words[1] = Word(self.words[3]).synonyms(partOfSpeech=["noun"])[0]
        except Exception:
            self.words[0] = None
            self.words[1] = None
            continue
    return ("Why did someone " + self.words[0] + " a(n) " + self.words[1] +
            "? So they could " + self.words[2] + " the " + self.words[3])
def build(self):
    """
    Returns a joke in Bazaar form, using 2 adjectives and 2 nouns.
    :return:
    """
    while self.words[0] is None and self.words[1] is None:
        self.words[2], self.words[3] = get_homonyms("adj")
        try:
            self.words[0] = Word(self.words[2]).synonyms(partOfSpeech=["adj"])[0]
        except Exception:
            self.words[0] = None
            self.words[1] = None
            continue
        try:
            self.words[1] = Word(self.words[3]).synonyms(partOfSpeech=["noun"])[0]
        except Exception:
            self.words[0] = None
            self.words[1] = None
            continue
    return ("What do you call a(n) " + self.words[0] + " " + self.words[1] +
            "? A(n) " + self.words[2] + " " + self.words[3] + ".")