def get_synonym_complexities(self):
    """Pair each sufficiently similar synonym with its complexity score.

    A score table is built for ``self.synonyms`` from three helper calls
    (``hf.get_elmo_score``, ``complex_word.get_synonym_complexities`` and
    ``hf.get_gram_score``). Rows whose ``sem_sim`` is below 0.15 are kept
    and ordered by ascending ``complexity``.

    Returns:
        A list of ``(synonym, complexity)`` tuples in ascending order of
        complexity, or ``None`` when ``self.synonyms`` holds fewer than
        two entries.
    """
    # With zero or one candidate nothing is scored at all.
    if len(self.synonyms) <= 1:
        return None

    table = pd.DataFrame()
    table['synonyms'] = self.synonyms
    table['sem_sim'] = hf.get_elmo_score(
        self.synonyms, self.token_sent, self.index)
    table['complexity'] = complex_word.get_synonym_complexities(
        self.synonyms, self.token_sent, self.index)
    # The grammaticality column is computed here but is not consulted by
    # the filter below, nor included in the returned pairs.
    table['grammaticality'] = hf.get_gram_score(
        self.synonyms, self.token_sent, self.pos_sent, self.index)

    # Keep rows under the 0.15 sem_sim cut-off, simplest candidates first.
    # NOTE(review): "sem_sim" is filtered with "<", so it presumably behaves
    # like a distance (lower = closer in meaning) -- confirm against hf.
    close_enough = table['sem_sim'] < 0.15
    ranked = table[close_enough].sort_values(by=['complexity'])

    words = ranked['synonyms'].values
    complexities = ranked['complexity'].values
    return list(zip(words, complexities))
def get_synonym_dataframe(self, max_sem_sim=0.3, max_complexity=None):
    """Return a table of scored synonym candidates that pass the filters.

    Each entry of ``self.synonyms`` is scored with ``hf.get_elmo_score``
    (column ``sem_sim``), ``complex_word.get_synonym_complexities`` (column
    ``complexity``) and ``hf.get_gram_score`` (column ``grammaticality``).
    Rows are kept when ``sem_sim`` is below ``max_sem_sim`` and
    ``grammaticality`` equals 1. A ``combo`` column
    (``sem_sim + complexity``) is then added and the rows are sorted by
    ascending ``combo``.

    Args:
        max_sem_sim: Exclusive upper bound on ``sem_sim``. Defaults to 0.3,
            the value that used to be hard-coded.
        max_complexity: Optional exclusive upper bound on ``complexity``,
            for replacing only with words below a complexity threshold.
            ``None`` (the default) applies no complexity filter, which
            matches the previous behaviour.

    Returns:
        A ``pandas.DataFrame`` with the columns ``synonyms``, ``sem_sim``,
        ``complexity``, ``grammaticality`` and ``combo`` (possibly with no
        rows), or ``None`` when ``self.synonyms`` holds fewer than two
        entries.
    """
    # With zero or one candidate nothing is scored at all.
    if len(self.synonyms) <= 1:
        return None

    synonym_scores = pd.DataFrame()
    synonym_scores['synonyms'] = self.synonyms
    synonym_scores['sem_sim'] = hf.get_elmo_score(
        self.synonyms, self.token_sent, self.index)
    synonym_scores['complexity'] = complex_word.get_synonym_complexities(
        self.synonyms, self.token_sent, self.index)
    synonym_scores['grammaticality'] = hf.get_gram_score(
        self.synonyms, self.token_sent, self.pos_sent, self.index)

    # NOTE(review): "sem_sim" is filtered with "<", so it presumably behaves
    # like a distance (lower = closer in meaning) -- confirm against hf.
    synonym_scores = synonym_scores[synonym_scores['sem_sim'] < max_sem_sim]
    if max_complexity is not None:
        synonym_scores = synonym_scores[
            synonym_scores['complexity'] < max_complexity]

    # Take an explicit copy after the last filter: the next statement adds
    # a column, and doing that on the result of boolean-mask slicing raises
    # pandas' SettingWithCopyWarning on versions without Copy-on-Write.
    synonym_scores = synonym_scores[
        synonym_scores['grammaticality'] == 1].copy()

    synonym_scores['combo'] = (
        synonym_scores['sem_sim'] + synonym_scores['complexity'])
    return synonym_scores.sort_values(by=['combo'])
def get_ranked_synonyms(self):
    """Return the lowest-complexity synonym that passes the similarity cut.

    Each entry of ``self.synonyms`` is scored with ``hf.get_elmo_score``
    (column ``sem_sim``), ``complex_word.get_synonym_complexities`` (column
    ``complexity``) and ``hf.get_gram_score`` (column ``grammaticality``).
    Rows whose ``sem_sim`` is below 0.15 are kept and sorted by ascending
    ``complexity``; the first remaining synonym is returned.

    Returns:
        The top-ranked synonym (a single element of ``self.synonyms``), or
        the one-element list ``[self.word]`` when ``self.synonyms`` holds
        fewer than two entries or no candidate survives the filter.

        NOTE(review): the success path returns a bare element while the
        fallback returns a list. This is kept as-is because callers may
        depend on it -- confirm and unify if possible.
    """
    # With zero or one candidate nothing is scored; fall back to the word.
    if len(self.synonyms) <= 1:
        return [self.word]

    synonym_scores = pd.DataFrame()
    synonym_scores['synonyms'] = self.synonyms
    synonym_scores['sem_sim'] = hf.get_elmo_score(
        self.synonyms, self.token_sent, self.index)
    synonym_scores['complexity'] = complex_word.get_synonym_complexities(
        self.synonyms, self.token_sent, self.index)
    # The grammaticality column is computed here but is not consulted by
    # the filter below.
    synonym_scores['grammaticality'] = hf.get_gram_score(
        self.synonyms, self.token_sent, self.pos_sent, self.index)

    # Filtering process: keep rows under the 0.15 sem_sim cut-off, then put
    # the least complex candidate first.
    synonym_scores = synonym_scores[synonym_scores['sem_sim'] < 0.15]
    synonym_scores = synonym_scores.sort_values(by=['complexity'])

    # Explicit emptiness check instead of a bare ``except:`` around the
    # index lookup, so unrelated errors (and KeyboardInterrupt) are no
    # longer silently swallowed.
    if synonym_scores.empty:
        return [self.word]
    return synonym_scores['synonyms'].values[0]